moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +34 -3
- moose_lib/blocks.py +416 -52
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +37 -30
- moose_lib/config/config_file.py +5 -1
- moose_lib/config/runtime.py +73 -34
- moose_lib/data_models.py +331 -61
- moose_lib/dmv2/__init__.py +69 -73
- moose_lib/dmv2/_registry.py +2 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +35 -16
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +299 -151
- moose_lib/dmv2/registry.py +18 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +75 -23
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +11 -6
- moose_lib/dmv2/web_app_helpers.py +5 -1
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +340 -56
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +233 -117
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +6 -5
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +52 -58
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +42 -40
- tests/test_web_app.py +11 -5
- moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py
CHANGED
```diff
@@ -6,8 +6,19 @@ from inspect import isclass
 from uuid import UUID
 from datetime import datetime, date
 
-from typing import
-
+from typing import (
+    Literal,
+    Tuple,
+    Union,
+    Any,
+    get_origin,
+    get_args,
+    TypeAliasType,
+    Annotated,
+    Type,
+    _BaseGenericAlias,
+    GenericAlias,
+)
 from pydantic import BaseModel, Field, PlainSerializer, GetCoreSchemaHandler, ConfigDict
 from pydantic_core import CoreSchema, core_schema
 import ipaddress
```
```diff
@@ -15,8 +26,24 @@ import ipaddress
 type Key[T: (str, int)] = T
 type JWT[T] = T
 
+# Integer type aliases for ClickHouse integer types
+type Int8 = Annotated[int, "int8"]
+type Int16 = Annotated[int, "int16"]
+type Int32 = Annotated[int, "int32"]
+type Int64 = Annotated[int, "int64"]
+type UInt8 = Annotated[int, "uint8"]
+type UInt16 = Annotated[int, "uint16"]
+type UInt32 = Annotated[int, "uint32"]
+type UInt64 = Annotated[int, "uint64"]
 
-
+# Float type aliases for ClickHouse float types
+type Float32 = Annotated[float, "float32"]
+type Float64 = Annotated[float, "float64"]
+
+
+@dataclasses.dataclass(
+    frozen=True
+)  # a BaseModel in the annotations will confuse pydantic
 class ClickhousePrecision:
     precision: int
 
```
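These aliases are ordinary `Annotated` markers over `int`/`float`, so they can be used directly as Pydantic field annotations. A minimal sketch, assuming the aliases are imported from `moose_lib.data_models` (the `moose_lib/__init__.py` changes in this release may also re-export them at the top level):

```python
from pydantic import BaseModel

from moose_lib.data_models import Float32, Int32, UInt64  # assumed import path


class Metric(BaseModel):
    delta: Int32          # maps to ClickHouse Int32
    event_count: UInt64   # maps to ClickHouse UInt64
    score: Float32        # maps to ClickHouse Float32
    raw: int              # a plain int falls back to Int64 (see the resolver below)
```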
```diff
@@ -26,6 +53,11 @@ class ClickhouseSize:
     size: int
 
 
+@dataclasses.dataclass(frozen=True)
+class ClickhouseFixedStringSize:
+    size: int
+
+
 @dataclasses.dataclass(frozen=True)
 class ClickhouseDefault:
     expression: str
```
```diff
@@ -40,6 +72,55 @@ class ClickHouseTTL:
     expression: str
 
 
+@dataclasses.dataclass(frozen=True)
+class ClickHouseCodec:
+    expression: str
+
+
+@dataclasses.dataclass(frozen=True)
+class ClickHouseMaterialized:
+    """
+    ClickHouse MATERIALIZED column annotation.
+    The column value is computed at INSERT time and physically stored.
+    Cannot be explicitly inserted by users.
+
+    Args:
+        expression: ClickHouse SQL expression using column names (snake_case)
+
+    Examples:
+        # Extract date component
+        event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
+
+        # Precompute hash
+        user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")]
+
+        # Complex expression with JSON
+        combination_hash: Annotated[
+            list[int],
+            ClickHouseMaterialized(
+                "arrayMap(kv -> cityHash64(kv.1, kv.2), "
+                "JSONExtractKeysAndValuesRaw(toString(log_blob)))"
+            )
+        ]
+
+    Notes:
+        - Expression uses ClickHouse column names, not Python field names
+        - MATERIALIZED and DEFAULT are mutually exclusive
+        - Can be combined with ClickHouseCodec for compression
+        - Changing the expression modifies the column in-place (existing values preserved)
+    """
+
+    expression: str
+
+
+@dataclasses.dataclass(frozen=True)
+class ClickHouseJson:
+    max_dynamic_paths: int | None = None
+    max_dynamic_types: int | None = None
+    skip_paths: tuple[str, ...] = ()
+    skip_regexps: tuple[str, ...] = ()
+
+
 def clickhouse_decimal(precision: int, scale: int) -> Type[Decimal]:
     return Annotated[Decimal, Field(max_digits=precision, decimal_places=scale)]
 
```
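A short sketch of the new codec and materialized annotations in a model, assuming they are importable from `moose_lib.data_models`; the field names and the `ZSTD(3)` codec expression are illustrative:

```python
from datetime import date, datetime
from typing import Annotated

from pydantic import BaseModel

from moose_lib.data_models import ClickHouseCodec, ClickHouseMaterialized


class Event(BaseModel):
    event_time: datetime
    # MATERIALIZED: computed at INSERT time from event_time and stored;
    # cannot be inserted explicitly.
    event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
    # CODEC: per-column compression; per the docstring above it can also be
    # combined with MATERIALIZED on the same column.
    payload: Annotated[str, ClickHouseCodec("ZSTD(3)")]
```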
```diff
@@ -53,6 +134,23 @@ def clickhouse_datetime64(precision: int) -> Type[datetime]:
     return Annotated[datetime, ClickhousePrecision(precision=precision)]
 
 
+def FixedString(size: int) -> ClickhouseFixedStringSize:
+    """
+    Creates a FixedString(N) annotation for fixed-length strings.
+
+    ClickHouse stores exactly N bytes, padding shorter values with null bytes.
+    Values exceeding N bytes will raise an exception.
+
+    Use for fixed-length data like hashes, IPs, UUIDs, MAC addresses.
+
+    Example:
+        md5_hash: Annotated[str, FixedString(16)]  # 16-byte MD5
+        sha256: Annotated[str, FixedString(32)]  # 32-byte SHA256
+        ipv6: Annotated[str, FixedString(16)]  # 16-byte IPv6
+    """
+    return ClickhouseFixedStringSize(size=size)
+
+
 type Point = Annotated[tuple[float, float], "Point"]
 type Ring = Annotated[list[tuple[float, float]], "Ring"]
 type LineString = Annotated[list[tuple[float, float]], "LineString"]
```
```diff
@@ -62,11 +160,14 @@ type MultiPolygon = Annotated[list[list[list[tuple[float, float]]]], "MultiPolyg
 
 
 def aggregated[T](
-
-
-
+    result_type: Type[T],
+    agg_func: str,
+    param_types: list[type | GenericAlias | _BaseGenericAlias],
 ) -> Type[T]:
-    return Annotated[
+    return Annotated[
+        result_type,
+        AggregateFunction(agg_func=agg_func, param_types=tuple(param_types)),
+    ]
 
 
 @dataclasses.dataclass(frozen=True)
```
```diff
@@ -79,14 +180,11 @@ class AggregateFunction:
             "functionName": self.agg_func,
             "argumentTypes": [
                 py_type_to_column_type(t, [])[2] for t in self.param_types
-            ]
+            ],
         }
 
 
-def simple_aggregated[T](
-    agg_func: str,
-    arg_type: Type[T]
-) -> Type[T]:
+def simple_aggregated[T](agg_func: str, arg_type: Type[T]) -> Type[T]:
     """Helper to create a SimpleAggregateFunction type annotation.
 
     SimpleAggregateFunction is a ClickHouse type for storing aggregated values directly
```
```diff
@@ -108,7 +206,9 @@ def simple_aggregated[T](
         last_status: simple_aggregated("anyLast", str)
     ```
     """
-    return Annotated[
+    return Annotated[
+        arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)
+    ]
 
 
 @dataclasses.dataclass(frozen=True)
```
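With the reshaped signatures, `aggregated()` takes the result type, the aggregate function name, and the argument types, while `simple_aggregated()` remains a two-argument helper. A sketch of both in a model, assuming they are importable from `moose_lib.data_models`; field names and functions are illustrative:

```python
from pydantic import BaseModel

from moose_lib.data_models import aggregated, simple_aggregated  # assumed export


class DailyStats(BaseModel):
    # AggregateFunction state column: avg over a single Float64 argument.
    avg_latency: aggregated(float, "avg", [float])
    # SimpleAggregateFunction column: the summed value is stored directly.
    total_count: simple_aggregated("sum", int)
```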
```diff
@@ -119,7 +219,7 @@ class SimpleAggregateFunction:
     def to_dict(self):
         return {
             "functionName": self.agg_func,
-            "argumentType": py_type_to_column_type(self.arg_type, [])[2]
+            "argumentType": py_type_to_column_type(self.arg_type, [])[2],
         }
 
 
```
```diff
@@ -132,7 +232,9 @@ def enum_value_serializer(value: int | str):
 
 class EnumValue(BaseModel):
     name: str
-    value: Annotated[
+    value: Annotated[
+        int | str, PlainSerializer(enum_value_serializer, return_type=dict)
+    ]
 
 
 class DataEnum(BaseModel):
```
```diff
@@ -160,7 +262,17 @@ class MapType(BaseModel):
     value_type: "DataType"
 
 
-
+class JsonOptions(BaseModel):
+    max_dynamic_paths: int | None = None
+    max_dynamic_types: int | None = None
+    typed_paths: list[tuple[str, "DataType"]] = []
+    skip_paths: list[str] = []
+    skip_regexps: list[str] = []
+
+
+type DataType = (
+    str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions
+)
 
 
 def handle_jwt(field_type: type) -> Tuple[bool, type]:
```
```diff
@@ -200,12 +312,57 @@ class Column(BaseModel):
     default: str | None = None
     annotations: list[Tuple[str, Any]] = []
     ttl: str | None = None
+    codec: str | None = None
+    materialized: str | None = None
 
     def to_expr(self):
         # Lazy import to avoid circular dependency at import time
         from .query_builder import ColumnRef
+
         return ColumnRef(self)
 
+    def __str__(self) -> str:
+        """Return properly quoted identifier for SQL interpolation.
+
+        This enables Column objects to be used directly in f-strings and
+        string concatenation for SQL query construction.
+
+        Returns:
+            Backtick-quoted identifier safe for ClickHouse SQL.
+
+        Example:
+            >>> col = Column(name="user_id", ...)
+            >>> f"SELECT {col} FROM users"
+            "SELECT `user_id` FROM users"
+        """
+        from .utilities.sql import quote_identifier
+
+        return quote_identifier(self.name)
+
+    def __format__(self, format_spec: str) -> str:
+        """Format Column for f-string interpolation with format specifiers.
+
+        Supports format specs:
+        - 'col', 'c', 'column': Returns quoted identifier
+        - '' (empty): Returns quoted identifier (default)
+
+        Args:
+            format_spec: Format specification string
+
+        Returns:
+            Backtick-quoted identifier
+
+        Example:
+            >>> col = Column(name="email", ...)
+            >>> f"SELECT {col:col} FROM users"
+            "SELECT `email` FROM users"
+        """
+        # All format specs return quoted identifier
+        # This provides flexibility for user preference
+        from .utilities.sql import quote_identifier
+
+        return quote_identifier(self.name)
+
 
 def _is_point_type(t: type) -> bool:
     origin = get_origin(t)
```
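In practice this lets a `Column` drop straight into an f-string. A minimal sketch; `model_construct` is used here only to sidestep the `Column` fields not shown in this hunk (normally a `Column` comes from a generated model schema):

```python
from moose_lib.data_models import Column

# model_construct bypasses validation, so supplying `name` alone is enough
# for this demonstration.
col = Column.model_construct(name="user_id")

assert f"SELECT {col} FROM users" == "SELECT `user_id` FROM users"
assert f"{col:col}" == "`user_id`"  # all format specs return the same quoting
```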
```diff
@@ -263,18 +420,45 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
     data_type: DataType
 
     if t is str:
-
+        # Check for FixedString annotation
+        fixed_string_size = next(
+            (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
+        )
+        if fixed_string_size:
+            data_type = f"FixedString({fixed_string_size})"
+        else:
+            data_type = "String"
+    elif t is bytes:
+        # Check for FixedString annotation
+        fixed_string_size = next(
+            (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
+        )
+        if fixed_string_size:
+            data_type = f"FixedString({fixed_string_size})"
+        else:
+            # Regular bytes without FixedString annotation
+            data_type = "String"
     elif t is int:
         # Check for int size annotations
-        int_size = next(
+        int_size = next(
+            (md for md in mds if isinstance(md, str) and re.match(r"^u?int\d+$", md)),
+            None,
+        )
         if int_size:
             data_type = int_size.replace("u", "U").replace("i", "I")
         else:
-            data_type = "
+            data_type = "Int64"
     elif t is float:
         size = next((md for md in mds if isinstance(md, ClickhouseSize)), None)
         if size is None:
-            bit_size = next(
+            bit_size = next(
+                (
+                    md
+                    for md in mds
+                    if isinstance(md, str) and re.match(r"^float\d+$", md)
+                ),
+                None,
+            )
         if bit_size:
             if bit_size == "float32":
                 data_type = "Float32"
```
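A sketch of what the new branches return, calling the converter directly (assuming `py_type_to_column_type` and `FixedString` are importable from `moose_lib.data_models`):

```python
from moose_lib.data_models import FixedString, py_type_to_column_type

# str/bytes carrying FixedString metadata map to FixedString(N), else String.
_, _, dt = py_type_to_column_type(str, [FixedString(16)])
assert dt == "FixedString(16)"

_, _, dt = py_type_to_column_type(bytes, [])
assert dt == "String"

# A plain int with no u?int marker now falls back to Int64.
_, _, dt = py_type_to_column_type(int, [])
assert dt == "Int64"
```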
```diff
@@ -292,12 +476,16 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
             raise ValueError(f"Unsupported float size {size.size}")
     elif t is Decimal:
         precision = next((md.max_digits for md in mds if hasattr(md, "max_digits")), 10)
-        scale = next(
+        scale = next(
+            (md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0
+        )
         data_type = f"Decimal({precision}, {scale})"
     elif t is bool:
         data_type = "Boolean"
     elif t is datetime:
-        precision = next(
+        precision = next(
+            (md for md in mds if isinstance(md, ClickhousePrecision)), None
+        )
         if precision is None:
             data_type = "DateTime"
         else:
```
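These branches consume the metadata produced by the existing `clickhouse_decimal` and `clickhouse_datetime64` helpers. A short sketch, assuming both are importable from `moose_lib.data_models`:

```python
from pydantic import BaseModel

from moose_lib.data_models import clickhouse_datetime64, clickhouse_decimal


class Trade(BaseModel):
    # Field(max_digits=18, decimal_places=4) metadata -> Decimal(18, 4)
    price: clickhouse_decimal(18, 4)
    # ClickhousePrecision(precision=3) metadata -> precision-bearing DateTime
    executed_at: clickhouse_datetime64(3)
```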
```diff
@@ -314,22 +502,31 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
         data_type = "IPv4"
     elif t is ipaddress.IPv6Address:
         data_type = "IPv6"
-    elif any(
-
-
-        "LineString",
-        "MultiLineString",
-        "Polygon",
-        "MultiPolygon",
-    ] for md in mds):
-        data_type = next(md for md in mds if md in [
+    elif any(
+        md
+        in [  # this check has to happen before t is matched against tuple/list
             "Point",
             "Ring",
             "LineString",
             "MultiLineString",
             "Polygon",
             "MultiPolygon",
-    ]
+        ]
+        for md in mds
+    ):
+        data_type = next(
+            md
+            for md in mds
+            if md
+            in [
+                "Point",
+                "Ring",
+                "LineString",
+                "MultiLineString",
+                "Polygon",
+                "MultiPolygon",
+            ]
+        )
         _validate_geometry_type(data_type, t)
     elif get_origin(t) is list:
         inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], [])
```
```diff
@@ -337,16 +534,62 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
     elif get_origin(t) is dict:
         args = get_args(t)
         if len(args) == 2:
-
-
-
-
+            # Special case: dict[str, Any] should be JSON type (matches TypeScript's Record<string, any>)
+            # This is useful for storing arbitrary extra fields in a JSON column
+            if args[0] is str and args[1] is Any:
+                data_type = "Json"
+            else:
+                key_optional, _, key_type = py_type_to_column_type(args[0], [])
+                value_optional, _, value_type = py_type_to_column_type(args[1], [])
+                # For dict types, we assume keys are required and values match their type
+                data_type = MapType(key_type=key_type, value_type=value_type)
         else:
-            raise ValueError(
+            raise ValueError(
+                f"Dict type must have exactly 2 type arguments, got {len(args)}"
+            )
     elif t is UUID:
         data_type = "UUID"
     elif t is Any:
         data_type = "Json"
+    elif any(isinstance(md, ClickHouseJson) for md in mds) and issubclass(t, BaseModel):
+        # Annotated[SomePydanticClass, ClickHouseJson(...)]
+        columns = _to_columns(t)
+        for c in columns:
+            if c.default is not None:
+                raise ValueError(
+                    "Default in inner field. Put ClickHouseDefault in top level field."
+                )
+        # Enforce extra='allow' for JSON-mapped models
+        if t.model_config.get("extra") != "allow":
+            raise ValueError(
+                f"Model {t.__name__} with ClickHouseJson must have model_config with extra='allow'. "
+                "Add: model_config = ConfigDict(extra='allow')"
+            )
+        opts = next(md for md in mds if isinstance(md, ClickHouseJson))
+
+        # Build typed_paths from fields as tuples of (name, type)
+        typed_paths: list[tuple[str, DataType]] = []
+        for c in columns:
+            typed_paths.append((c.name, c.data_type))
+
+        has_any_option = (
+            opts.max_dynamic_paths is not None
+            or opts.max_dynamic_types is not None
+            or len(typed_paths) > 0
+            or len(opts.skip_paths) > 0
+            or len(opts.skip_regexps) > 0
+        )
+
+        if not has_any_option:
+            data_type = "Json"
+        else:
+            data_type = JsonOptions(
+                max_dynamic_paths=opts.max_dynamic_paths,
+                max_dynamic_types=opts.max_dynamic_types,
+                typed_paths=typed_paths,
+                skip_paths=list(opts.skip_paths),
+                skip_regexps=list(opts.skip_regexps),
+            )
     elif get_origin(t) is Literal and all(isinstance(arg, str) for arg in get_args(t)):
         data_type = "String"
         mds.append("LowCardinality")
```
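A sketch of the two JSON mappings, assuming the names are importable from `moose_lib.data_models`; the `Extra` and `LogLine` models are illustrative:

```python
from typing import Annotated, Any

from pydantic import BaseModel, ConfigDict

from moose_lib.data_models import ClickHouseJson


class Extra(BaseModel):
    # Required for ClickHouseJson-mapped models; enforced by the check above.
    model_config = ConfigDict(extra="allow")

    source: str


class LogLine(BaseModel):
    # dict[str, Any] maps straight to the ClickHouse JSON type.
    attributes: dict[str, Any]
    # A Pydantic model annotated with ClickHouseJson also maps to JSON, with
    # its declared fields emitted as typed paths.
    extra: Annotated[Extra, ClickHouseJson(max_dynamic_paths=64)]
```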
```diff
@@ -361,10 +604,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
         )
     if any(md == "ClickHouseNamedTuple" for md in mds):
         data_type = NamedTupleType(
-            fields=[(
-                column.name,
-                column.data_type
-            ) for column in columns],
+            fields=[(column.name, column.data_type) for column in columns],
         )
     else:
         data_type = Nested(
```
```diff
@@ -382,30 +622,36 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
 def _to_columns(model: type[BaseModel]) -> list[Column]:
     """Convert Pydantic model fields to Column definitions."""
     columns = []
+    # Get raw annotations from the model class to preserve type aliases
+    raw_annotations = getattr(model, "__annotations__", {})
+
     for field_name, field_info in model.model_fields.items():
-        #
-
+        # Use raw annotation if available (preserves type aliases and their metadata)
+        # Fall back to field_info.annotation if not found in __annotations__
+        field_type = raw_annotations.get(field_name, field_info.annotation)
         if field_type is None:
             raise ValueError(f"Missing type for {field_name}")
         primary_key, field_type = handle_key(field_type)
         is_jwt, field_type = handle_jwt(field_type)
 
-        optional, mds, data_type = py_type_to_column_type(
+        optional, mds, data_type = py_type_to_column_type(
+            field_type, field_info.metadata
+        )
 
         annotations = []
         for md in mds:
-            if isinstance(md, AggregateFunction)
-                annotations
-
-            )
-            if isinstance(md, SimpleAggregateFunction)
-                annotations
-
-            )
-            if md == "LowCardinality"
-                annotations
-
-            )
+            if isinstance(md, AggregateFunction) and all(
+                key != "aggregationFunction" for (key, _) in annotations
+            ):
+                annotations.append(("aggregationFunction", md.to_dict()))
+            if isinstance(md, SimpleAggregateFunction) and all(
+                key != "simpleAggregationFunction" for (key, _) in annotations
+            ):
+                annotations.append(("simpleAggregationFunction", md.to_dict()))
+            if md == "LowCardinality" and all(
+                key != "LowCardinality" for (key, _) in annotations
+            ):
+                annotations.append(("LowCardinality", True))
 
         column_name = field_name if field_info.alias is None else field_info.alias
 
```
```diff
@@ -415,12 +661,31 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
             None,
         )
 
+        # Extract MATERIALIZED expression from metadata, if provided
+        materialized_expr = next(
+            (md.expression for md in mds if isinstance(md, ClickHouseMaterialized)),
+            None,
+        )
+
+        # Validate mutual exclusivity of DEFAULT and MATERIALIZED
+        if default_expr and materialized_expr:
+            raise ValueError(
+                f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. "
+                f"Use one or the other."
+            )
+
         # Extract TTL expression from metadata, if provided
         ttl_expr = next(
             (md.expression for md in mds if isinstance(md, ClickHouseTTL)),
             None,
         )
 
+        # Extract CODEC expression from metadata, if provided
+        codec_expr = next(
+            (md.expression for md in mds if isinstance(md, ClickHouseCodec)),
+            None,
+        )
+
         columns.append(
             Column(
                 name=column_name,
```
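The new check turns a conflicting DEFAULT + MATERIALIZED pair into an immediate error instead of invalid DDL. A minimal sketch of the failing case, assuming `_to_columns` (internal) and the annotation classes are importable from `moose_lib.data_models`:

```python
from typing import Annotated

from pydantic import BaseModel

from moose_lib.data_models import (
    ClickhouseDefault,
    ClickHouseMaterialized,
    _to_columns,  # internal helper, used here only for illustration
)


class Bad(BaseModel):
    event_date: Annotated[
        str,
        ClickhouseDefault("today()"),
        ClickHouseMaterialized("today()"),
    ]


try:
    _to_columns(Bad)
except ValueError as err:
    # "Column 'event_date' cannot have both DEFAULT and MATERIALIZED. ..."
    print(err)
```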
```diff
@@ -429,8 +694,10 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
                 unique=False,
                 primary_key=primary_key,
                 default=default_expr,
+                materialized=materialized_expr,
                 annotations=annotations,
                 ttl=ttl_expr,
+                codec=codec_expr,
             )
         )
     return columns
```
```diff
@@ -438,7 +705,9 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
 
 class StringToEnumMixin:
     @classmethod
-    def __get_pydantic_core_schema__(
+    def __get_pydantic_core_schema__(
+        cls, _source_type: Any, _handler: GetCoreSchemaHandler
+    ) -> CoreSchema:
         def validate(value: Any, _: Any) -> Any:
             if isinstance(value, str):
                 try:
```
```diff
@@ -447,14 +716,15 @@ class StringToEnumMixin:
                     raise ValueError(f"Invalid enum name: {value}")
             return cls(value)  # fallback to default enum validation
 
-        return core_schema.with_info_before_validator_function(
+        return core_schema.with_info_before_validator_function(
+            validate, core_schema.enum_schema(cls, list(cls))
+        )
 
 
 def is_array_nested_type(data_type: DataType) -> bool:
     """Type guard to check if a data type is Array(Nested(...))."""
-    return (
-
-        isinstance(data_type.element_type, Nested)
+    return isinstance(data_type, ArrayType) and isinstance(
+        data_type.element_type, Nested
     )
 
 
```