moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +38 -3
- moose_lib/blocks.py +497 -37
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +94 -5
- moose_lib/config/config_file.py +44 -2
- moose_lib/config/runtime.py +137 -5
- moose_lib/data_models.py +451 -46
- moose_lib/dmv2/__init__.py +88 -60
- moose_lib/dmv2/_registry.py +3 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +56 -13
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +304 -119
- moose_lib/dmv2/registry.py +28 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +241 -21
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +175 -0
- moose_lib/dmv2/web_app_helpers.py +96 -0
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +537 -68
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +266 -156
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +38 -1
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +210 -0
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +114 -0
- tests/test_web_app.py +227 -0
- moose_lib-0.6.90.dist-info/RECORD +0 -42
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py
CHANGED
|
@@ -6,8 +6,19 @@ from inspect import isclass
|
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
from datetime import datetime, date
|
|
8
8
|
|
|
9
|
-
from typing import
|
|
10
|
-
|
|
9
|
+
from typing import (
|
|
10
|
+
Literal,
|
|
11
|
+
Tuple,
|
|
12
|
+
Union,
|
|
13
|
+
Any,
|
|
14
|
+
get_origin,
|
|
15
|
+
get_args,
|
|
16
|
+
TypeAliasType,
|
|
17
|
+
Annotated,
|
|
18
|
+
Type,
|
|
19
|
+
_BaseGenericAlias,
|
|
20
|
+
GenericAlias,
|
|
21
|
+
)
|
|
11
22
|
from pydantic import BaseModel, Field, PlainSerializer, GetCoreSchemaHandler, ConfigDict
|
|
12
23
|
from pydantic_core import CoreSchema, core_schema
|
|
13
24
|
import ipaddress
|
|
@@ -15,18 +26,39 @@ import ipaddress
|
|
|
15
26
|
type Key[T: (str, int)] = T
|
|
16
27
|
type JWT[T] = T
|
|
17
28
|
|
|
29
|
+
# Integer type aliases for ClickHouse integer types
|
|
30
|
+
type Int8 = Annotated[int, "int8"]
|
|
31
|
+
type Int16 = Annotated[int, "int16"]
|
|
32
|
+
type Int32 = Annotated[int, "int32"]
|
|
33
|
+
type Int64 = Annotated[int, "int64"]
|
|
34
|
+
type UInt8 = Annotated[int, "uint8"]
|
|
35
|
+
type UInt16 = Annotated[int, "uint16"]
|
|
36
|
+
type UInt32 = Annotated[int, "uint32"]
|
|
37
|
+
type UInt64 = Annotated[int, "uint64"]
|
|
18
38
|
|
|
19
|
-
|
|
39
|
+
# Float type aliases for ClickHouse float types
|
|
40
|
+
type Float32 = Annotated[float, "float32"]
|
|
41
|
+
type Float64 = Annotated[float, "float64"]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclasses.dataclass(
|
|
45
|
+
frozen=True
|
|
46
|
+
) # a BaseModel in the annotations will confuse pydantic
|
|
20
47
|
class ClickhousePrecision:
|
|
21
48
|
precision: int
|
|
22
49
|
|
|
23
50
|
|
|
24
|
-
@dataclasses.dataclass
|
|
51
|
+
@dataclasses.dataclass(frozen=True)
|
|
25
52
|
class ClickhouseSize:
|
|
26
53
|
size: int
|
|
27
54
|
|
|
28
55
|
|
|
29
|
-
@dataclasses.dataclass
|
|
56
|
+
@dataclasses.dataclass(frozen=True)
|
|
57
|
+
class ClickhouseFixedStringSize:
|
|
58
|
+
size: int
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclasses.dataclass(frozen=True)
|
|
30
62
|
class ClickhouseDefault:
|
|
31
63
|
expression: str
|
|
32
64
|
|
|
@@ -35,6 +67,60 @@ def clickhouse_default(expression: str) -> ClickhouseDefault:
|
|
|
35
67
|
return ClickhouseDefault(expression=expression)
|
|
36
68
|
|
|
37
69
|
|
|
70
|
+
@dataclasses.dataclass(frozen=True)
|
|
71
|
+
class ClickHouseTTL:
|
|
72
|
+
expression: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclasses.dataclass(frozen=True)
|
|
76
|
+
class ClickHouseCodec:
|
|
77
|
+
expression: str
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclasses.dataclass(frozen=True)
|
|
81
|
+
class ClickHouseMaterialized:
|
|
82
|
+
"""
|
|
83
|
+
ClickHouse MATERIALIZED column annotation.
|
|
84
|
+
The column value is computed at INSERT time and physically stored.
|
|
85
|
+
Cannot be explicitly inserted by users.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
expression: ClickHouse SQL expression using column names (snake_case)
|
|
89
|
+
|
|
90
|
+
Examples:
|
|
91
|
+
# Extract date component
|
|
92
|
+
event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
|
|
93
|
+
|
|
94
|
+
# Precompute hash
|
|
95
|
+
user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")]
|
|
96
|
+
|
|
97
|
+
# Complex expression with JSON
|
|
98
|
+
combination_hash: Annotated[
|
|
99
|
+
list[int],
|
|
100
|
+
ClickHouseMaterialized(
|
|
101
|
+
"arrayMap(kv -> cityHash64(kv.1, kv.2), "
|
|
102
|
+
"JSONExtractKeysAndValuesRaw(toString(log_blob)))"
|
|
103
|
+
)
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
Notes:
|
|
107
|
+
- Expression uses ClickHouse column names, not Python field names
|
|
108
|
+
- MATERIALIZED and DEFAULT are mutually exclusive
|
|
109
|
+
- Can be combined with ClickHouseCodec for compression
|
|
110
|
+
- Changing the expression modifies the column in-place (existing values preserved)
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
expression: str
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@dataclasses.dataclass(frozen=True)
|
|
117
|
+
class ClickHouseJson:
|
|
118
|
+
max_dynamic_paths: int | None = None
|
|
119
|
+
max_dynamic_types: int | None = None
|
|
120
|
+
skip_paths: tuple[str, ...] = ()
|
|
121
|
+
skip_regexps: tuple[str, ...] = ()
|
|
122
|
+
|
|
123
|
+
|
|
38
124
|
def clickhouse_decimal(precision: int, scale: int) -> Type[Decimal]:
|
|
39
125
|
return Annotated[Decimal, Field(max_digits=precision, decimal_places=scale)]
|
|
40
126
|
|
|
@@ -48,25 +134,92 @@ def clickhouse_datetime64(precision: int) -> Type[datetime]:
|
|
|
48
134
|
return Annotated[datetime, ClickhousePrecision(precision=precision)]
|
|
49
135
|
|
|
50
136
|
|
|
137
|
+
def FixedString(size: int) -> ClickhouseFixedStringSize:
|
|
138
|
+
"""
|
|
139
|
+
Creates a FixedString(N) annotation for fixed-length strings.
|
|
140
|
+
|
|
141
|
+
ClickHouse stores exactly N bytes, padding shorter values with null bytes.
|
|
142
|
+
Values exceeding N bytes will raise an exception.
|
|
143
|
+
|
|
144
|
+
Use for fixed-length data like hashes, IPs, UUIDs, MAC addresses.
|
|
145
|
+
|
|
146
|
+
Example:
|
|
147
|
+
md5_hash: Annotated[str, FixedString(16)] # 16-byte MD5
|
|
148
|
+
sha256: Annotated[str, FixedString(32)] # 32-byte SHA256
|
|
149
|
+
ipv6: Annotated[str, FixedString(16)] # 16-byte IPv6
|
|
150
|
+
"""
|
|
151
|
+
return ClickhouseFixedStringSize(size=size)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
type Point = Annotated[tuple[float, float], "Point"]
|
|
155
|
+
type Ring = Annotated[list[tuple[float, float]], "Ring"]
|
|
156
|
+
type LineString = Annotated[list[tuple[float, float]], "LineString"]
|
|
157
|
+
type MultiLineString = Annotated[list[list[tuple[float, float]]], "MultiLineString"]
|
|
158
|
+
type Polygon = Annotated[list[list[tuple[float, float]]], "Polygon"]
|
|
159
|
+
type MultiPolygon = Annotated[list[list[list[tuple[float, float]]]], "MultiPolygon"]
|
|
160
|
+
|
|
161
|
+
|
|
51
162
|
def aggregated[T](
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
163
|
+
result_type: Type[T],
|
|
164
|
+
agg_func: str,
|
|
165
|
+
param_types: list[type | GenericAlias | _BaseGenericAlias],
|
|
55
166
|
) -> Type[T]:
|
|
56
|
-
return Annotated[
|
|
167
|
+
return Annotated[
|
|
168
|
+
result_type,
|
|
169
|
+
AggregateFunction(agg_func=agg_func, param_types=tuple(param_types)),
|
|
170
|
+
]
|
|
57
171
|
|
|
58
172
|
|
|
59
|
-
@dataclasses.dataclass
|
|
173
|
+
@dataclasses.dataclass(frozen=True)
|
|
60
174
|
class AggregateFunction:
|
|
61
175
|
agg_func: str
|
|
62
|
-
param_types:
|
|
176
|
+
param_types: tuple[type | GenericAlias | _BaseGenericAlias, ...]
|
|
63
177
|
|
|
64
178
|
def to_dict(self):
|
|
65
179
|
return {
|
|
66
180
|
"functionName": self.agg_func,
|
|
67
181
|
"argumentTypes": [
|
|
68
182
|
py_type_to_column_type(t, [])[2] for t in self.param_types
|
|
69
|
-
]
|
|
183
|
+
],
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def simple_aggregated[T](agg_func: str, arg_type: Type[T]) -> Type[T]:
|
|
188
|
+
"""Helper to create a SimpleAggregateFunction type annotation.
|
|
189
|
+
|
|
190
|
+
SimpleAggregateFunction is a ClickHouse type for storing aggregated values directly
|
|
191
|
+
instead of intermediate states. It's more efficient for functions like sum, max, min, etc.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
agg_func: The aggregation function name (e.g., "sum", "max", "anyLast")
|
|
195
|
+
arg_type: The argument type for the function (also the result type)
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
An Annotated type with SimpleAggregateFunction metadata
|
|
199
|
+
|
|
200
|
+
Example:
|
|
201
|
+
```python
|
|
202
|
+
from moose_lib import simple_aggregated
|
|
203
|
+
|
|
204
|
+
row_count: simple_aggregated("sum", int)
|
|
205
|
+
max_value: simple_aggregated("max", float)
|
|
206
|
+
last_status: simple_aggregated("anyLast", str)
|
|
207
|
+
```
|
|
208
|
+
"""
|
|
209
|
+
return Annotated[
|
|
210
|
+
arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@dataclasses.dataclass(frozen=True)
|
|
215
|
+
class SimpleAggregateFunction:
|
|
216
|
+
agg_func: str
|
|
217
|
+
arg_type: type | GenericAlias | _BaseGenericAlias
|
|
218
|
+
|
|
219
|
+
def to_dict(self):
|
|
220
|
+
return {
|
|
221
|
+
"functionName": self.agg_func,
|
|
222
|
+
"argumentType": py_type_to_column_type(self.arg_type, [])[2],
|
|
70
223
|
}
|
|
71
224
|
|
|
72
225
|
|
|
@@ -79,7 +232,9 @@ def enum_value_serializer(value: int | str):
|
|
|
79
232
|
|
|
80
233
|
class EnumValue(BaseModel):
|
|
81
234
|
name: str
|
|
82
|
-
value: Annotated[
|
|
235
|
+
value: Annotated[
|
|
236
|
+
int | str, PlainSerializer(enum_value_serializer, return_type=dict)
|
|
237
|
+
]
|
|
83
238
|
|
|
84
239
|
|
|
85
240
|
class DataEnum(BaseModel):
|
|
@@ -107,7 +262,17 @@ class MapType(BaseModel):
|
|
|
107
262
|
value_type: "DataType"
|
|
108
263
|
|
|
109
264
|
|
|
110
|
-
|
|
265
|
+
class JsonOptions(BaseModel):
|
|
266
|
+
max_dynamic_paths: int | None = None
|
|
267
|
+
max_dynamic_types: int | None = None
|
|
268
|
+
typed_paths: list[tuple[str, "DataType"]] = []
|
|
269
|
+
skip_paths: list[str] = []
|
|
270
|
+
skip_regexps: list[str] = []
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
type DataType = (
|
|
274
|
+
str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions
|
|
275
|
+
)
|
|
111
276
|
|
|
112
277
|
|
|
113
278
|
def handle_jwt(field_type: type) -> Tuple[bool, type]:
|
|
@@ -146,12 +311,105 @@ class Column(BaseModel):
|
|
|
146
311
|
primary_key: bool
|
|
147
312
|
default: str | None = None
|
|
148
313
|
annotations: list[Tuple[str, Any]] = []
|
|
314
|
+
ttl: str | None = None
|
|
315
|
+
codec: str | None = None
|
|
316
|
+
materialized: str | None = None
|
|
149
317
|
|
|
150
318
|
def to_expr(self):
|
|
151
319
|
# Lazy import to avoid circular dependency at import time
|
|
152
320
|
from .query_builder import ColumnRef
|
|
321
|
+
|
|
153
322
|
return ColumnRef(self)
|
|
154
323
|
|
|
324
|
+
def __str__(self) -> str:
|
|
325
|
+
"""Return properly quoted identifier for SQL interpolation.
|
|
326
|
+
|
|
327
|
+
This enables Column objects to be used directly in f-strings and
|
|
328
|
+
string concatenation for SQL query construction.
|
|
329
|
+
|
|
330
|
+
Returns:
|
|
331
|
+
Backtick-quoted identifier safe for ClickHouse SQL.
|
|
332
|
+
|
|
333
|
+
Example:
|
|
334
|
+
>>> col = Column(name="user_id", ...)
|
|
335
|
+
>>> f"SELECT {col} FROM users"
|
|
336
|
+
"SELECT `user_id` FROM users"
|
|
337
|
+
"""
|
|
338
|
+
from .utilities.sql import quote_identifier
|
|
339
|
+
|
|
340
|
+
return quote_identifier(self.name)
|
|
341
|
+
|
|
342
|
+
def __format__(self, format_spec: str) -> str:
|
|
343
|
+
"""Format Column for f-string interpolation with format specifiers.
|
|
344
|
+
|
|
345
|
+
Supports format specs:
|
|
346
|
+
- 'col', 'c', 'column': Returns quoted identifier
|
|
347
|
+
- '' (empty): Returns quoted identifier (default)
|
|
348
|
+
|
|
349
|
+
Args:
|
|
350
|
+
format_spec: Format specification string
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
Backtick-quoted identifier
|
|
354
|
+
|
|
355
|
+
Example:
|
|
356
|
+
>>> col = Column(name="email", ...)
|
|
357
|
+
>>> f"SELECT {col:col} FROM users"
|
|
358
|
+
"SELECT `email` FROM users"
|
|
359
|
+
"""
|
|
360
|
+
# All format specs return quoted identifier
|
|
361
|
+
# This provides flexibility for user preference
|
|
362
|
+
from .utilities.sql import quote_identifier
|
|
363
|
+
|
|
364
|
+
return quote_identifier(self.name)
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _is_point_type(t: type) -> bool:
|
|
368
|
+
origin = get_origin(t)
|
|
369
|
+
if origin is tuple:
|
|
370
|
+
args = get_args(t)
|
|
371
|
+
return len(args) == 2 and all(arg is float for arg in args)
|
|
372
|
+
return False
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _is_list_of(inner_check: Any, t: type) -> bool:
|
|
376
|
+
origin = get_origin(t)
|
|
377
|
+
if origin is list:
|
|
378
|
+
args = get_args(t)
|
|
379
|
+
return len(args) == 1 and inner_check(args[0])
|
|
380
|
+
return False
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _validate_geometry_type(requested: str, t: type) -> None:
|
|
384
|
+
"""
|
|
385
|
+
Validates that the provided Python type matches the expected structure
|
|
386
|
+
for the requested geometry annotation.
|
|
387
|
+
"""
|
|
388
|
+
match requested:
|
|
389
|
+
case "Point":
|
|
390
|
+
if not _is_point_type(t):
|
|
391
|
+
raise ValueError("Point must be typed as tuple[float, float]")
|
|
392
|
+
case "Ring" | "LineString":
|
|
393
|
+
if not _is_list_of(_is_point_type, t):
|
|
394
|
+
raise ValueError(
|
|
395
|
+
f"{requested} must be typed as list[tuple[float, float]]"
|
|
396
|
+
)
|
|
397
|
+
case "MultiLineString" | "Polygon":
|
|
398
|
+
if not _is_list_of(lambda x: _is_list_of(_is_point_type, x), t):
|
|
399
|
+
raise ValueError(
|
|
400
|
+
f"{requested} must be typed as list[list[tuple[float, float]]]"
|
|
401
|
+
)
|
|
402
|
+
case "MultiPolygon":
|
|
403
|
+
if not _is_list_of(
|
|
404
|
+
lambda x: _is_list_of(lambda y: _is_list_of(_is_point_type, y), x),
|
|
405
|
+
t,
|
|
406
|
+
):
|
|
407
|
+
raise ValueError(
|
|
408
|
+
"MultiPolygon must be typed as list[list[list[tuple[float, float]]]]"
|
|
409
|
+
)
|
|
410
|
+
case _:
|
|
411
|
+
raise ValueError(f"Unknown geometry type annotation: {requested}")
|
|
412
|
+
|
|
155
413
|
|
|
156
414
|
def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], DataType]:
|
|
157
415
|
# handle Annotated[Optional[Annotated[...], ...]
|
|
@@ -162,18 +420,45 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
|
|
|
162
420
|
data_type: DataType
|
|
163
421
|
|
|
164
422
|
if t is str:
|
|
165
|
-
|
|
423
|
+
# Check for FixedString annotation
|
|
424
|
+
fixed_string_size = next(
|
|
425
|
+
(md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
|
|
426
|
+
)
|
|
427
|
+
if fixed_string_size:
|
|
428
|
+
data_type = f"FixedString({fixed_string_size})"
|
|
429
|
+
else:
|
|
430
|
+
data_type = "String"
|
|
431
|
+
elif t is bytes:
|
|
432
|
+
# Check for FixedString annotation
|
|
433
|
+
fixed_string_size = next(
|
|
434
|
+
(md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
|
|
435
|
+
)
|
|
436
|
+
if fixed_string_size:
|
|
437
|
+
data_type = f"FixedString({fixed_string_size})"
|
|
438
|
+
else:
|
|
439
|
+
# Regular bytes without FixedString annotation
|
|
440
|
+
data_type = "String"
|
|
166
441
|
elif t is int:
|
|
167
442
|
# Check for int size annotations
|
|
168
|
-
int_size = next(
|
|
443
|
+
int_size = next(
|
|
444
|
+
(md for md in mds if isinstance(md, str) and re.match(r"^u?int\d+$", md)),
|
|
445
|
+
None,
|
|
446
|
+
)
|
|
169
447
|
if int_size:
|
|
170
448
|
data_type = int_size.replace("u", "U").replace("i", "I")
|
|
171
449
|
else:
|
|
172
|
-
data_type = "
|
|
450
|
+
data_type = "Int64"
|
|
173
451
|
elif t is float:
|
|
174
452
|
size = next((md for md in mds if isinstance(md, ClickhouseSize)), None)
|
|
175
453
|
if size is None:
|
|
176
|
-
bit_size = next(
|
|
454
|
+
bit_size = next(
|
|
455
|
+
(
|
|
456
|
+
md
|
|
457
|
+
for md in mds
|
|
458
|
+
if isinstance(md, str) and re.match(r"^float\d+$", md)
|
|
459
|
+
),
|
|
460
|
+
None,
|
|
461
|
+
)
|
|
177
462
|
if bit_size:
|
|
178
463
|
if bit_size == "float32":
|
|
179
464
|
data_type = "Float32"
|
|
@@ -191,12 +476,16 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
|
|
|
191
476
|
raise ValueError(f"Unsupported float size {size.size}")
|
|
192
477
|
elif t is Decimal:
|
|
193
478
|
precision = next((md.max_digits for md in mds if hasattr(md, "max_digits")), 10)
|
|
194
|
-
scale = next(
|
|
479
|
+
scale = next(
|
|
480
|
+
(md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0
|
|
481
|
+
)
|
|
195
482
|
data_type = f"Decimal({precision}, {scale})"
|
|
196
483
|
elif t is bool:
|
|
197
484
|
data_type = "Boolean"
|
|
198
485
|
elif t is datetime:
|
|
199
|
-
precision = next(
|
|
486
|
+
precision = next(
|
|
487
|
+
(md for md in mds if isinstance(md, ClickhousePrecision)), None
|
|
488
|
+
)
|
|
200
489
|
if precision is None:
|
|
201
490
|
data_type = "DateTime"
|
|
202
491
|
else:
|
|
@@ -213,39 +502,114 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
|
|
|
213
502
|
data_type = "IPv4"
|
|
214
503
|
elif t is ipaddress.IPv6Address:
|
|
215
504
|
data_type = "IPv6"
|
|
505
|
+
elif any(
|
|
506
|
+
md
|
|
507
|
+
in [ # this check has to happen before t is matched against tuple/list
|
|
508
|
+
"Point",
|
|
509
|
+
"Ring",
|
|
510
|
+
"LineString",
|
|
511
|
+
"MultiLineString",
|
|
512
|
+
"Polygon",
|
|
513
|
+
"MultiPolygon",
|
|
514
|
+
]
|
|
515
|
+
for md in mds
|
|
516
|
+
):
|
|
517
|
+
data_type = next(
|
|
518
|
+
md
|
|
519
|
+
for md in mds
|
|
520
|
+
if md
|
|
521
|
+
in [
|
|
522
|
+
"Point",
|
|
523
|
+
"Ring",
|
|
524
|
+
"LineString",
|
|
525
|
+
"MultiLineString",
|
|
526
|
+
"Polygon",
|
|
527
|
+
"MultiPolygon",
|
|
528
|
+
]
|
|
529
|
+
)
|
|
530
|
+
_validate_geometry_type(data_type, t)
|
|
216
531
|
elif get_origin(t) is list:
|
|
217
532
|
inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], [])
|
|
218
533
|
data_type = ArrayType(element_type=inner_type, element_nullable=inner_optional)
|
|
219
534
|
elif get_origin(t) is dict:
|
|
220
535
|
args = get_args(t)
|
|
221
536
|
if len(args) == 2:
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
537
|
+
# Special case: dict[str, Any] should be JSON type (matches TypeScript's Record<string, any>)
|
|
538
|
+
# This is useful for storing arbitrary extra fields in a JSON column
|
|
539
|
+
if args[0] is str and args[1] is Any:
|
|
540
|
+
data_type = "Json"
|
|
541
|
+
else:
|
|
542
|
+
key_optional, _, key_type = py_type_to_column_type(args[0], [])
|
|
543
|
+
value_optional, _, value_type = py_type_to_column_type(args[1], [])
|
|
544
|
+
# For dict types, we assume keys are required and values match their type
|
|
545
|
+
data_type = MapType(key_type=key_type, value_type=value_type)
|
|
226
546
|
else:
|
|
227
|
-
raise ValueError(
|
|
547
|
+
raise ValueError(
|
|
548
|
+
f"Dict type must have exactly 2 type arguments, got {len(args)}"
|
|
549
|
+
)
|
|
228
550
|
elif t is UUID:
|
|
229
551
|
data_type = "UUID"
|
|
230
552
|
elif t is Any:
|
|
231
553
|
data_type = "Json"
|
|
554
|
+
elif any(isinstance(md, ClickHouseJson) for md in mds) and issubclass(t, BaseModel):
|
|
555
|
+
# Annotated[SomePydanticClass, ClickHouseJson(...)]
|
|
556
|
+
columns = _to_columns(t)
|
|
557
|
+
for c in columns:
|
|
558
|
+
if c.default is not None:
|
|
559
|
+
raise ValueError(
|
|
560
|
+
"Default in inner field. Put ClickHouseDefault in top level field."
|
|
561
|
+
)
|
|
562
|
+
# Enforce extra='allow' for JSON-mapped models
|
|
563
|
+
if t.model_config.get("extra") != "allow":
|
|
564
|
+
raise ValueError(
|
|
565
|
+
f"Model {t.__name__} with ClickHouseJson must have model_config with extra='allow'. "
|
|
566
|
+
"Add: model_config = ConfigDict(extra='allow')"
|
|
567
|
+
)
|
|
568
|
+
opts = next(md for md in mds if isinstance(md, ClickHouseJson))
|
|
569
|
+
|
|
570
|
+
# Build typed_paths from fields as tuples of (name, type)
|
|
571
|
+
typed_paths: list[tuple[str, DataType]] = []
|
|
572
|
+
for c in columns:
|
|
573
|
+
typed_paths.append((c.name, c.data_type))
|
|
574
|
+
|
|
575
|
+
has_any_option = (
|
|
576
|
+
opts.max_dynamic_paths is not None
|
|
577
|
+
or opts.max_dynamic_types is not None
|
|
578
|
+
or len(typed_paths) > 0
|
|
579
|
+
or len(opts.skip_paths) > 0
|
|
580
|
+
or len(opts.skip_regexps) > 0
|
|
581
|
+
)
|
|
582
|
+
|
|
583
|
+
if not has_any_option:
|
|
584
|
+
data_type = "Json"
|
|
585
|
+
else:
|
|
586
|
+
data_type = JsonOptions(
|
|
587
|
+
max_dynamic_paths=opts.max_dynamic_paths,
|
|
588
|
+
max_dynamic_types=opts.max_dynamic_types,
|
|
589
|
+
typed_paths=typed_paths,
|
|
590
|
+
skip_paths=list(opts.skip_paths),
|
|
591
|
+
skip_regexps=list(opts.skip_regexps),
|
|
592
|
+
)
|
|
232
593
|
elif get_origin(t) is Literal and all(isinstance(arg, str) for arg in get_args(t)):
|
|
233
594
|
data_type = "String"
|
|
234
595
|
mds.append("LowCardinality")
|
|
235
596
|
elif not isclass(t):
|
|
236
597
|
raise ValueError(f"Unknown type {t}")
|
|
237
598
|
elif issubclass(t, BaseModel):
|
|
599
|
+
columns = _to_columns(t)
|
|
600
|
+
for c in columns:
|
|
601
|
+
if c.default is not None:
|
|
602
|
+
raise ValueError(
|
|
603
|
+
"Default in inner field. Put ClickHouseDefault in top level field."
|
|
604
|
+
)
|
|
238
605
|
if any(md == "ClickHouseNamedTuple" for md in mds):
|
|
239
606
|
data_type = NamedTupleType(
|
|
240
|
-
fields=[(
|
|
241
|
-
column.name,
|
|
242
|
-
column.data_type
|
|
243
|
-
) for column in _to_columns(t)],
|
|
607
|
+
fields=[(column.name, column.data_type) for column in columns],
|
|
244
608
|
)
|
|
245
609
|
else:
|
|
246
610
|
data_type = Nested(
|
|
247
611
|
name=t.__name__,
|
|
248
|
-
columns=
|
|
612
|
+
columns=columns,
|
|
249
613
|
)
|
|
250
614
|
elif issubclass(t, Enum):
|
|
251
615
|
values = [EnumValue(name=member.name, value=member.value) for member in t]
|
|
@@ -258,26 +622,36 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
|
|
|
258
622
|
def _to_columns(model: type[BaseModel]) -> list[Column]:
|
|
259
623
|
"""Convert Pydantic model fields to Column definitions."""
|
|
260
624
|
columns = []
|
|
625
|
+
# Get raw annotations from the model class to preserve type aliases
|
|
626
|
+
raw_annotations = getattr(model, "__annotations__", {})
|
|
627
|
+
|
|
261
628
|
for field_name, field_info in model.model_fields.items():
|
|
262
|
-
#
|
|
263
|
-
|
|
629
|
+
# Use raw annotation if available (preserves type aliases and their metadata)
|
|
630
|
+
# Fall back to field_info.annotation if not found in __annotations__
|
|
631
|
+
field_type = raw_annotations.get(field_name, field_info.annotation)
|
|
264
632
|
if field_type is None:
|
|
265
633
|
raise ValueError(f"Missing type for {field_name}")
|
|
266
634
|
primary_key, field_type = handle_key(field_type)
|
|
267
635
|
is_jwt, field_type = handle_jwt(field_type)
|
|
268
636
|
|
|
269
|
-
optional, mds, data_type = py_type_to_column_type(
|
|
637
|
+
optional, mds, data_type = py_type_to_column_type(
|
|
638
|
+
field_type, field_info.metadata
|
|
639
|
+
)
|
|
270
640
|
|
|
271
641
|
annotations = []
|
|
272
642
|
for md in mds:
|
|
273
|
-
if isinstance(md, AggregateFunction)
|
|
274
|
-
annotations
|
|
275
|
-
|
|
276
|
-
)
|
|
277
|
-
if md
|
|
278
|
-
annotations
|
|
279
|
-
|
|
280
|
-
)
|
|
643
|
+
if isinstance(md, AggregateFunction) and all(
|
|
644
|
+
key != "aggregationFunction" for (key, _) in annotations
|
|
645
|
+
):
|
|
646
|
+
annotations.append(("aggregationFunction", md.to_dict()))
|
|
647
|
+
if isinstance(md, SimpleAggregateFunction) and all(
|
|
648
|
+
key != "simpleAggregationFunction" for (key, _) in annotations
|
|
649
|
+
):
|
|
650
|
+
annotations.append(("simpleAggregationFunction", md.to_dict()))
|
|
651
|
+
if md == "LowCardinality" and all(
|
|
652
|
+
key != "LowCardinality" for (key, _) in annotations
|
|
653
|
+
):
|
|
654
|
+
annotations.append(("LowCardinality", True))
|
|
281
655
|
|
|
282
656
|
column_name = field_name if field_info.alias is None else field_info.alias
|
|
283
657
|
|
|
@@ -287,6 +661,31 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
|
|
|
287
661
|
None,
|
|
288
662
|
)
|
|
289
663
|
|
|
664
|
+
# Extract MATERIALIZED expression from metadata, if provided
|
|
665
|
+
materialized_expr = next(
|
|
666
|
+
(md.expression for md in mds if isinstance(md, ClickHouseMaterialized)),
|
|
667
|
+
None,
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
# Validate mutual exclusivity of DEFAULT and MATERIALIZED
|
|
671
|
+
if default_expr and materialized_expr:
|
|
672
|
+
raise ValueError(
|
|
673
|
+
f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. "
|
|
674
|
+
f"Use one or the other."
|
|
675
|
+
)
|
|
676
|
+
|
|
677
|
+
# Extract TTL expression from metadata, if provided
|
|
678
|
+
ttl_expr = next(
|
|
679
|
+
(md.expression for md in mds if isinstance(md, ClickHouseTTL)),
|
|
680
|
+
None,
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
# Extract CODEC expression from metadata, if provided
|
|
684
|
+
codec_expr = next(
|
|
685
|
+
(md.expression for md in mds if isinstance(md, ClickHouseCodec)),
|
|
686
|
+
None,
|
|
687
|
+
)
|
|
688
|
+
|
|
290
689
|
columns.append(
|
|
291
690
|
Column(
|
|
292
691
|
name=column_name,
|
|
@@ -295,7 +694,10 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
|
|
|
295
694
|
unique=False,
|
|
296
695
|
primary_key=primary_key,
|
|
297
696
|
default=default_expr,
|
|
697
|
+
materialized=materialized_expr,
|
|
298
698
|
annotations=annotations,
|
|
699
|
+
ttl=ttl_expr,
|
|
700
|
+
codec=codec_expr,
|
|
299
701
|
)
|
|
300
702
|
)
|
|
301
703
|
return columns
|
|
@@ -303,7 +705,9 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
|
|
|
303
705
|
|
|
304
706
|
class StringToEnumMixin:
|
|
305
707
|
@classmethod
|
|
306
|
-
def __get_pydantic_core_schema__(
|
|
708
|
+
def __get_pydantic_core_schema__(
|
|
709
|
+
cls, _source_type: Any, _handler: GetCoreSchemaHandler
|
|
710
|
+
) -> CoreSchema:
|
|
307
711
|
def validate(value: Any, _: Any) -> Any:
|
|
308
712
|
if isinstance(value, str):
|
|
309
713
|
try:
|
|
@@ -312,14 +716,15 @@ class StringToEnumMixin:
|
|
|
312
716
|
raise ValueError(f"Invalid enum name: {value}")
|
|
313
717
|
return cls(value) # fallback to default enum validation
|
|
314
718
|
|
|
315
|
-
return core_schema.with_info_before_validator_function(
|
|
719
|
+
return core_schema.with_info_before_validator_function(
|
|
720
|
+
validate, core_schema.enum_schema(cls, list(cls))
|
|
721
|
+
)
|
|
316
722
|
|
|
317
723
|
|
|
318
724
|
def is_array_nested_type(data_type: DataType) -> bool:
|
|
319
725
|
"""Type guard to check if a data type is Array(Nested(...))."""
|
|
320
|
-
return (
|
|
321
|
-
|
|
322
|
-
isinstance(data_type.element_type, Nested)
|
|
726
|
+
return isinstance(data_type, ArrayType) and isinstance(
|
|
727
|
+
data_type.element_type, Nested
|
|
323
728
|
)
|
|
324
729
|
|
|
325
730
|
|