moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +38 -3
  2. moose_lib/blocks.py +497 -37
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +94 -5
  5. moose_lib/config/config_file.py +44 -2
  6. moose_lib/config/runtime.py +137 -5
  7. moose_lib/data_models.py +451 -46
  8. moose_lib/dmv2/__init__.py +88 -60
  9. moose_lib/dmv2/_registry.py +3 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +56 -13
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +304 -119
  18. moose_lib/dmv2/registry.py +28 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +241 -21
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +175 -0
  24. moose_lib/dmv2/web_app_helpers.py +96 -0
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +537 -68
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +266 -156
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +38 -1
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +210 -0
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +114 -0
  56. tests/test_web_app.py +227 -0
  57. moose_lib-0.6.90.dist-info/RECORD +0 -42
  58. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py CHANGED
@@ -6,8 +6,19 @@ from inspect import isclass
6
6
  from uuid import UUID
7
7
  from datetime import datetime, date
8
8
 
9
- from typing import Literal, Tuple, Union, Any, get_origin, get_args, TypeAliasType, Annotated, Type, _BaseGenericAlias, \
10
- GenericAlias
9
+ from typing import (
10
+ Literal,
11
+ Tuple,
12
+ Union,
13
+ Any,
14
+ get_origin,
15
+ get_args,
16
+ TypeAliasType,
17
+ Annotated,
18
+ Type,
19
+ _BaseGenericAlias,
20
+ GenericAlias,
21
+ )
11
22
  from pydantic import BaseModel, Field, PlainSerializer, GetCoreSchemaHandler, ConfigDict
12
23
  from pydantic_core import CoreSchema, core_schema
13
24
  import ipaddress
@@ -15,18 +26,39 @@ import ipaddress
15
26
  type Key[T: (str, int)] = T
16
27
  type JWT[T] = T
17
28
 
29
+ # Integer type aliases for ClickHouse integer types
30
+ type Int8 = Annotated[int, "int8"]
31
+ type Int16 = Annotated[int, "int16"]
32
+ type Int32 = Annotated[int, "int32"]
33
+ type Int64 = Annotated[int, "int64"]
34
+ type UInt8 = Annotated[int, "uint8"]
35
+ type UInt16 = Annotated[int, "uint16"]
36
+ type UInt32 = Annotated[int, "uint32"]
37
+ type UInt64 = Annotated[int, "uint64"]
18
38
 
19
- @dataclasses.dataclass # a BaseModel in the annotations will confuse pydantic
39
+ # Float type aliases for ClickHouse float types
40
+ type Float32 = Annotated[float, "float32"]
41
+ type Float64 = Annotated[float, "float64"]
42
+
43
+
44
+ @dataclasses.dataclass(
45
+ frozen=True
46
+ ) # a BaseModel in the annotations will confuse pydantic
20
47
  class ClickhousePrecision:
21
48
  precision: int
22
49
 
23
50
 
24
- @dataclasses.dataclass
51
+ @dataclasses.dataclass(frozen=True)
25
52
  class ClickhouseSize:
26
53
  size: int
27
54
 
28
55
 
29
- @dataclasses.dataclass
56
+ @dataclasses.dataclass(frozen=True)
57
+ class ClickhouseFixedStringSize:
58
+ size: int
59
+
60
+
61
+ @dataclasses.dataclass(frozen=True)
30
62
  class ClickhouseDefault:
31
63
  expression: str
32
64
 
@@ -35,6 +67,60 @@ def clickhouse_default(expression: str) -> ClickhouseDefault:
35
67
  return ClickhouseDefault(expression=expression)
36
68
 
37
69
 
70
+ @dataclasses.dataclass(frozen=True)
71
+ class ClickHouseTTL:
72
+ expression: str
73
+
74
+
75
+ @dataclasses.dataclass(frozen=True)
76
+ class ClickHouseCodec:
77
+ expression: str
78
+
79
+
80
+ @dataclasses.dataclass(frozen=True)
81
+ class ClickHouseMaterialized:
82
+ """
83
+ ClickHouse MATERIALIZED column annotation.
84
+ The column value is computed at INSERT time and physically stored.
85
+ Cannot be explicitly inserted by users.
86
+
87
+ Args:
88
+ expression: ClickHouse SQL expression using column names (snake_case)
89
+
90
+ Examples:
91
+ # Extract date component
92
+ event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
93
+
94
+ # Precompute hash
95
+ user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")]
96
+
97
+ # Complex expression with JSON
98
+ combination_hash: Annotated[
99
+ list[int],
100
+ ClickHouseMaterialized(
101
+ "arrayMap(kv -> cityHash64(kv.1, kv.2), "
102
+ "JSONExtractKeysAndValuesRaw(toString(log_blob)))"
103
+ )
104
+ ]
105
+
106
+ Notes:
107
+ - Expression uses ClickHouse column names, not Python field names
108
+ - MATERIALIZED and DEFAULT are mutually exclusive
109
+ - Can be combined with ClickHouseCodec for compression
110
+ - Changing the expression modifies the column in-place (existing values preserved)
111
+ """
112
+
113
+ expression: str
114
+
115
+
116
+ @dataclasses.dataclass(frozen=True)
117
+ class ClickHouseJson:
118
+ max_dynamic_paths: int | None = None
119
+ max_dynamic_types: int | None = None
120
+ skip_paths: tuple[str, ...] = ()
121
+ skip_regexps: tuple[str, ...] = ()
122
+
123
+
38
124
  def clickhouse_decimal(precision: int, scale: int) -> Type[Decimal]:
39
125
  return Annotated[Decimal, Field(max_digits=precision, decimal_places=scale)]
40
126
 
@@ -48,25 +134,92 @@ def clickhouse_datetime64(precision: int) -> Type[datetime]:
48
134
  return Annotated[datetime, ClickhousePrecision(precision=precision)]
49
135
 
50
136
 
137
+ def FixedString(size: int) -> ClickhouseFixedStringSize:
138
+ """
139
+ Creates a FixedString(N) annotation for fixed-length strings.
140
+
141
+ ClickHouse stores exactly N bytes, padding shorter values with null bytes.
142
+ Values exceeding N bytes will raise an exception.
143
+
144
+ Use for fixed-length data like hashes, IPs, UUIDs, MAC addresses.
145
+
146
+ Example:
147
+ md5_hash: Annotated[str, FixedString(16)] # 16-byte MD5
148
+ sha256: Annotated[str, FixedString(32)] # 32-byte SHA256
149
+ ipv6: Annotated[str, FixedString(16)] # 16-byte IPv6
150
+ """
151
+ return ClickhouseFixedStringSize(size=size)
152
+
153
+
154
+ type Point = Annotated[tuple[float, float], "Point"]
155
+ type Ring = Annotated[list[tuple[float, float]], "Ring"]
156
+ type LineString = Annotated[list[tuple[float, float]], "LineString"]
157
+ type MultiLineString = Annotated[list[list[tuple[float, float]]], "MultiLineString"]
158
+ type Polygon = Annotated[list[list[tuple[float, float]]], "Polygon"]
159
+ type MultiPolygon = Annotated[list[list[list[tuple[float, float]]]], "MultiPolygon"]
160
+
161
+
51
162
  def aggregated[T](
52
- result_type: Type[T],
53
- agg_func: str,
54
- param_types: list[type | GenericAlias | _BaseGenericAlias]
163
+ result_type: Type[T],
164
+ agg_func: str,
165
+ param_types: list[type | GenericAlias | _BaseGenericAlias],
55
166
  ) -> Type[T]:
56
- return Annotated[result_type, AggregateFunction(agg_func=agg_func, param_types=param_types)]
167
+ return Annotated[
168
+ result_type,
169
+ AggregateFunction(agg_func=agg_func, param_types=tuple(param_types)),
170
+ ]
57
171
 
58
172
 
59
- @dataclasses.dataclass
173
+ @dataclasses.dataclass(frozen=True)
60
174
  class AggregateFunction:
61
175
  agg_func: str
62
- param_types: list[type | GenericAlias | _BaseGenericAlias]
176
+ param_types: tuple[type | GenericAlias | _BaseGenericAlias, ...]
63
177
 
64
178
  def to_dict(self):
65
179
  return {
66
180
  "functionName": self.agg_func,
67
181
  "argumentTypes": [
68
182
  py_type_to_column_type(t, [])[2] for t in self.param_types
69
- ]
183
+ ],
184
+ }
185
+
186
+
187
+ def simple_aggregated[T](agg_func: str, arg_type: Type[T]) -> Type[T]:
188
+ """Helper to create a SimpleAggregateFunction type annotation.
189
+
190
+ SimpleAggregateFunction is a ClickHouse type for storing aggregated values directly
191
+ instead of intermediate states. It's more efficient for functions like sum, max, min, etc.
192
+
193
+ Args:
194
+ agg_func: The aggregation function name (e.g., "sum", "max", "anyLast")
195
+ arg_type: The argument type for the function (also the result type)
196
+
197
+ Returns:
198
+ An Annotated type with SimpleAggregateFunction metadata
199
+
200
+ Example:
201
+ ```python
202
+ from moose_lib import simple_aggregated
203
+
204
+ row_count: simple_aggregated("sum", int)
205
+ max_value: simple_aggregated("max", float)
206
+ last_status: simple_aggregated("anyLast", str)
207
+ ```
208
+ """
209
+ return Annotated[
210
+ arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)
211
+ ]
212
+
213
+
214
+ @dataclasses.dataclass(frozen=True)
215
+ class SimpleAggregateFunction:
216
+ agg_func: str
217
+ arg_type: type | GenericAlias | _BaseGenericAlias
218
+
219
+ def to_dict(self):
220
+ return {
221
+ "functionName": self.agg_func,
222
+ "argumentType": py_type_to_column_type(self.arg_type, [])[2],
70
223
  }
71
224
 
72
225
 
@@ -79,7 +232,9 @@ def enum_value_serializer(value: int | str):
79
232
 
80
233
  class EnumValue(BaseModel):
81
234
  name: str
82
- value: Annotated[int | str, PlainSerializer(enum_value_serializer, return_type=dict)]
235
+ value: Annotated[
236
+ int | str, PlainSerializer(enum_value_serializer, return_type=dict)
237
+ ]
83
238
 
84
239
 
85
240
  class DataEnum(BaseModel):
@@ -107,7 +262,17 @@ class MapType(BaseModel):
107
262
  value_type: "DataType"
108
263
 
109
264
 
110
- type DataType = str | DataEnum | ArrayType | Nested | NamedTupleType | MapType
265
+ class JsonOptions(BaseModel):
266
+ max_dynamic_paths: int | None = None
267
+ max_dynamic_types: int | None = None
268
+ typed_paths: list[tuple[str, "DataType"]] = []
269
+ skip_paths: list[str] = []
270
+ skip_regexps: list[str] = []
271
+
272
+
273
+ type DataType = (
274
+ str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions
275
+ )
111
276
 
112
277
 
113
278
  def handle_jwt(field_type: type) -> Tuple[bool, type]:
@@ -146,12 +311,105 @@ class Column(BaseModel):
146
311
  primary_key: bool
147
312
  default: str | None = None
148
313
  annotations: list[Tuple[str, Any]] = []
314
+ ttl: str | None = None
315
+ codec: str | None = None
316
+ materialized: str | None = None
149
317
 
150
318
  def to_expr(self):
151
319
  # Lazy import to avoid circular dependency at import time
152
320
  from .query_builder import ColumnRef
321
+
153
322
  return ColumnRef(self)
154
323
 
324
+ def __str__(self) -> str:
325
+ """Return properly quoted identifier for SQL interpolation.
326
+
327
+ This enables Column objects to be used directly in f-strings and
328
+ string concatenation for SQL query construction.
329
+
330
+ Returns:
331
+ Backtick-quoted identifier safe for ClickHouse SQL.
332
+
333
+ Example:
334
+ >>> col = Column(name="user_id", ...)
335
+ >>> f"SELECT {col} FROM users"
336
+ "SELECT `user_id` FROM users"
337
+ """
338
+ from .utilities.sql import quote_identifier
339
+
340
+ return quote_identifier(self.name)
341
+
342
+ def __format__(self, format_spec: str) -> str:
343
+ """Format Column for f-string interpolation with format specifiers.
344
+
345
+ Supports format specs:
346
+ - 'col', 'c', 'column': Returns quoted identifier
347
+ - '' (empty): Returns quoted identifier (default)
348
+
349
+ Args:
350
+ format_spec: Format specification string
351
+
352
+ Returns:
353
+ Backtick-quoted identifier
354
+
355
+ Example:
356
+ >>> col = Column(name="email", ...)
357
+ >>> f"SELECT {col:col} FROM users"
358
+ "SELECT `email` FROM users"
359
+ """
360
+ # All format specs return quoted identifier
361
+ # This provides flexibility for user preference
362
+ from .utilities.sql import quote_identifier
363
+
364
+ return quote_identifier(self.name)
365
+
366
+
367
+ def _is_point_type(t: type) -> bool:
368
+ origin = get_origin(t)
369
+ if origin is tuple:
370
+ args = get_args(t)
371
+ return len(args) == 2 and all(arg is float for arg in args)
372
+ return False
373
+
374
+
375
+ def _is_list_of(inner_check: Any, t: type) -> bool:
376
+ origin = get_origin(t)
377
+ if origin is list:
378
+ args = get_args(t)
379
+ return len(args) == 1 and inner_check(args[0])
380
+ return False
381
+
382
+
383
+ def _validate_geometry_type(requested: str, t: type) -> None:
384
+ """
385
+ Validates that the provided Python type matches the expected structure
386
+ for the requested geometry annotation.
387
+ """
388
+ match requested:
389
+ case "Point":
390
+ if not _is_point_type(t):
391
+ raise ValueError("Point must be typed as tuple[float, float]")
392
+ case "Ring" | "LineString":
393
+ if not _is_list_of(_is_point_type, t):
394
+ raise ValueError(
395
+ f"{requested} must be typed as list[tuple[float, float]]"
396
+ )
397
+ case "MultiLineString" | "Polygon":
398
+ if not _is_list_of(lambda x: _is_list_of(_is_point_type, x), t):
399
+ raise ValueError(
400
+ f"{requested} must be typed as list[list[tuple[float, float]]]"
401
+ )
402
+ case "MultiPolygon":
403
+ if not _is_list_of(
404
+ lambda x: _is_list_of(lambda y: _is_list_of(_is_point_type, y), x),
405
+ t,
406
+ ):
407
+ raise ValueError(
408
+ "MultiPolygon must be typed as list[list[list[tuple[float, float]]]]"
409
+ )
410
+ case _:
411
+ raise ValueError(f"Unknown geometry type annotation: {requested}")
412
+
155
413
 
156
414
  def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], DataType]:
157
415
  # handle Annotated[Optional[Annotated[...], ...]
@@ -162,18 +420,45 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
162
420
  data_type: DataType
163
421
 
164
422
  if t is str:
165
- data_type = "String"
423
+ # Check for FixedString annotation
424
+ fixed_string_size = next(
425
+ (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
426
+ )
427
+ if fixed_string_size:
428
+ data_type = f"FixedString({fixed_string_size})"
429
+ else:
430
+ data_type = "String"
431
+ elif t is bytes:
432
+ # Check for FixedString annotation
433
+ fixed_string_size = next(
434
+ (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
435
+ )
436
+ if fixed_string_size:
437
+ data_type = f"FixedString({fixed_string_size})"
438
+ else:
439
+ # Regular bytes without FixedString annotation
440
+ data_type = "String"
166
441
  elif t is int:
167
442
  # Check for int size annotations
168
- int_size = next((md for md in mds if isinstance(md, str) and re.match(r'^u?int\d+$', md)), None)
443
+ int_size = next(
444
+ (md for md in mds if isinstance(md, str) and re.match(r"^u?int\d+$", md)),
445
+ None,
446
+ )
169
447
  if int_size:
170
448
  data_type = int_size.replace("u", "U").replace("i", "I")
171
449
  else:
172
- data_type = "Int"
450
+ data_type = "Int64"
173
451
  elif t is float:
174
452
  size = next((md for md in mds if isinstance(md, ClickhouseSize)), None)
175
453
  if size is None:
176
- bit_size = next((md for md in mds if isinstance(md, str) and re.match(r'^float\d+$', md)), None)
454
+ bit_size = next(
455
+ (
456
+ md
457
+ for md in mds
458
+ if isinstance(md, str) and re.match(r"^float\d+$", md)
459
+ ),
460
+ None,
461
+ )
177
462
  if bit_size:
178
463
  if bit_size == "float32":
179
464
  data_type = "Float32"
@@ -191,12 +476,16 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
191
476
  raise ValueError(f"Unsupported float size {size.size}")
192
477
  elif t is Decimal:
193
478
  precision = next((md.max_digits for md in mds if hasattr(md, "max_digits")), 10)
194
- scale = next((md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0)
479
+ scale = next(
480
+ (md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0
481
+ )
195
482
  data_type = f"Decimal({precision}, {scale})"
196
483
  elif t is bool:
197
484
  data_type = "Boolean"
198
485
  elif t is datetime:
199
- precision = next((md for md in mds if isinstance(md, ClickhousePrecision)), None)
486
+ precision = next(
487
+ (md for md in mds if isinstance(md, ClickhousePrecision)), None
488
+ )
200
489
  if precision is None:
201
490
  data_type = "DateTime"
202
491
  else:
@@ -213,39 +502,114 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
213
502
  data_type = "IPv4"
214
503
  elif t is ipaddress.IPv6Address:
215
504
  data_type = "IPv6"
505
+ elif any(
506
+ md
507
+ in [ # this check has to happen before t is matched against tuple/list
508
+ "Point",
509
+ "Ring",
510
+ "LineString",
511
+ "MultiLineString",
512
+ "Polygon",
513
+ "MultiPolygon",
514
+ ]
515
+ for md in mds
516
+ ):
517
+ data_type = next(
518
+ md
519
+ for md in mds
520
+ if md
521
+ in [
522
+ "Point",
523
+ "Ring",
524
+ "LineString",
525
+ "MultiLineString",
526
+ "Polygon",
527
+ "MultiPolygon",
528
+ ]
529
+ )
530
+ _validate_geometry_type(data_type, t)
216
531
  elif get_origin(t) is list:
217
532
  inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], [])
218
533
  data_type = ArrayType(element_type=inner_type, element_nullable=inner_optional)
219
534
  elif get_origin(t) is dict:
220
535
  args = get_args(t)
221
536
  if len(args) == 2:
222
- key_optional, _, key_type = py_type_to_column_type(args[0], [])
223
- value_optional, _, value_type = py_type_to_column_type(args[1], [])
224
- # For dict types, we assume keys are required and values match their type
225
- data_type = MapType(key_type=key_type, value_type=value_type)
537
+ # Special case: dict[str, Any] should be JSON type (matches TypeScript's Record<string, any>)
538
+ # This is useful for storing arbitrary extra fields in a JSON column
539
+ if args[0] is str and args[1] is Any:
540
+ data_type = "Json"
541
+ else:
542
+ key_optional, _, key_type = py_type_to_column_type(args[0], [])
543
+ value_optional, _, value_type = py_type_to_column_type(args[1], [])
544
+ # For dict types, we assume keys are required and values match their type
545
+ data_type = MapType(key_type=key_type, value_type=value_type)
226
546
  else:
227
- raise ValueError(f"Dict type must have exactly 2 type arguments, got {len(args)}")
547
+ raise ValueError(
548
+ f"Dict type must have exactly 2 type arguments, got {len(args)}"
549
+ )
228
550
  elif t is UUID:
229
551
  data_type = "UUID"
230
552
  elif t is Any:
231
553
  data_type = "Json"
554
+ elif any(isinstance(md, ClickHouseJson) for md in mds) and issubclass(t, BaseModel):
555
+ # Annotated[SomePydanticClass, ClickHouseJson(...)]
556
+ columns = _to_columns(t)
557
+ for c in columns:
558
+ if c.default is not None:
559
+ raise ValueError(
560
+ "Default in inner field. Put ClickHouseDefault in top level field."
561
+ )
562
+ # Enforce extra='allow' for JSON-mapped models
563
+ if t.model_config.get("extra") != "allow":
564
+ raise ValueError(
565
+ f"Model {t.__name__} with ClickHouseJson must have model_config with extra='allow'. "
566
+ "Add: model_config = ConfigDict(extra='allow')"
567
+ )
568
+ opts = next(md for md in mds if isinstance(md, ClickHouseJson))
569
+
570
+ # Build typed_paths from fields as tuples of (name, type)
571
+ typed_paths: list[tuple[str, DataType]] = []
572
+ for c in columns:
573
+ typed_paths.append((c.name, c.data_type))
574
+
575
+ has_any_option = (
576
+ opts.max_dynamic_paths is not None
577
+ or opts.max_dynamic_types is not None
578
+ or len(typed_paths) > 0
579
+ or len(opts.skip_paths) > 0
580
+ or len(opts.skip_regexps) > 0
581
+ )
582
+
583
+ if not has_any_option:
584
+ data_type = "Json"
585
+ else:
586
+ data_type = JsonOptions(
587
+ max_dynamic_paths=opts.max_dynamic_paths,
588
+ max_dynamic_types=opts.max_dynamic_types,
589
+ typed_paths=typed_paths,
590
+ skip_paths=list(opts.skip_paths),
591
+ skip_regexps=list(opts.skip_regexps),
592
+ )
232
593
  elif get_origin(t) is Literal and all(isinstance(arg, str) for arg in get_args(t)):
233
594
  data_type = "String"
234
595
  mds.append("LowCardinality")
235
596
  elif not isclass(t):
236
597
  raise ValueError(f"Unknown type {t}")
237
598
  elif issubclass(t, BaseModel):
599
+ columns = _to_columns(t)
600
+ for c in columns:
601
+ if c.default is not None:
602
+ raise ValueError(
603
+ "Default in inner field. Put ClickHouseDefault in top level field."
604
+ )
238
605
  if any(md == "ClickHouseNamedTuple" for md in mds):
239
606
  data_type = NamedTupleType(
240
- fields=[(
241
- column.name,
242
- column.data_type
243
- ) for column in _to_columns(t)],
607
+ fields=[(column.name, column.data_type) for column in columns],
244
608
  )
245
609
  else:
246
610
  data_type = Nested(
247
611
  name=t.__name__,
248
- columns=_to_columns(t),
612
+ columns=columns,
249
613
  )
250
614
  elif issubclass(t, Enum):
251
615
  values = [EnumValue(name=member.name, value=member.value) for member in t]
@@ -258,26 +622,36 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
258
622
  def _to_columns(model: type[BaseModel]) -> list[Column]:
259
623
  """Convert Pydantic model fields to Column definitions."""
260
624
  columns = []
625
+ # Get raw annotations from the model class to preserve type aliases
626
+ raw_annotations = getattr(model, "__annotations__", {})
627
+
261
628
  for field_name, field_info in model.model_fields.items():
262
- # Get the field type annotation
263
- field_type = field_info.annotation
629
+ # Use raw annotation if available (preserves type aliases and their metadata)
630
+ # Fall back to field_info.annotation if not found in __annotations__
631
+ field_type = raw_annotations.get(field_name, field_info.annotation)
264
632
  if field_type is None:
265
633
  raise ValueError(f"Missing type for {field_name}")
266
634
  primary_key, field_type = handle_key(field_type)
267
635
  is_jwt, field_type = handle_jwt(field_type)
268
636
 
269
- optional, mds, data_type = py_type_to_column_type(field_type, field_info.metadata)
637
+ optional, mds, data_type = py_type_to_column_type(
638
+ field_type, field_info.metadata
639
+ )
270
640
 
271
641
  annotations = []
272
642
  for md in mds:
273
- if isinstance(md, AggregateFunction):
274
- annotations.append(
275
- ("aggregationFunction", md.to_dict())
276
- )
277
- if md == "LowCardinality":
278
- annotations.append(
279
- ("LowCardinality", True)
280
- )
643
+ if isinstance(md, AggregateFunction) and all(
644
+ key != "aggregationFunction" for (key, _) in annotations
645
+ ):
646
+ annotations.append(("aggregationFunction", md.to_dict()))
647
+ if isinstance(md, SimpleAggregateFunction) and all(
648
+ key != "simpleAggregationFunction" for (key, _) in annotations
649
+ ):
650
+ annotations.append(("simpleAggregationFunction", md.to_dict()))
651
+ if md == "LowCardinality" and all(
652
+ key != "LowCardinality" for (key, _) in annotations
653
+ ):
654
+ annotations.append(("LowCardinality", True))
281
655
 
282
656
  column_name = field_name if field_info.alias is None else field_info.alias
283
657
 
@@ -287,6 +661,31 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
287
661
  None,
288
662
  )
289
663
 
664
+ # Extract MATERIALIZED expression from metadata, if provided
665
+ materialized_expr = next(
666
+ (md.expression for md in mds if isinstance(md, ClickHouseMaterialized)),
667
+ None,
668
+ )
669
+
670
+ # Validate mutual exclusivity of DEFAULT and MATERIALIZED
671
+ if default_expr and materialized_expr:
672
+ raise ValueError(
673
+ f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. "
674
+ f"Use one or the other."
675
+ )
676
+
677
+ # Extract TTL expression from metadata, if provided
678
+ ttl_expr = next(
679
+ (md.expression for md in mds if isinstance(md, ClickHouseTTL)),
680
+ None,
681
+ )
682
+
683
+ # Extract CODEC expression from metadata, if provided
684
+ codec_expr = next(
685
+ (md.expression for md in mds if isinstance(md, ClickHouseCodec)),
686
+ None,
687
+ )
688
+
290
689
  columns.append(
291
690
  Column(
292
691
  name=column_name,
@@ -295,7 +694,10 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
295
694
  unique=False,
296
695
  primary_key=primary_key,
297
696
  default=default_expr,
697
+ materialized=materialized_expr,
298
698
  annotations=annotations,
699
+ ttl=ttl_expr,
700
+ codec=codec_expr,
299
701
  )
300
702
  )
301
703
  return columns
@@ -303,7 +705,9 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
303
705
 
304
706
  class StringToEnumMixin:
305
707
  @classmethod
306
- def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: GetCoreSchemaHandler) -> CoreSchema:
708
+ def __get_pydantic_core_schema__(
709
+ cls, _source_type: Any, _handler: GetCoreSchemaHandler
710
+ ) -> CoreSchema:
307
711
  def validate(value: Any, _: Any) -> Any:
308
712
  if isinstance(value, str):
309
713
  try:
@@ -312,14 +716,15 @@ class StringToEnumMixin:
312
716
  raise ValueError(f"Invalid enum name: {value}")
313
717
  return cls(value) # fallback to default enum validation
314
718
 
315
- return core_schema.with_info_before_validator_function(validate, core_schema.enum_schema(cls, list(cls)))
719
+ return core_schema.with_info_before_validator_function(
720
+ validate, core_schema.enum_schema(cls, list(cls))
721
+ )
316
722
 
317
723
 
318
724
  def is_array_nested_type(data_type: DataType) -> bool:
319
725
  """Type guard to check if a data type is Array(Nested(...))."""
320
- return (
321
- isinstance(data_type, ArrayType) and
322
- isinstance(data_type.element_type, Nested)
726
+ return isinstance(data_type, ArrayType) and isinstance(
727
+ data_type.element_type, Nested
323
728
  )
324
729
 
325
730