moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py CHANGED
@@ -6,8 +6,19 @@ from inspect import isclass
 from uuid import UUID
 from datetime import datetime, date
 
-from typing import Literal, Tuple, Union, Any, get_origin, get_args, TypeAliasType, Annotated, Type, _BaseGenericAlias, \
-    GenericAlias
+from typing import (
+    Literal,
+    Tuple,
+    Union,
+    Any,
+    get_origin,
+    get_args,
+    TypeAliasType,
+    Annotated,
+    Type,
+    _BaseGenericAlias,
+    GenericAlias,
+)
 from pydantic import BaseModel, Field, PlainSerializer, GetCoreSchemaHandler, ConfigDict
 from pydantic_core import CoreSchema, core_schema
 import ipaddress
@@ -15,8 +26,24 @@ import ipaddress
 type Key[T: (str, int)] = T
 type JWT[T] = T
 
+# Integer type aliases for ClickHouse integer types
+type Int8 = Annotated[int, "int8"]
+type Int16 = Annotated[int, "int16"]
+type Int32 = Annotated[int, "int32"]
+type Int64 = Annotated[int, "int64"]
+type UInt8 = Annotated[int, "uint8"]
+type UInt16 = Annotated[int, "uint16"]
+type UInt32 = Annotated[int, "uint32"]
+type UInt64 = Annotated[int, "uint64"]
 
-@dataclasses.dataclass(frozen=True)  # a BaseModel in the annotations will confuse pydantic
+# Float type aliases for ClickHouse float types
+type Float32 = Annotated[float, "float32"]
+type Float64 = Annotated[float, "float64"]
+
+
+@dataclasses.dataclass(
+    frozen=True
+)  # a BaseModel in the annotations will confuse pydantic
 class ClickhousePrecision:
     precision: int
 
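The new `Int*`/`UInt*`/`Float*` aliases carry their ClickHouse width as `Annotated` metadata, so an ordinary Pydantic field selects an exact column type. A minimal sketch of the intended usage (model and field names are invented here, and `_to_columns` is the module's internal converter, not a public API):

```python
from pydantic import BaseModel

from moose_lib.data_models import Float32, Int32, UInt64, _to_columns


class Metric(BaseModel):
    count: Int32    # -> ClickHouse Int32
    total: UInt64   # -> ClickHouse UInt64
    ratio: Float32  # -> ClickHouse Float32


for col in _to_columns(Metric):
    print(col.name, col.data_type)  # count Int32 / total UInt64 / ratio Float32
```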
@@ -26,6 +53,11 @@ class ClickhouseSize:
     size: int
 
 
+@dataclasses.dataclass(frozen=True)
+class ClickhouseFixedStringSize:
+    size: int
+
+
 @dataclasses.dataclass(frozen=True)
 class ClickhouseDefault:
     expression: str
@@ -40,6 +72,55 @@ class ClickHouseTTL:
     expression: str
 
 
+@dataclasses.dataclass(frozen=True)
+class ClickHouseCodec:
+    expression: str
+
+
+@dataclasses.dataclass(frozen=True)
+class ClickHouseMaterialized:
+    """
+    ClickHouse MATERIALIZED column annotation.
+    The column value is computed at INSERT time and physically stored.
+    Cannot be explicitly inserted by users.
+
+    Args:
+        expression: ClickHouse SQL expression using column names (snake_case)
+
+    Examples:
+        # Extract date component
+        event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
+
+        # Precompute hash
+        user_hash: Annotated[int, ClickHouseMaterialized("cityHash64(user_id)")]
+
+        # Complex expression with JSON
+        combination_hash: Annotated[
+            list[int],
+            ClickHouseMaterialized(
+                "arrayMap(kv -> cityHash64(kv.1, kv.2), "
+                "JSONExtractKeysAndValuesRaw(toString(log_blob)))"
+            )
+        ]
+
+    Notes:
+        - Expression uses ClickHouse column names, not Python field names
+        - MATERIALIZED and DEFAULT are mutually exclusive
+        - Can be combined with ClickHouseCodec for compression
+        - Changing the expression modifies the column in-place (existing values preserved)
+    """
+
+    expression: str
+
+
+@dataclasses.dataclass(frozen=True)
+class ClickHouseJson:
+    max_dynamic_paths: int | None = None
+    max_dynamic_types: int | None = None
+    skip_paths: tuple[str, ...] = ()
+    skip_regexps: tuple[str, ...] = ()
+
+
 def clickhouse_decimal(precision: int, scale: int) -> Type[Decimal]:
     return Annotated[Decimal, Field(max_digits=precision, decimal_places=scale)]
 
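Together, these annotation dataclasses attach column-level DDL to ordinary model fields. A hedged sketch combining MATERIALIZED and CODEC, following the docstring above (the model and field names are illustrative, not from the package):

```python
from datetime import date, datetime
from typing import Annotated

from pydantic import BaseModel

from moose_lib.data_models import ClickHouseCodec, ClickHouseMaterialized


class Event(BaseModel):
    event_time: datetime
    # Computed at INSERT time from event_time and physically stored.
    event_date: Annotated[date, ClickHouseMaterialized("toDate(event_time)")]
    # Column compression codec; per the docstring it may be combined with MATERIALIZED.
    payload: Annotated[str, ClickHouseCodec("ZSTD(3)")]
```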
@@ -53,6 +134,23 @@ def clickhouse_datetime64(precision: int) -> Type[datetime]:
     return Annotated[datetime, ClickhousePrecision(precision=precision)]
 
 
+def FixedString(size: int) -> ClickhouseFixedStringSize:
+    """
+    Creates a FixedString(N) annotation for fixed-length strings.
+
+    ClickHouse stores exactly N bytes, padding shorter values with null bytes.
+    Values exceeding N bytes will raise an exception.
+
+    Use for fixed-length data like hashes, IPs, UUIDs, MAC addresses.
+
+    Example:
+        md5_hash: Annotated[str, FixedString(16)]  # 16-byte MD5
+        sha256: Annotated[str, FixedString(32)]  # 32-byte SHA256
+        ipv6: Annotated[str, FixedString(16)]  # 16-byte IPv6
+    """
+    return ClickhouseFixedStringSize(size=size)
+
+
 type Point = Annotated[tuple[float, float], "Point"]
 type Ring = Annotated[list[tuple[float, float]], "Ring"]
 type LineString = Annotated[list[tuple[float, float]], "LineString"]
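The helper also applies to `bytes` fields (handled in the `py_type_to_column_type` hunk further down). A small sketch of the expected mapping, again via the internal `_to_columns` converter:

```python
from typing import Annotated

from pydantic import BaseModel

from moose_lib.data_models import FixedString, _to_columns


class Digest(BaseModel):
    md5: Annotated[bytes, FixedString(16)]   # stored as FixedString(16)
    sha256: Annotated[str, FixedString(32)]  # stored as FixedString(32)


print([c.data_type for c in _to_columns(Digest)])
# expected: ['FixedString(16)', 'FixedString(32)']
```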
@@ -62,11 +160,14 @@ type MultiPolygon = Annotated[list[list[list[tuple[float, float]]]], "MultiPolyg
 
 
 def aggregated[T](
-        result_type: Type[T],
-        agg_func: str,
-        param_types: list[type | GenericAlias | _BaseGenericAlias]
+    result_type: Type[T],
+    agg_func: str,
+    param_types: list[type | GenericAlias | _BaseGenericAlias],
 ) -> Type[T]:
-    return Annotated[result_type, AggregateFunction(agg_func=agg_func, param_types=tuple(param_types))]
+    return Annotated[
+        result_type,
+        AggregateFunction(agg_func=agg_func, param_types=tuple(param_types)),
+    ]
 
 
 @dataclasses.dataclass(frozen=True)
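`aggregated` wraps a result type with an `AggregateFunction` marker, for columns that store aggregation states rather than plain values. A one-line sketch with illustrative arguments:

```python
from moose_lib.data_models import aggregated

# A column type carrying the state of avg(Float64); the serialized annotation
# records the function name and the argument column types.
AvgLatency = aggregated(float, "avg", [float])
```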
@@ -79,14 +180,11 @@ class AggregateFunction:
             "functionName": self.agg_func,
             "argumentTypes": [
                 py_type_to_column_type(t, [])[2] for t in self.param_types
-            ]
+            ],
         }
 
 
-def simple_aggregated[T](
-        agg_func: str,
-        arg_type: Type[T]
-) -> Type[T]:
+def simple_aggregated[T](agg_func: str, arg_type: Type[T]) -> Type[T]:
     """Helper to create a SimpleAggregateFunction type annotation.
 
     SimpleAggregateFunction is a ClickHouse type for storing aggregated values directly
@@ -108,7 +206,9 @@ def simple_aggregated[T](
         last_status: simple_aggregated("anyLast", str)
     ```
     """
-    return Annotated[arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)]
+    return Annotated[
+        arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)
+    ]
 
 
 @dataclasses.dataclass(frozen=True)
@@ -119,7 +219,7 @@ class SimpleAggregateFunction:
     def to_dict(self):
         return {
             "functionName": self.agg_func,
-            "argumentType": py_type_to_column_type(self.arg_type, [])[2]
+            "argumentType": py_type_to_column_type(self.arg_type, [])[2],
         }
 
 
@@ -132,7 +232,9 @@ def enum_value_serializer(value: int | str):
 
 class EnumValue(BaseModel):
     name: str
-    value: Annotated[int | str, PlainSerializer(enum_value_serializer, return_type=dict)]
+    value: Annotated[
+        int | str, PlainSerializer(enum_value_serializer, return_type=dict)
+    ]
 
 
 class DataEnum(BaseModel):
@@ -160,7 +262,17 @@ class MapType(BaseModel):
     value_type: "DataType"
 
 
-type DataType = str | DataEnum | ArrayType | Nested | NamedTupleType | MapType
+class JsonOptions(BaseModel):
+    max_dynamic_paths: int | None = None
+    max_dynamic_types: int | None = None
+    typed_paths: list[tuple[str, "DataType"]] = []
+    skip_paths: list[str] = []
+    skip_regexps: list[str] = []
+
+
+type DataType = (
+    str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions
+)
 
 
 def handle_jwt(field_type: type) -> Tuple[bool, type]:
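`JsonOptions` is the structured `DataType` variant emitted when a JSON column carries tuning parameters; with no options set, the plain `"Json"` string is still used (see the `py_type_to_column_type` hunk below). A quick sketch:

```python
from moose_lib.data_models import JsonOptions

opts = JsonOptions(max_dynamic_paths=64, skip_paths=["debug"])
print(opts.model_dump())
# {'max_dynamic_paths': 64, 'max_dynamic_types': None, 'typed_paths': [],
#  'skip_paths': ['debug'], 'skip_regexps': []}
```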
@@ -200,12 +312,57 @@ class Column(BaseModel):
     default: str | None = None
     annotations: list[Tuple[str, Any]] = []
     ttl: str | None = None
+    codec: str | None = None
+    materialized: str | None = None
 
     def to_expr(self):
         # Lazy import to avoid circular dependency at import time
         from .query_builder import ColumnRef
+
         return ColumnRef(self)
 
+    def __str__(self) -> str:
+        """Return properly quoted identifier for SQL interpolation.
+
+        This enables Column objects to be used directly in f-strings and
+        string concatenation for SQL query construction.
+
+        Returns:
+            Backtick-quoted identifier safe for ClickHouse SQL.
+
+        Example:
+            >>> col = Column(name="user_id", ...)
+            >>> f"SELECT {col} FROM users"
+            "SELECT `user_id` FROM users"
+        """
+        from .utilities.sql import quote_identifier
+
+        return quote_identifier(self.name)
+
+    def __format__(self, format_spec: str) -> str:
+        """Format Column for f-string interpolation with format specifiers.
+
+        Supports format specs:
+        - 'col', 'c', 'column': Returns quoted identifier
+        - '' (empty): Returns quoted identifier (default)
+
+        Args:
+            format_spec: Format specification string
+
+        Returns:
+            Backtick-quoted identifier
+
+        Example:
+            >>> col = Column(name="email", ...)
+            >>> f"SELECT {col:col} FROM users"
+            "SELECT `email` FROM users"
+        """
+        # All format specs return quoted identifier
+        # This provides flexibility for user preference
+        from .utilities.sql import quote_identifier
+
+        return quote_identifier(self.name)
+
 
 def _is_point_type(t: type) -> bool:
     origin = get_origin(t)
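With `__str__` and `__format__` defined, a `Column` interpolates as a backtick-quoted identifier. A sketch; `model_construct` is used here only to sidestep the model's other required fields, which this diff does not show:

```python
from moose_lib.data_models import Column

col = Column.model_construct(name="user_id")
print(f"SELECT {col} FROM users")      # SELECT `user_id` FROM users
print(f"SELECT {col:col} FROM users")  # same result; every format spec quotes
```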
@@ -263,18 +420,45 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
     data_type: DataType
 
     if t is str:
-        data_type = "String"
+        # Check for FixedString annotation
+        fixed_string_size = next(
+            (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
+        )
+        if fixed_string_size:
+            data_type = f"FixedString({fixed_string_size})"
+        else:
+            data_type = "String"
+    elif t is bytes:
+        # Check for FixedString annotation
+        fixed_string_size = next(
+            (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None
+        )
+        if fixed_string_size:
+            data_type = f"FixedString({fixed_string_size})"
+        else:
+            # Regular bytes without FixedString annotation
+            data_type = "String"
     elif t is int:
         # Check for int size annotations
-        int_size = next((md for md in mds if isinstance(md, str) and re.match(r'^u?int\d+$', md)), None)
+        int_size = next(
+            (md for md in mds if isinstance(md, str) and re.match(r"^u?int\d+$", md)),
+            None,
+        )
         if int_size:
             data_type = int_size.replace("u", "U").replace("i", "I")
         else:
-            data_type = "Int"
+            data_type = "Int64"
     elif t is float:
         size = next((md for md in mds if isinstance(md, ClickhouseSize)), None)
         if size is None:
-            bit_size = next((md for md in mds if isinstance(md, str) and re.match(r'^float\d+$', md)), None)
+            bit_size = next(
+                (
+                    md
+                    for md in mds
+                    if isinstance(md, str) and re.match(r"^float\d+$", md)
+                ),
+                None,
+            )
             if bit_size:
                 if bit_size == "float32":
                     data_type = "Float32"
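Note the behavioral change buried in this hunk: a bare `int` now maps to `Int64` rather than the previous `"Int"`. A sketch of a direct call:

```python
from moose_lib.data_models import py_type_to_column_type

optional, mds, data_type = py_type_to_column_type(int, [])
print(optional, data_type)  # expected: False Int64
```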
@@ -292,12 +476,16 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
                 raise ValueError(f"Unsupported float size {size.size}")
     elif t is Decimal:
         precision = next((md.max_digits for md in mds if hasattr(md, "max_digits")), 10)
-        scale = next((md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0)
+        scale = next(
+            (md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0
+        )
         data_type = f"Decimal({precision}, {scale})"
     elif t is bool:
         data_type = "Boolean"
     elif t is datetime:
-        precision = next((md for md in mds if isinstance(md, ClickhousePrecision)), None)
+        precision = next(
+            (md for md in mds if isinstance(md, ClickhousePrecision)), None
+        )
         if precision is None:
             data_type = "DateTime"
         else:
@@ -314,22 +502,31 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
         data_type = "IPv4"
     elif t is ipaddress.IPv6Address:
         data_type = "IPv6"
-    elif any(md in [  # this check has to happen before t is matched against tuple/list
-        "Point",
-        "Ring",
-        "LineString",
-        "MultiLineString",
-        "Polygon",
-        "MultiPolygon",
-    ] for md in mds):
-        data_type = next(md for md in mds if md in [
+    elif any(
+        md
+        in [  # this check has to happen before t is matched against tuple/list
             "Point",
             "Ring",
             "LineString",
             "MultiLineString",
             "Polygon",
             "MultiPolygon",
-        ])
+        ]
+        for md in mds
+    ):
+        data_type = next(
+            md
+            for md in mds
+            if md
+            in [
+                "Point",
+                "Ring",
+                "LineString",
+                "MultiLineString",
+                "Polygon",
+                "MultiPolygon",
+            ]
+        )
         _validate_geometry_type(data_type, t)
     elif get_origin(t) is list:
         inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], [])
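This hunk only reflows the geometry check; behavior is unchanged: the string markers on the geometry aliases are matched before the generic tuple/list handling. A sketch of the expected mapping:

```python
from pydantic import BaseModel

from moose_lib.data_models import Point, _to_columns


class Geo(BaseModel):
    location: Point  # Annotated[tuple[float, float], "Point"]


print(_to_columns(Geo)[0].data_type)  # expected: Point
```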
@@ -337,16 +534,62 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
     elif get_origin(t) is dict:
         args = get_args(t)
         if len(args) == 2:
-            key_optional, _, key_type = py_type_to_column_type(args[0], [])
-            value_optional, _, value_type = py_type_to_column_type(args[1], [])
-            # For dict types, we assume keys are required and values match their type
-            data_type = MapType(key_type=key_type, value_type=value_type)
+            # Special case: dict[str, Any] should be JSON type (matches TypeScript's Record<string, any>)
+            # This is useful for storing arbitrary extra fields in a JSON column
+            if args[0] is str and args[1] is Any:
+                data_type = "Json"
+            else:
+                key_optional, _, key_type = py_type_to_column_type(args[0], [])
+                value_optional, _, value_type = py_type_to_column_type(args[1], [])
+                # For dict types, we assume keys are required and values match their type
+                data_type = MapType(key_type=key_type, value_type=value_type)
         else:
-            raise ValueError(f"Dict type must have exactly 2 type arguments, got {len(args)}")
+            raise ValueError(
+                f"Dict type must have exactly 2 type arguments, got {len(args)}"
+            )
     elif t is UUID:
         data_type = "UUID"
     elif t is Any:
         data_type = "Json"
+    elif any(isinstance(md, ClickHouseJson) for md in mds) and issubclass(t, BaseModel):
+        # Annotated[SomePydanticClass, ClickHouseJson(...)]
+        columns = _to_columns(t)
+        for c in columns:
+            if c.default is not None:
+                raise ValueError(
+                    "Default in inner field. Put ClickHouseDefault in top level field."
+                )
+        # Enforce extra='allow' for JSON-mapped models
+        if t.model_config.get("extra") != "allow":
+            raise ValueError(
+                f"Model {t.__name__} with ClickHouseJson must have model_config with extra='allow'. "
+                "Add: model_config = ConfigDict(extra='allow')"
+            )
+        opts = next(md for md in mds if isinstance(md, ClickHouseJson))
+
+        # Build typed_paths from fields as tuples of (name, type)
+        typed_paths: list[tuple[str, DataType]] = []
+        for c in columns:
+            typed_paths.append((c.name, c.data_type))
+
+        has_any_option = (
+            opts.max_dynamic_paths is not None
+            or opts.max_dynamic_types is not None
+            or len(typed_paths) > 0
+            or len(opts.skip_paths) > 0
+            or len(opts.skip_regexps) > 0
+        )
+
+        if not has_any_option:
+            data_type = "Json"
+        else:
+            data_type = JsonOptions(
+                max_dynamic_paths=opts.max_dynamic_paths,
+                max_dynamic_types=opts.max_dynamic_types,
+                typed_paths=typed_paths,
+                skip_paths=list(opts.skip_paths),
+                skip_regexps=list(opts.skip_regexps),
+            )
     elif get_origin(t) is Literal and all(isinstance(arg, str) for arg in get_args(t)):
         data_type = "String"
         mds.append("LowCardinality")
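Two JSON paths are added here: `dict[str, Any]` maps straight to `Json`, and a Pydantic model annotated with `ClickHouseJson` becomes a tuned JSON column whose declared fields turn into typed paths. A hedged sketch (names invented; `extra="allow"` is mandatory per the check above):

```python
from typing import Annotated, Any

from pydantic import BaseModel, ConfigDict

from moose_lib.data_models import ClickHouseJson


class Payload(BaseModel):
    model_config = ConfigDict(extra="allow")  # required for JSON-mapped models
    request_id: str  # becomes a typed path inside the JSON column


class LogRow(BaseModel):
    extras: dict[str, Any]  # plain Json column
    blob: Annotated[Payload, ClickHouseJson(max_dynamic_paths=128)]
```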
@@ -361,10 +604,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
     )
     if any(md == "ClickHouseNamedTuple" for md in mds):
         data_type = NamedTupleType(
-            fields=[(
-                column.name,
-                column.data_type
-            ) for column in columns],
+            fields=[(column.name, column.data_type) for column in columns],
         )
     else:
         data_type = Nested(
@@ -382,30 +622,36 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
 def _to_columns(model: type[BaseModel]) -> list[Column]:
     """Convert Pydantic model fields to Column definitions."""
     columns = []
+    # Get raw annotations from the model class to preserve type aliases
+    raw_annotations = getattr(model, "__annotations__", {})
+
     for field_name, field_info in model.model_fields.items():
-        # Get the field type annotation
-        field_type = field_info.annotation
+        # Use raw annotation if available (preserves type aliases and their metadata)
+        # Fall back to field_info.annotation if not found in __annotations__
+        field_type = raw_annotations.get(field_name, field_info.annotation)
         if field_type is None:
             raise ValueError(f"Missing type for {field_name}")
         primary_key, field_type = handle_key(field_type)
         is_jwt, field_type = handle_jwt(field_type)
 
-        optional, mds, data_type = py_type_to_column_type(field_type, field_info.metadata)
+        optional, mds, data_type = py_type_to_column_type(
+            field_type, field_info.metadata
+        )
 
         annotations = []
         for md in mds:
-            if isinstance(md, AggregateFunction):
-                annotations.append(
-                    ("aggregationFunction", md.to_dict())
-                )
-            if isinstance(md, SimpleAggregateFunction):
-                annotations.append(
-                    ("simpleAggregationFunction", md.to_dict())
-                )
-            if md == "LowCardinality":
-                annotations.append(
-                    ("LowCardinality", True)
-                )
+            if isinstance(md, AggregateFunction) and all(
+                key != "aggregationFunction" for (key, _) in annotations
+            ):
+                annotations.append(("aggregationFunction", md.to_dict()))
+            if isinstance(md, SimpleAggregateFunction) and all(
+                key != "simpleAggregationFunction" for (key, _) in annotations
+            ):
+                annotations.append(("simpleAggregationFunction", md.to_dict()))
+            if md == "LowCardinality" and all(
+                key != "LowCardinality" for (key, _) in annotations
+            ):
+                annotations.append(("LowCardinality", True))
 
         column_name = field_name if field_info.alias is None else field_info.alias
 
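Besides the raw-annotation lookup, this hunk deduplicates the annotation list. For example, a `Literal` string field should yield exactly one `LowCardinality` entry:

```python
from typing import Literal

from pydantic import BaseModel

from moose_lib.data_models import _to_columns


class Row(BaseModel):
    status: Literal["active", "deleted"]


col = _to_columns(Row)[0]
print(col.data_type, col.annotations)  # expected: String [('LowCardinality', True)]
```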
@@ -415,12 +661,31 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
             None,
         )
 
+        # Extract MATERIALIZED expression from metadata, if provided
+        materialized_expr = next(
+            (md.expression for md in mds if isinstance(md, ClickHouseMaterialized)),
+            None,
+        )
+
+        # Validate mutual exclusivity of DEFAULT and MATERIALIZED
+        if default_expr and materialized_expr:
+            raise ValueError(
+                f"Column '{column_name}' cannot have both DEFAULT and MATERIALIZED. "
+                f"Use one or the other."
+            )
+
         # Extract TTL expression from metadata, if provided
         ttl_expr = next(
             (md.expression for md in mds if isinstance(md, ClickHouseTTL)),
             None,
         )
 
+        # Extract CODEC expression from metadata, if provided
+        codec_expr = next(
+            (md.expression for md in mds if isinstance(md, ClickHouseCodec)),
+            None,
+        )
+
         columns.append(
             Column(
                 name=column_name,
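The new validation makes the documented mutual exclusivity concrete: declaring both annotations on one field raises at conversion time. Sketch (field name invented):

```python
from typing import Annotated

from pydantic import BaseModel

from moose_lib.data_models import (
    ClickHouseMaterialized,
    ClickhouseDefault,
    _to_columns,
)


class Bad(BaseModel):
    stamp: Annotated[
        str,
        ClickhouseDefault("now()"),
        ClickHouseMaterialized("toString(now())"),
    ]


try:
    _to_columns(Bad)
except ValueError as e:
    print(e)  # Column 'stamp' cannot have both DEFAULT and MATERIALIZED. ...
```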
@@ -429,8 +694,10 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
                 unique=False,
                 primary_key=primary_key,
                 default=default_expr,
+                materialized=materialized_expr,
                 annotations=annotations,
                 ttl=ttl_expr,
+                codec=codec_expr,
             )
         )
     return columns
@@ -438,7 +705,9 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
 
 class StringToEnumMixin:
     @classmethod
-    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: GetCoreSchemaHandler) -> CoreSchema:
+    def __get_pydantic_core_schema__(
+        cls, _source_type: Any, _handler: GetCoreSchemaHandler
+    ) -> CoreSchema:
         def validate(value: Any, _: Any) -> Any:
             if isinstance(value, str):
                 try:
@@ -447,14 +716,15 @@ class StringToEnumMixin:
                     raise ValueError(f"Invalid enum name: {value}")
             return cls(value)  # fallback to default enum validation
 
-        return core_schema.with_info_before_validator_function(validate, core_schema.enum_schema(cls, list(cls)))
+        return core_schema.with_info_before_validator_function(
+            validate, core_schema.enum_schema(cls, list(cls))
+        )
 
 
 def is_array_nested_type(data_type: DataType) -> bool:
     """Type guard to check if a data type is Array(Nested(...))."""
-    return (
-        isinstance(data_type, ArrayType) and
-        isinstance(data_type.element_type, Nested)
+    return isinstance(data_type, ArrayType) and isinstance(
+        data_type.element_type, Nested
     )
 
 
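For completeness, a sketch of the mixin whose schema hook was reflowed in the last two hunks: it lets enum fields accept the member name as a string before falling back to normal enum validation.

```python
from enum import Enum

from pydantic import BaseModel

from moose_lib.data_models import StringToEnumMixin


class Color(StringToEnumMixin, Enum):
    RED = 1
    GREEN = 2


class Item(BaseModel):
    color: Color


print(Item(color="RED").color)  # Color.RED (resolved by name)
print(Item(color=2).color)      # Color.GREEN (default enum validation)
```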