moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/internal.py CHANGED
@@ -6,6 +6,7 @@ of various Moose resources (tables, streams/topics, APIs) and functions
6
6
  to convert the user-defined resources (from `dmv2.py`) into a serializable
7
7
  JSON format expected by the Moose infrastructure management system.
8
8
  """
9
+
9
10
  from importlib import import_module
10
11
  from typing import Literal, Optional, List, Any, Dict, Union, TYPE_CHECKING
11
12
  from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
@@ -22,15 +23,17 @@ from moose_lib.dmv2 import (
22
23
  get_web_apps,
23
24
  OlapTable,
24
25
  OlapConfig,
25
- SqlResource
26
+ SqlResource,
26
27
  )
27
28
  from moose_lib.dmv2.stream import KafkaSchemaConfig
28
29
  from pydantic.alias_generators import to_camel
29
30
  from pydantic.json_schema import JsonSchemaValue
30
31
 
31
- model_config = ConfigDict(alias_generator=AliasGenerator(
32
- serialization_alias=to_camel,
33
- ))
32
+ model_config = ConfigDict(
33
+ alias_generator=AliasGenerator(
34
+ serialization_alias=to_camel,
35
+ )
36
+ )
34
37
 
35
38
 
36
39
  class Target(BaseModel):
@@ -42,6 +45,7 @@ class Target(BaseModel):
42
45
  version: Optional version of the target stream configuration.
43
46
  metadata: Optional metadata for the target stream.
44
47
  """
48
+
45
49
  kind: Literal["stream"]
46
50
  name: str
47
51
  version: Optional[str] = None
@@ -54,22 +58,26 @@ class Consumer(BaseModel):
54
58
  Attributes:
55
59
  version: Optional version of the consumer configuration.
56
60
  """
61
+
57
62
  version: Optional[str] = None
58
63
 
59
64
 
60
65
  class BaseEngineConfigDict(BaseModel):
61
66
  """Base engine configuration for all ClickHouse table engines."""
67
+
62
68
  model_config = model_config
63
69
  engine: str
64
70
 
65
71
 
66
72
  class MergeTreeConfigDict(BaseEngineConfigDict):
67
73
  """Configuration for MergeTree engine."""
74
+
68
75
  engine: Literal["MergeTree"] = "MergeTree"
69
76
 
70
77
 
71
78
  class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
72
79
  """Configuration for ReplacingMergeTree engine."""
80
+
73
81
  engine: Literal["ReplacingMergeTree"] = "ReplacingMergeTree"
74
82
  ver: Optional[str] = None
75
83
  is_deleted: Optional[str] = None
@@ -77,17 +85,35 @@ class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
77
85
 
78
86
  class AggregatingMergeTreeConfigDict(BaseEngineConfigDict):
79
87
  """Configuration for AggregatingMergeTree engine."""
88
+
80
89
  engine: Literal["AggregatingMergeTree"] = "AggregatingMergeTree"
81
90
 
82
91
 
83
92
  class SummingMergeTreeConfigDict(BaseEngineConfigDict):
84
93
  """Configuration for SummingMergeTree engine."""
94
+
85
95
  engine: Literal["SummingMergeTree"] = "SummingMergeTree"
86
96
  columns: Optional[List[str]] = None
87
97
 
88
98
 
99
+ class CollapsingMergeTreeConfigDict(BaseEngineConfigDict):
100
+ """Configuration for CollapsingMergeTree engine."""
101
+
102
+ engine: Literal["CollapsingMergeTree"] = "CollapsingMergeTree"
103
+ sign: str
104
+
105
+
106
+ class VersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
107
+ """Configuration for VersionedCollapsingMergeTree engine."""
108
+
109
+ engine: Literal["VersionedCollapsingMergeTree"] = "VersionedCollapsingMergeTree"
110
+ sign: str
111
+ ver: str
112
+
113
+
89
114
  class ReplicatedMergeTreeConfigDict(BaseEngineConfigDict):
90
115
  """Configuration for ReplicatedMergeTree engine."""
116
+
91
117
  engine: Literal["ReplicatedMergeTree"] = "ReplicatedMergeTree"
92
118
  keeper_path: Optional[str] = None
93
119
  replica_name: Optional[str] = None
@@ -95,6 +121,7 @@ class ReplicatedMergeTreeConfigDict(BaseEngineConfigDict):
95
121
 
96
122
  class ReplicatedReplacingMergeTreeConfigDict(BaseEngineConfigDict):
97
123
  """Configuration for ReplicatedReplacingMergeTree engine."""
124
+
98
125
  engine: Literal["ReplicatedReplacingMergeTree"] = "ReplicatedReplacingMergeTree"
99
126
  keeper_path: Optional[str] = None
100
127
  replica_name: Optional[str] = None
@@ -104,6 +131,7 @@ class ReplicatedReplacingMergeTreeConfigDict(BaseEngineConfigDict):
104
131
 
105
132
  class ReplicatedAggregatingMergeTreeConfigDict(BaseEngineConfigDict):
106
133
  """Configuration for ReplicatedAggregatingMergeTree engine."""
134
+
107
135
  engine: Literal["ReplicatedAggregatingMergeTree"] = "ReplicatedAggregatingMergeTree"
108
136
  keeper_path: Optional[str] = None
109
137
  replica_name: Optional[str] = None
@@ -111,14 +139,37 @@ class ReplicatedAggregatingMergeTreeConfigDict(BaseEngineConfigDict):
111
139
 
112
140
  class ReplicatedSummingMergeTreeConfigDict(BaseEngineConfigDict):
113
141
  """Configuration for ReplicatedSummingMergeTree engine."""
142
+
114
143
  engine: Literal["ReplicatedSummingMergeTree"] = "ReplicatedSummingMergeTree"
115
144
  keeper_path: Optional[str] = None
116
145
  replica_name: Optional[str] = None
117
146
  columns: Optional[List[str]] = None
118
147
 
119
148
 
149
+ class ReplicatedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
150
+ """Configuration for ReplicatedCollapsingMergeTree engine."""
151
+
152
+ engine: Literal["ReplicatedCollapsingMergeTree"] = "ReplicatedCollapsingMergeTree"
153
+ keeper_path: Optional[str] = None
154
+ replica_name: Optional[str] = None
155
+ sign: str
156
+
157
+
158
+ class ReplicatedVersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
159
+ """Configuration for ReplicatedVersionedCollapsingMergeTree engine."""
160
+
161
+ engine: Literal["ReplicatedVersionedCollapsingMergeTree"] = (
162
+ "ReplicatedVersionedCollapsingMergeTree"
163
+ )
164
+ keeper_path: Optional[str] = None
165
+ replica_name: Optional[str] = None
166
+ sign: str
167
+ ver: str
168
+
169
+
120
170
  class S3QueueConfigDict(BaseEngineConfigDict):
121
171
  """Configuration for S3Queue engine with all specific fields."""
172
+
122
173
  engine: Literal["S3Queue"] = "S3Queue"
123
174
  s3_path: str
124
175
  format: str
@@ -128,17 +179,95 @@ class S3QueueConfigDict(BaseEngineConfigDict):
128
179
  headers: Optional[Dict[str, str]] = None
129
180
 
130
181
 
182
+ class S3ConfigDict(BaseEngineConfigDict):
183
+ """Configuration for S3 engine."""
184
+
185
+ engine: Literal["S3"] = "S3"
186
+ path: str
187
+ format: str
188
+ aws_access_key_id: Optional[str] = None
189
+ aws_secret_access_key: Optional[str] = None
190
+ compression: Optional[str] = None
191
+ partition_strategy: Optional[str] = None
192
+ partition_columns_in_data_file: Optional[str] = None
193
+
194
+
195
+ class BufferConfigDict(BaseEngineConfigDict):
196
+ """Configuration for Buffer engine."""
197
+
198
+ engine: Literal["Buffer"] = "Buffer"
199
+ target_database: str
200
+ target_table: str
201
+ num_layers: int
202
+ min_time: int
203
+ max_time: int
204
+ min_rows: int
205
+ max_rows: int
206
+ min_bytes: int
207
+ max_bytes: int
208
+ flush_time: Optional[int] = None
209
+ flush_rows: Optional[int] = None
210
+ flush_bytes: Optional[int] = None
211
+
212
+
213
+ class DistributedConfigDict(BaseEngineConfigDict):
214
+ """Configuration for Distributed engine."""
215
+
216
+ engine: Literal["Distributed"] = "Distributed"
217
+ cluster: str
218
+ target_database: str
219
+ target_table: str
220
+ sharding_key: Optional[str] = None
221
+ policy_name: Optional[str] = None
222
+
223
+
224
+ class IcebergS3ConfigDict(BaseEngineConfigDict):
225
+ """Configuration for IcebergS3 engine."""
226
+
227
+ engine: Literal["IcebergS3"] = "IcebergS3"
228
+ path: str
229
+ format: str
230
+ aws_access_key_id: Optional[str] = None
231
+ aws_secret_access_key: Optional[str] = None
232
+ compression: Optional[str] = None
233
+
234
+
235
+ class KafkaConfigDict(BaseEngineConfigDict):
236
+ """Configuration for Kafka engine.
237
+
238
+ Constructor: ENGINE = Kafka('broker', 'topic', 'group', 'format')
239
+ Settings (kafka_schema, kafka_num_consumers, security, etc.) go in table settings.
240
+
241
+ Reference: https://clickhouse.com/docs/engines/table-engines/integrations/kafka
242
+ """
243
+
244
+ engine: Literal["Kafka"] = "Kafka"
245
+ broker_list: str
246
+ topic_list: str
247
+ group_name: str
248
+ format: str
249
+
250
+
131
251
  # Discriminated union of all engine configurations
132
252
  EngineConfigDict = Union[
133
253
  MergeTreeConfigDict,
134
254
  ReplacingMergeTreeConfigDict,
135
255
  AggregatingMergeTreeConfigDict,
136
256
  SummingMergeTreeConfigDict,
257
+ CollapsingMergeTreeConfigDict,
258
+ VersionedCollapsingMergeTreeConfigDict,
137
259
  ReplicatedMergeTreeConfigDict,
138
260
  ReplicatedReplacingMergeTreeConfigDict,
139
261
  ReplicatedAggregatingMergeTreeConfigDict,
140
262
  ReplicatedSummingMergeTreeConfigDict,
141
- S3QueueConfigDict
263
+ ReplicatedCollapsingMergeTreeConfigDict,
264
+ ReplicatedVersionedCollapsingMergeTreeConfigDict,
265
+ S3QueueConfigDict,
266
+ S3ConfigDict,
267
+ BufferConfigDict,
268
+ DistributedConfigDict,
269
+ IcebergS3ConfigDict,
270
+ KafkaConfigDict,
142
271
  ]
143
272
 
144
273
 
@@ -151,12 +280,15 @@ class TableConfig(BaseModel):
151
280
  order_by: List of columns used for the ORDER BY clause.
152
281
  partition_by: The column name used for the PARTITION BY clause.
153
282
  sample_by_expression: Optional SAMPLE BY expression for data sampling.
283
+ primary_key_expression: Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified).
154
284
  engine_config: Engine configuration with type-safe, engine-specific parameters.
155
285
  version: Optional version string of the table configuration.
156
286
  metadata: Optional metadata for the table.
157
287
  life_cycle: Lifecycle management setting for the table.
158
288
  table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
289
+ cluster: Optional cluster name for ON CLUSTER support in ClickHouse.
159
290
  """
291
+
160
292
  model_config = model_config
161
293
 
162
294
  name: str
@@ -164,13 +296,16 @@ class TableConfig(BaseModel):
164
296
  order_by: List[str] | str
165
297
  partition_by: Optional[str]
166
298
  sample_by_expression: Optional[str] = None
167
- engine_config: Optional[EngineConfigDict] = Field(None, discriminator='engine')
299
+ primary_key_expression: Optional[str] = None
300
+ engine_config: Optional[EngineConfigDict] = Field(None, discriminator="engine")
168
301
  version: Optional[str] = None
169
302
  metadata: Optional[dict] = None
170
303
  life_cycle: Optional[str] = None
171
304
  table_settings: Optional[dict[str, str]] = None
172
305
  indexes: list[OlapConfig.TableIndex] = []
173
306
  ttl: Optional[str] = None
307
+ database: Optional[str] = None
308
+ cluster: Optional[str] = None
174
309
 
175
310
 
176
311
  class TopicConfig(BaseModel):
@@ -190,6 +325,7 @@ class TopicConfig(BaseModel):
190
325
  metadata: Optional metadata for the topic.
191
326
  life_cycle: Lifecycle management setting for the topic.
192
327
  """
328
+
193
329
  model_config = model_config
194
330
 
195
331
  name: str
@@ -218,7 +354,10 @@ class IngestApiConfig(BaseModel):
218
354
  version: Optional version string of the API configuration.
219
355
  path: Optional custom path for the ingestion endpoint.
220
356
  metadata: Optional metadata for the API.
357
+ allow_extra_fields: Whether this API allows extra fields beyond the defined columns.
358
+ When true, extra fields in payloads are passed through to streaming functions.
221
359
  """
360
+
222
361
  model_config = model_config
223
362
 
224
363
  name: str
@@ -229,6 +368,7 @@ class IngestApiConfig(BaseModel):
229
368
  path: Optional[str] = None
230
369
  metadata: Optional[dict] = None
231
370
  json_schema: dict[str, Any] = Field(serialization_alias="schema")
371
+ allow_extra_fields: bool = False
232
372
 
233
373
 
234
374
  class InternalApiConfig(BaseModel):
@@ -242,6 +382,7 @@ class InternalApiConfig(BaseModel):
242
382
  path: Optional custom path for the API endpoint.
243
383
  metadata: Optional metadata for the API.
244
384
  """
385
+
245
386
  model_config = model_config
246
387
 
247
388
  name: str
@@ -261,6 +402,7 @@ class WorkflowJson(BaseModel):
261
402
  timeout: Optional timeout string for the entire workflow.
262
403
  schedule: Optional cron-like schedule string for recurring execution.
263
404
  """
405
+
264
406
  model_config = model_config
265
407
 
266
408
  name: str
@@ -275,6 +417,7 @@ class WebAppMetadataJson(BaseModel):
275
417
  Attributes:
276
418
  description: Optional description of the WebApp.
277
419
  """
420
+
278
421
  model_config = model_config
279
422
 
280
423
  description: Optional[str] = None
@@ -288,6 +431,7 @@ class WebAppJson(BaseModel):
288
431
  mount_path: The URL path where the WebApp is mounted.
289
432
  metadata: Optional metadata for documentation purposes.
290
433
  """
434
+
291
435
  model_config = model_config
292
436
 
293
437
  name: str
@@ -304,8 +448,16 @@ class InfrastructureSignatureJson(BaseModel):
304
448
  id: A unique identifier for the resource instance (often name + version).
305
449
  kind: The type of the infrastructure component.
306
450
  """
451
+
307
452
  id: str
308
- kind: Literal["Table", "Topic", "ApiEndpoint", "TopicToTableSyncProcess", "View", "SqlResource"]
453
+ kind: Literal[
454
+ "Table",
455
+ "Topic",
456
+ "ApiEndpoint",
457
+ "TopicToTableSyncProcess",
458
+ "View",
459
+ "SqlResource",
460
+ ]
309
461
 
310
462
 
311
463
  class SqlResourceConfig(BaseModel):
@@ -317,8 +469,10 @@ class SqlResourceConfig(BaseModel):
317
469
  teardown: List of SQL commands required to drop the resource.
318
470
  pulls_data_from: List of infrastructure components this resource reads from.
319
471
  pushes_data_to: List of infrastructure components this resource writes to.
472
+ source_file: Optional path to the source file where this resource is defined.
320
473
  metadata: Optional metadata for the resource.
321
474
  """
475
+
322
476
  model_config = model_config
323
477
 
324
478
  name: str
@@ -326,6 +480,7 @@ class SqlResourceConfig(BaseModel):
326
480
  teardown: list[str]
327
481
  pulls_data_from: list[InfrastructureSignatureJson]
328
482
  pushes_data_to: list[InfrastructureSignatureJson]
483
+ source_file: Optional[str] = None
329
484
  metadata: Optional[dict] = None
330
485
 
331
486
 
@@ -343,6 +498,7 @@ class InfrastructureMap(BaseModel):
343
498
  workflows: Dictionary mapping workflow names to their configurations.
344
499
  web_apps: Dictionary mapping WebApp names to their configurations.
345
500
  """
501
+
346
502
  model_config = model_config
347
503
 
348
504
  tables: dict[str, TableConfig]
@@ -369,11 +525,15 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
369
525
  Raises:
370
526
  TypeError: If the input object is not a recognized SQL resource type.
371
527
  """
372
- if hasattr(r, 'kind'):
528
+ if hasattr(r, "kind"):
373
529
  if r.kind == "OlapTable":
374
530
  # Explicitly cast for type hint checking if needed, though Python is dynamic
375
531
  table = r # type: OlapTable
376
- res_id = f"{table.name}_{table.config.version}" if table.config.version else table.name
532
+ res_id = (
533
+ f"{table.name}_{table.config.version}"
534
+ if table.config.version
535
+ else table.name
536
+ )
377
537
  return InfrastructureSignatureJson(id=res_id, kind="Table")
378
538
  elif r.kind == "SqlResource":
379
539
  # Explicitly cast for type hint checking if needed
@@ -386,84 +546,117 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
386
546
  raise TypeError(f"Object {r} lacks a 'kind' attribute for dependency mapping.")
387
547
 
388
548
 
389
- def _convert_basic_engine_instance(engine: "EngineConfig") -> Optional[EngineConfigDict]:
549
+ def _convert_basic_engine_instance(
550
+ engine: "EngineConfig",
551
+ ) -> Optional[EngineConfigDict]:
390
552
  """Convert basic MergeTree engine instances to config dict.
391
-
553
+
392
554
  Args:
393
555
  engine: An EngineConfig instance
394
-
556
+
395
557
  Returns:
396
558
  EngineConfigDict if matched, None otherwise
397
559
  """
398
560
  from moose_lib.blocks import (
399
- MergeTreeEngine, ReplacingMergeTreeEngine,
400
- AggregatingMergeTreeEngine, SummingMergeTreeEngine
561
+ MergeTreeEngine,
562
+ ReplacingMergeTreeEngine,
563
+ AggregatingMergeTreeEngine,
564
+ SummingMergeTreeEngine,
565
+ CollapsingMergeTreeEngine,
566
+ VersionedCollapsingMergeTreeEngine,
401
567
  )
402
568
 
403
569
  if isinstance(engine, MergeTreeEngine):
404
570
  return MergeTreeConfigDict()
405
571
  elif isinstance(engine, ReplacingMergeTreeEngine):
406
572
  return ReplacingMergeTreeConfigDict(
407
- ver=engine.ver,
408
- is_deleted=engine.is_deleted
573
+ ver=engine.ver, is_deleted=engine.is_deleted
409
574
  )
410
575
  elif isinstance(engine, AggregatingMergeTreeEngine):
411
576
  return AggregatingMergeTreeConfigDict()
412
577
  elif isinstance(engine, SummingMergeTreeEngine):
413
578
  return SummingMergeTreeConfigDict(columns=engine.columns)
579
+ elif isinstance(engine, CollapsingMergeTreeEngine):
580
+ return CollapsingMergeTreeConfigDict(sign=engine.sign)
581
+ elif isinstance(engine, VersionedCollapsingMergeTreeEngine):
582
+ return VersionedCollapsingMergeTreeConfigDict(sign=engine.sign, ver=engine.ver)
414
583
  return None
415
584
 
416
585
 
417
- def _convert_replicated_engine_instance(engine: "EngineConfig") -> Optional[EngineConfigDict]:
586
+ def _convert_replicated_engine_instance(
587
+ engine: "EngineConfig",
588
+ ) -> Optional[EngineConfigDict]:
418
589
  """Convert replicated MergeTree engine instances to config dict.
419
-
590
+
420
591
  Args:
421
592
  engine: An EngineConfig instance
422
-
593
+
423
594
  Returns:
424
595
  EngineConfigDict if matched, None otherwise
425
596
  """
426
597
  from moose_lib.blocks import (
427
- ReplicatedMergeTreeEngine, ReplicatedReplacingMergeTreeEngine,
428
- ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine
598
+ ReplicatedMergeTreeEngine,
599
+ ReplicatedReplacingMergeTreeEngine,
600
+ ReplicatedAggregatingMergeTreeEngine,
601
+ ReplicatedSummingMergeTreeEngine,
602
+ ReplicatedCollapsingMergeTreeEngine,
603
+ ReplicatedVersionedCollapsingMergeTreeEngine,
429
604
  )
430
605
 
431
606
  if isinstance(engine, ReplicatedMergeTreeEngine):
432
607
  return ReplicatedMergeTreeConfigDict(
433
- keeper_path=engine.keeper_path,
434
- replica_name=engine.replica_name
608
+ keeper_path=engine.keeper_path, replica_name=engine.replica_name
435
609
  )
436
610
  elif isinstance(engine, ReplicatedReplacingMergeTreeEngine):
437
611
  return ReplicatedReplacingMergeTreeConfigDict(
438
612
  keeper_path=engine.keeper_path,
439
613
  replica_name=engine.replica_name,
440
614
  ver=engine.ver,
441
- is_deleted=engine.is_deleted
615
+ is_deleted=engine.is_deleted,
442
616
  )
443
617
  elif isinstance(engine, ReplicatedAggregatingMergeTreeEngine):
444
618
  return ReplicatedAggregatingMergeTreeConfigDict(
445
- keeper_path=engine.keeper_path,
446
- replica_name=engine.replica_name
619
+ keeper_path=engine.keeper_path, replica_name=engine.replica_name
447
620
  )
448
621
  elif isinstance(engine, ReplicatedSummingMergeTreeEngine):
449
622
  return ReplicatedSummingMergeTreeConfigDict(
450
623
  keeper_path=engine.keeper_path,
451
624
  replica_name=engine.replica_name,
452
- columns=engine.columns
625
+ columns=engine.columns,
626
+ )
627
+ elif isinstance(engine, ReplicatedCollapsingMergeTreeEngine):
628
+ return ReplicatedCollapsingMergeTreeConfigDict(
629
+ keeper_path=engine.keeper_path,
630
+ replica_name=engine.replica_name,
631
+ sign=engine.sign,
632
+ )
633
+ elif isinstance(engine, ReplicatedVersionedCollapsingMergeTreeEngine):
634
+ return ReplicatedVersionedCollapsingMergeTreeConfigDict(
635
+ keeper_path=engine.keeper_path,
636
+ replica_name=engine.replica_name,
637
+ sign=engine.sign,
638
+ ver=engine.ver,
453
639
  )
454
640
  return None
455
641
 
456
642
 
457
643
  def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineConfigDict:
458
644
  """Convert an EngineConfig instance to config dict format.
459
-
645
+
460
646
  Args:
461
647
  engine: An EngineConfig instance
462
-
648
+
463
649
  Returns:
464
650
  EngineConfigDict with engine-specific configuration
465
651
  """
466
- from moose_lib.blocks import S3QueueEngine
652
+ from moose_lib.blocks import (
653
+ S3QueueEngine,
654
+ S3Engine,
655
+ BufferEngine,
656
+ DistributedEngine,
657
+ IcebergS3Engine,
658
+ KafkaEngine,
659
+ )
467
660
 
468
661
  # Try S3Queue first
469
662
  if isinstance(engine, S3QueueEngine):
@@ -473,7 +666,65 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
473
666
  aws_access_key_id=engine.aws_access_key_id,
474
667
  aws_secret_access_key=engine.aws_secret_access_key,
475
668
  compression=engine.compression,
476
- headers=engine.headers
669
+ headers=engine.headers,
670
+ )
671
+
672
+ # Try S3
673
+ if isinstance(engine, S3Engine):
674
+ return S3ConfigDict(
675
+ path=engine.path,
676
+ format=engine.format,
677
+ aws_access_key_id=engine.aws_access_key_id,
678
+ aws_secret_access_key=engine.aws_secret_access_key,
679
+ compression=engine.compression,
680
+ partition_strategy=engine.partition_strategy,
681
+ partition_columns_in_data_file=engine.partition_columns_in_data_file,
682
+ )
683
+
684
+ # Try Buffer
685
+ if isinstance(engine, BufferEngine):
686
+ return BufferConfigDict(
687
+ target_database=engine.target_database,
688
+ target_table=engine.target_table,
689
+ num_layers=engine.num_layers,
690
+ min_time=engine.min_time,
691
+ max_time=engine.max_time,
692
+ min_rows=engine.min_rows,
693
+ max_rows=engine.max_rows,
694
+ min_bytes=engine.min_bytes,
695
+ max_bytes=engine.max_bytes,
696
+ flush_time=engine.flush_time,
697
+ flush_rows=engine.flush_rows,
698
+ flush_bytes=engine.flush_bytes,
699
+ )
700
+
701
+ # Try Distributed
702
+ if isinstance(engine, DistributedEngine):
703
+ return DistributedConfigDict(
704
+ cluster=engine.cluster,
705
+ target_database=engine.target_database,
706
+ target_table=engine.target_table,
707
+ sharding_key=engine.sharding_key,
708
+ policy_name=engine.policy_name,
709
+ )
710
+
711
+ # Try IcebergS3
712
+ if isinstance(engine, IcebergS3Engine):
713
+ return IcebergS3ConfigDict(
714
+ path=engine.path,
715
+ format=engine.format,
716
+ aws_access_key_id=engine.aws_access_key_id,
717
+ aws_secret_access_key=engine.aws_secret_access_key,
718
+ compression=engine.compression,
719
+ )
720
+
721
+ # Try Kafka
722
+ if isinstance(engine, KafkaEngine):
723
+ return KafkaConfigDict(
724
+ broker_list=engine.broker_list,
725
+ topic_list=engine.topic_list,
726
+ group_name=engine.group_name,
727
+ format=engine.format,
477
728
  )
478
729
 
479
730
  # Try basic engines
@@ -490,14 +741,15 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
490
741
  return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
491
742
 
492
743
 
493
- def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig],
494
- table: OlapTable) -> EngineConfigDict:
744
+ def _convert_engine_to_config_dict(
745
+ engine: Union[ClickHouseEngines, EngineConfig], table: OlapTable
746
+ ) -> EngineConfigDict:
495
747
  """Convert engine enum or EngineConfig instance to new engine config format.
496
-
748
+
497
749
  Args:
498
750
  engine: Either a ClickHouseEngines enum value or an EngineConfig instance
499
751
  table: The OlapTable instance with configuration
500
-
752
+
501
753
  Returns:
502
754
  EngineConfigDict with engine-specific configuration
503
755
  """
@@ -516,7 +768,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
516
768
  engine_name = str(engine)
517
769
 
518
770
  # For S3Queue with legacy configuration, check for s3_queue_engine_config
519
- if engine_name == "S3Queue" and hasattr(table.config, 's3_queue_engine_config'):
771
+ if engine_name == "S3Queue" and hasattr(table.config, "s3_queue_engine_config"):
520
772
  s3_config = table.config.s3_queue_engine_config
521
773
  if s3_config:
522
774
  logger = Logger(action="S3QueueConfig")
@@ -530,7 +782,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
530
782
  aws_access_key_id=s3_config.aws_access_key_id,
531
783
  aws_secret_access_key=s3_config.aws_secret_access_key,
532
784
  compression=s3_config.compression,
533
- headers=s3_config.headers
785
+ headers=s3_config.headers,
534
786
  )
535
787
 
536
788
  # Map engine names to specific config classes
@@ -588,16 +840,24 @@ def to_infra_map() -> dict:
588
840
  table_settings["mode"] = "unordered"
589
841
 
590
842
  id_key = (
591
- f"{table.name}_{table.config.version}" if table.config.version else table.name
843
+ f"{table.name}_{table.config.version}"
844
+ if table.config.version
845
+ else table.name
592
846
  )
593
847
 
594
848
  # Determine ORDER BY: list of fields or single expression
595
849
  has_fields = bool(table.config.order_by_fields)
596
850
  has_expr = table.config.order_by_expression is not None
597
851
  if has_fields and has_expr:
598
- raise ValueError(f"Table {table.name}: Provide either order_by_fields or order_by_expression, not both.")
852
+ raise ValueError(
853
+ f"Table {table.name}: Provide either order_by_fields or order_by_expression, not both."
854
+ )
599
855
 
600
- order_by_value = table.config.order_by_expression if has_expr else table.config.order_by_fields
856
+ order_by_value = (
857
+ table.config.order_by_expression
858
+ if has_expr
859
+ else table.config.order_by_fields
860
+ )
601
861
 
602
862
  tables[id_key] = TableConfig(
603
863
  name=table.name,
@@ -605,14 +865,19 @@ def to_infra_map() -> dict:
605
865
  order_by=order_by_value,
606
866
  partition_by=table.config.partition_by,
607
867
  sample_by_expression=table.config.sample_by_expression,
868
+ primary_key_expression=table.config.primary_key_expression,
608
869
  engine_config=engine_config,
609
870
  version=table.config.version,
610
871
  metadata=getattr(table, "metadata", None),
611
- life_cycle=table.config.life_cycle.value if table.config.life_cycle else None,
872
+ life_cycle=(
873
+ table.config.life_cycle.value if table.config.life_cycle else None
874
+ ),
612
875
  # Map 'settings' to 'table_settings' for internal use
613
876
  table_settings=table_settings if table_settings else None,
614
877
  indexes=table.config.indexes,
615
878
  ttl=table.config.ttl,
879
+ database=table.config.database,
880
+ cluster=table.config.cluster,
616
881
  )
617
882
 
618
883
  for name, stream in get_streams().items():
@@ -628,15 +893,20 @@ def to_infra_map() -> dict:
628
893
  ]
629
894
 
630
895
  consumers = [
631
- Consumer(version=consumer.config.version)
632
- for consumer in stream.consumers
896
+ Consumer(version=consumer.config.version) for consumer in stream.consumers
633
897
  ]
634
898
 
635
899
  topics[name] = TopicConfig(
636
900
  name=name,
637
901
  columns=_to_columns(stream._t),
638
- target_table=stream.config.destination.name if stream.config.destination else None,
639
- target_table_version=stream.config.destination.config.version if stream.config.destination else None,
902
+ target_table=(
903
+ stream.config.destination.name if stream.config.destination else None
904
+ ),
905
+ target_table_version=(
906
+ stream.config.destination.config.version
907
+ if stream.config.destination
908
+ else None
909
+ ),
640
910
  retention_period=stream.config.retention_period,
641
911
  partition_count=stream.config.parallelism,
642
912
  version=stream.config.version,
@@ -644,25 +914,33 @@ def to_infra_map() -> dict:
644
914
  has_multi_transform=stream._multipleTransformations is not None,
645
915
  consumers=consumers,
646
916
  metadata=getattr(stream, "metadata", None),
647
- life_cycle=stream.config.life_cycle.value if stream.config.life_cycle else None,
917
+ life_cycle=(
918
+ stream.config.life_cycle.value if stream.config.life_cycle else None
919
+ ),
648
920
  schema_config=stream.config.schema_config,
649
921
  )
650
922
 
651
923
  for name, api in get_ingest_apis().items():
924
+ # Check if the Pydantic model allows extra fields (extra='allow')
925
+ # This is the Python equivalent of TypeScript's index signatures
926
+ model_allows_extra = api._t.model_config.get("extra") == "allow"
927
+
652
928
  ingest_apis[name] = IngestApiConfig(
653
929
  name=name,
654
930
  columns=_to_columns(api._t),
655
931
  version=api.config.version,
656
932
  path=api.config.path,
657
- write_to=Target(
658
- kind="stream",
659
- name=api.config.destination.name
660
- ),
933
+ write_to=Target(kind="stream", name=api.config.destination.name),
661
934
  metadata=getattr(api, "metadata", None),
662
935
  json_schema=api._t.model_json_schema(
663
- ref_template='#/components/schemas/{model}'
936
+ ref_template="#/components/schemas/{model}"
937
+ ),
938
+ dead_letter_queue=(
939
+ api.config.dead_letter_queue.name
940
+ if api.config.dead_letter_queue
941
+ else None
664
942
  ),
665
- dead_letter_queue=api.config.dead_letter_queue.name if api.config.dead_letter_queue else None
943
+ allow_extra_fields=model_allows_extra,
666
944
  )
667
945
 
668
946
  for name, api in get_apis().items():
@@ -680,8 +958,13 @@ def to_infra_map() -> dict:
680
958
  name=resource.name,
681
959
  setup=resource.setup,
682
960
  teardown=resource.teardown,
683
- pulls_data_from=[_map_sql_resource_ref(dep) for dep in resource.pulls_data_from],
684
- pushes_data_to=[_map_sql_resource_ref(dep) for dep in resource.pushes_data_to],
961
+ pulls_data_from=[
962
+ _map_sql_resource_ref(dep) for dep in resource.pulls_data_from
963
+ ],
964
+ pushes_data_to=[
965
+ _map_sql_resource_ref(dep) for dep in resource.pushes_data_to
966
+ ],
967
+ source_file=getattr(resource, "source_file", None),
685
968
  metadata=getattr(resource, "metadata", None),
686
969
  )
687
970
 
@@ -713,10 +996,10 @@ def to_infra_map() -> dict:
713
996
  apis=apis,
714
997
  sql_resources=sql_resources,
715
998
  workflows=workflows,
716
- web_apps=web_apps
999
+ web_apps=web_apps,
717
1000
  )
718
1001
 
719
- return infra_map.model_dump(by_alias=True)
1002
+ return infra_map.model_dump(by_alias=True, exclude_none=False)
720
1003
 
721
1004
 
722
1005
  def load_models():
@@ -733,6 +1016,7 @@ def load_models():
733
1016
  calling system uses to extract the configuration.
734
1017
  """
735
1018
  import os
1019
+
736
1020
  source_dir = os.environ.get("MOOSE_SOURCE_DIR", "app")
737
1021
  import_module(f"{source_dir}.main")
738
1022