moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. moose_lib/__init__.py +38 -3
  2. moose_lib/blocks.py +497 -37
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +94 -5
  5. moose_lib/config/config_file.py +44 -2
  6. moose_lib/config/runtime.py +137 -5
  7. moose_lib/data_models.py +451 -46
  8. moose_lib/dmv2/__init__.py +88 -60
  9. moose_lib/dmv2/_registry.py +3 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +56 -13
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +304 -119
  18. moose_lib/dmv2/registry.py +28 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +241 -21
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +175 -0
  24. moose_lib/dmv2/web_app_helpers.py +96 -0
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +537 -68
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +266 -156
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +38 -1
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +210 -0
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +114 -0
  56. tests/test_web_app.py +227 -0
  57. moose_lib-0.6.90.dist-info/RECORD +0 -42
  58. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/internal.py CHANGED
@@ -6,6 +6,7 @@ of various Moose resources (tables, streams/topics, APIs) and functions
6
6
  to convert the user-defined resources (from `dmv2.py`) into a serializable
7
7
  JSON format expected by the Moose infrastructure management system.
8
8
  """
9
+
9
10
  from importlib import import_module
10
11
  from typing import Literal, Optional, List, Any, Dict, Union, TYPE_CHECKING
11
12
  from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
@@ -19,17 +20,20 @@ from moose_lib.dmv2 import (
19
20
  get_apis,
20
21
  get_sql_resources,
21
22
  get_workflows,
23
+ get_web_apps,
22
24
  OlapTable,
23
- View,
24
- MaterializedView,
25
- SqlResource
25
+ OlapConfig,
26
+ SqlResource,
26
27
  )
28
+ from moose_lib.dmv2.stream import KafkaSchemaConfig
27
29
  from pydantic.alias_generators import to_camel
28
30
  from pydantic.json_schema import JsonSchemaValue
29
31
 
30
- model_config = ConfigDict(alias_generator=AliasGenerator(
31
- serialization_alias=to_camel,
32
- ))
32
+ model_config = ConfigDict(
33
+ alias_generator=AliasGenerator(
34
+ serialization_alias=to_camel,
35
+ )
36
+ )
33
37
 
34
38
 
35
39
  class Target(BaseModel):
@@ -41,6 +45,7 @@ class Target(BaseModel):
41
45
  version: Optional version of the target stream configuration.
42
46
  metadata: Optional metadata for the target stream.
43
47
  """
48
+
44
49
  kind: Literal["stream"]
45
50
  name: str
46
51
  version: Optional[str] = None
@@ -53,22 +58,26 @@ class Consumer(BaseModel):
53
58
  Attributes:
54
59
  version: Optional version of the consumer configuration.
55
60
  """
61
+
56
62
  version: Optional[str] = None
57
63
 
58
64
 
59
65
  class BaseEngineConfigDict(BaseModel):
60
66
  """Base engine configuration for all ClickHouse table engines."""
67
+
61
68
  model_config = model_config
62
69
  engine: str
63
70
 
64
71
 
65
72
  class MergeTreeConfigDict(BaseEngineConfigDict):
66
73
  """Configuration for MergeTree engine."""
74
+
67
75
  engine: Literal["MergeTree"] = "MergeTree"
68
76
 
69
77
 
70
78
  class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
71
79
  """Configuration for ReplacingMergeTree engine."""
80
+
72
81
  engine: Literal["ReplacingMergeTree"] = "ReplacingMergeTree"
73
82
  ver: Optional[str] = None
74
83
  is_deleted: Optional[str] = None
@@ -76,16 +85,91 @@ class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
76
85
 
77
86
  class AggregatingMergeTreeConfigDict(BaseEngineConfigDict):
78
87
  """Configuration for AggregatingMergeTree engine."""
88
+
79
89
  engine: Literal["AggregatingMergeTree"] = "AggregatingMergeTree"
80
90
 
81
91
 
82
92
  class SummingMergeTreeConfigDict(BaseEngineConfigDict):
83
93
  """Configuration for SummingMergeTree engine."""
94
+
84
95
  engine: Literal["SummingMergeTree"] = "SummingMergeTree"
96
+ columns: Optional[List[str]] = None
97
+
98
+
99
+ class CollapsingMergeTreeConfigDict(BaseEngineConfigDict):
100
+ """Configuration for CollapsingMergeTree engine."""
101
+
102
+ engine: Literal["CollapsingMergeTree"] = "CollapsingMergeTree"
103
+ sign: str
104
+
105
+
106
+ class VersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
107
+ """Configuration for VersionedCollapsingMergeTree engine."""
108
+
109
+ engine: Literal["VersionedCollapsingMergeTree"] = "VersionedCollapsingMergeTree"
110
+ sign: str
111
+ ver: str
112
+
113
+
114
+ class ReplicatedMergeTreeConfigDict(BaseEngineConfigDict):
115
+ """Configuration for ReplicatedMergeTree engine."""
116
+
117
+ engine: Literal["ReplicatedMergeTree"] = "ReplicatedMergeTree"
118
+ keeper_path: Optional[str] = None
119
+ replica_name: Optional[str] = None
120
+
121
+
122
+ class ReplicatedReplacingMergeTreeConfigDict(BaseEngineConfigDict):
123
+ """Configuration for ReplicatedReplacingMergeTree engine."""
124
+
125
+ engine: Literal["ReplicatedReplacingMergeTree"] = "ReplicatedReplacingMergeTree"
126
+ keeper_path: Optional[str] = None
127
+ replica_name: Optional[str] = None
128
+ ver: Optional[str] = None
129
+ is_deleted: Optional[str] = None
130
+
131
+
132
+ class ReplicatedAggregatingMergeTreeConfigDict(BaseEngineConfigDict):
133
+ """Configuration for ReplicatedAggregatingMergeTree engine."""
134
+
135
+ engine: Literal["ReplicatedAggregatingMergeTree"] = "ReplicatedAggregatingMergeTree"
136
+ keeper_path: Optional[str] = None
137
+ replica_name: Optional[str] = None
138
+
139
+
140
+ class ReplicatedSummingMergeTreeConfigDict(BaseEngineConfigDict):
141
+ """Configuration for ReplicatedSummingMergeTree engine."""
142
+
143
+ engine: Literal["ReplicatedSummingMergeTree"] = "ReplicatedSummingMergeTree"
144
+ keeper_path: Optional[str] = None
145
+ replica_name: Optional[str] = None
146
+ columns: Optional[List[str]] = None
147
+
148
+
149
+ class ReplicatedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
150
+ """Configuration for ReplicatedCollapsingMergeTree engine."""
151
+
152
+ engine: Literal["ReplicatedCollapsingMergeTree"] = "ReplicatedCollapsingMergeTree"
153
+ keeper_path: Optional[str] = None
154
+ replica_name: Optional[str] = None
155
+ sign: str
156
+
157
+
158
+ class ReplicatedVersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
159
+ """Configuration for ReplicatedVersionedCollapsingMergeTree engine."""
160
+
161
+ engine: Literal["ReplicatedVersionedCollapsingMergeTree"] = (
162
+ "ReplicatedVersionedCollapsingMergeTree"
163
+ )
164
+ keeper_path: Optional[str] = None
165
+ replica_name: Optional[str] = None
166
+ sign: str
167
+ ver: str
85
168
 
86
169
 
87
170
  class S3QueueConfigDict(BaseEngineConfigDict):
88
171
  """Configuration for S3Queue engine with all specific fields."""
172
+
89
173
  engine: Literal["S3Queue"] = "S3Queue"
90
174
  s3_path: str
91
175
  format: str
@@ -95,13 +179,95 @@ class S3QueueConfigDict(BaseEngineConfigDict):
95
179
  headers: Optional[Dict[str, str]] = None
96
180
 
97
181
 
182
+ class S3ConfigDict(BaseEngineConfigDict):
183
+ """Configuration for S3 engine."""
184
+
185
+ engine: Literal["S3"] = "S3"
186
+ path: str
187
+ format: str
188
+ aws_access_key_id: Optional[str] = None
189
+ aws_secret_access_key: Optional[str] = None
190
+ compression: Optional[str] = None
191
+ partition_strategy: Optional[str] = None
192
+ partition_columns_in_data_file: Optional[str] = None
193
+
194
+
195
+ class BufferConfigDict(BaseEngineConfigDict):
196
+ """Configuration for Buffer engine."""
197
+
198
+ engine: Literal["Buffer"] = "Buffer"
199
+ target_database: str
200
+ target_table: str
201
+ num_layers: int
202
+ min_time: int
203
+ max_time: int
204
+ min_rows: int
205
+ max_rows: int
206
+ min_bytes: int
207
+ max_bytes: int
208
+ flush_time: Optional[int] = None
209
+ flush_rows: Optional[int] = None
210
+ flush_bytes: Optional[int] = None
211
+
212
+
213
+ class DistributedConfigDict(BaseEngineConfigDict):
214
+ """Configuration for Distributed engine."""
215
+
216
+ engine: Literal["Distributed"] = "Distributed"
217
+ cluster: str
218
+ target_database: str
219
+ target_table: str
220
+ sharding_key: Optional[str] = None
221
+ policy_name: Optional[str] = None
222
+
223
+
224
+ class IcebergS3ConfigDict(BaseEngineConfigDict):
225
+ """Configuration for IcebergS3 engine."""
226
+
227
+ engine: Literal["IcebergS3"] = "IcebergS3"
228
+ path: str
229
+ format: str
230
+ aws_access_key_id: Optional[str] = None
231
+ aws_secret_access_key: Optional[str] = None
232
+ compression: Optional[str] = None
233
+
234
+
235
+ class KafkaConfigDict(BaseEngineConfigDict):
236
+ """Configuration for Kafka engine.
237
+
238
+ Constructor: ENGINE = Kafka('broker', 'topic', 'group', 'format')
239
+ Settings (kafka_schema, kafka_num_consumers, security, etc.) go in table settings.
240
+
241
+ Reference: https://clickhouse.com/docs/engines/table-engines/integrations/kafka
242
+ """
243
+
244
+ engine: Literal["Kafka"] = "Kafka"
245
+ broker_list: str
246
+ topic_list: str
247
+ group_name: str
248
+ format: str
249
+
250
+
98
251
  # Discriminated union of all engine configurations
99
252
  EngineConfigDict = Union[
100
253
  MergeTreeConfigDict,
101
254
  ReplacingMergeTreeConfigDict,
102
255
  AggregatingMergeTreeConfigDict,
103
256
  SummingMergeTreeConfigDict,
104
- S3QueueConfigDict
257
+ CollapsingMergeTreeConfigDict,
258
+ VersionedCollapsingMergeTreeConfigDict,
259
+ ReplicatedMergeTreeConfigDict,
260
+ ReplicatedReplacingMergeTreeConfigDict,
261
+ ReplicatedAggregatingMergeTreeConfigDict,
262
+ ReplicatedSummingMergeTreeConfigDict,
263
+ ReplicatedCollapsingMergeTreeConfigDict,
264
+ ReplicatedVersionedCollapsingMergeTreeConfigDict,
265
+ S3QueueConfigDict,
266
+ S3ConfigDict,
267
+ BufferConfigDict,
268
+ DistributedConfigDict,
269
+ IcebergS3ConfigDict,
270
+ KafkaConfigDict,
105
271
  ]
106
272
 
107
273
 
@@ -113,23 +279,33 @@ class TableConfig(BaseModel):
113
279
  columns: List of columns with their types and attributes.
114
280
  order_by: List of columns used for the ORDER BY clause.
115
281
  partition_by: The column name used for the PARTITION BY clause.
282
+ sample_by_expression: Optional SAMPLE BY expression for data sampling.
283
+ primary_key_expression: Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified).
116
284
  engine_config: Engine configuration with type-safe, engine-specific parameters.
117
285
  version: Optional version string of the table configuration.
118
286
  metadata: Optional metadata for the table.
119
287
  life_cycle: Lifecycle management setting for the table.
120
288
  table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
289
+ cluster: Optional cluster name for ON CLUSTER support in ClickHouse.
121
290
  """
291
+
122
292
  model_config = model_config
123
293
 
124
294
  name: str
125
295
  columns: List[Column]
126
- order_by: List[str]
296
+ order_by: List[str] | str
127
297
  partition_by: Optional[str]
128
- engine_config: Optional[EngineConfigDict] = Field(None, discriminator='engine')
298
+ sample_by_expression: Optional[str] = None
299
+ primary_key_expression: Optional[str] = None
300
+ engine_config: Optional[EngineConfigDict] = Field(None, discriminator="engine")
129
301
  version: Optional[str] = None
130
302
  metadata: Optional[dict] = None
131
303
  life_cycle: Optional[str] = None
132
- table_settings: Optional[Dict[str, str]] = None
304
+ table_settings: Optional[dict[str, str]] = None
305
+ indexes: list[OlapConfig.TableIndex] = []
306
+ ttl: Optional[str] = None
307
+ database: Optional[str] = None
308
+ cluster: Optional[str] = None
133
309
 
134
310
 
135
311
  class TopicConfig(BaseModel):
@@ -149,6 +325,7 @@ class TopicConfig(BaseModel):
149
325
  metadata: Optional metadata for the topic.
150
326
  life_cycle: Lifecycle management setting for the topic.
151
327
  """
328
+
152
329
  model_config = model_config
153
330
 
154
331
  name: str
@@ -163,6 +340,7 @@ class TopicConfig(BaseModel):
163
340
  consumers: List[Consumer]
164
341
  metadata: Optional[dict] = None
165
342
  life_cycle: Optional[str] = None
343
+ schema_config: Optional[KafkaSchemaConfig] = None
166
344
 
167
345
 
168
346
  class IngestApiConfig(BaseModel):
@@ -176,7 +354,10 @@ class IngestApiConfig(BaseModel):
176
354
  version: Optional version string of the API configuration.
177
355
  path: Optional custom path for the ingestion endpoint.
178
356
  metadata: Optional metadata for the API.
357
+ allow_extra_fields: Whether this API allows extra fields beyond the defined columns.
358
+ When true, extra fields in payloads are passed through to streaming functions.
179
359
  """
360
+
180
361
  model_config = model_config
181
362
 
182
363
  name: str
@@ -187,6 +368,7 @@ class IngestApiConfig(BaseModel):
187
368
  path: Optional[str] = None
188
369
  metadata: Optional[dict] = None
189
370
  json_schema: dict[str, Any] = Field(serialization_alias="schema")
371
+ allow_extra_fields: bool = False
190
372
 
191
373
 
192
374
  class InternalApiConfig(BaseModel):
@@ -200,6 +382,7 @@ class InternalApiConfig(BaseModel):
200
382
  path: Optional custom path for the API endpoint.
201
383
  metadata: Optional metadata for the API.
202
384
  """
385
+
203
386
  model_config = model_config
204
387
 
205
388
  name: str
@@ -219,6 +402,7 @@ class WorkflowJson(BaseModel):
219
402
  timeout: Optional timeout string for the entire workflow.
220
403
  schedule: Optional cron-like schedule string for recurring execution.
221
404
  """
405
+
222
406
  model_config = model_config
223
407
 
224
408
  name: str
@@ -227,6 +411,34 @@ class WorkflowJson(BaseModel):
227
411
  schedule: Optional[str] = None
228
412
 
229
413
 
414
+ class WebAppMetadataJson(BaseModel):
415
+ """Internal representation of WebApp metadata for serialization.
416
+
417
+ Attributes:
418
+ description: Optional description of the WebApp.
419
+ """
420
+
421
+ model_config = model_config
422
+
423
+ description: Optional[str] = None
424
+
425
+
426
+ class WebAppJson(BaseModel):
427
+ """Internal representation of a WebApp configuration for serialization.
428
+
429
+ Attributes:
430
+ name: Name of the WebApp.
431
+ mount_path: The URL path where the WebApp is mounted.
432
+ metadata: Optional metadata for documentation purposes.
433
+ """
434
+
435
+ model_config = model_config
436
+
437
+ name: str
438
+ mount_path: str
439
+ metadata: Optional[WebAppMetadataJson] = None
440
+
441
+
230
442
  class InfrastructureSignatureJson(BaseModel):
231
443
  """Represents the unique signature of an infrastructure component (Table, Topic, etc.).
232
444
 
@@ -236,8 +448,16 @@ class InfrastructureSignatureJson(BaseModel):
236
448
  id: A unique identifier for the resource instance (often name + version).
237
449
  kind: The type of the infrastructure component.
238
450
  """
451
+
239
452
  id: str
240
- kind: Literal["Table", "Topic", "ApiEndpoint", "TopicToTableSyncProcess", "View", "SqlResource"]
453
+ kind: Literal[
454
+ "Table",
455
+ "Topic",
456
+ "ApiEndpoint",
457
+ "TopicToTableSyncProcess",
458
+ "View",
459
+ "SqlResource",
460
+ ]
241
461
 
242
462
 
243
463
  class SqlResourceConfig(BaseModel):
@@ -249,8 +469,10 @@ class SqlResourceConfig(BaseModel):
249
469
  teardown: List of SQL commands required to drop the resource.
250
470
  pulls_data_from: List of infrastructure components this resource reads from.
251
471
  pushes_data_to: List of infrastructure components this resource writes to.
472
+ source_file: Optional path to the source file where this resource is defined.
252
473
  metadata: Optional metadata for the resource.
253
474
  """
475
+
254
476
  model_config = model_config
255
477
 
256
478
  name: str
@@ -258,6 +480,7 @@ class SqlResourceConfig(BaseModel):
258
480
  teardown: list[str]
259
481
  pulls_data_from: list[InfrastructureSignatureJson]
260
482
  pushes_data_to: list[InfrastructureSignatureJson]
483
+ source_file: Optional[str] = None
261
484
  metadata: Optional[dict] = None
262
485
 
263
486
 
@@ -273,7 +496,9 @@ class InfrastructureMap(BaseModel):
273
496
  apis: Dictionary mapping API names to their configurations.
274
497
  sql_resources: Dictionary mapping SQL resource names to their configurations.
275
498
  workflows: Dictionary mapping workflow names to their configurations.
499
+ web_apps: Dictionary mapping WebApp names to their configurations.
276
500
  """
501
+
277
502
  model_config = model_config
278
503
 
279
504
  tables: dict[str, TableConfig]
@@ -282,6 +507,7 @@ class InfrastructureMap(BaseModel):
282
507
  apis: dict[str, InternalApiConfig]
283
508
  sql_resources: dict[str, SqlResourceConfig]
284
509
  workflows: dict[str, WorkflowJson]
510
+ web_apps: dict[str, WebAppJson]
285
511
 
286
512
 
287
513
  def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
@@ -299,11 +525,15 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
299
525
  Raises:
300
526
  TypeError: If the input object is not a recognized SQL resource type.
301
527
  """
302
- if hasattr(r, 'kind'):
528
+ if hasattr(r, "kind"):
303
529
  if r.kind == "OlapTable":
304
530
  # Explicitly cast for type hint checking if needed, though Python is dynamic
305
531
  table = r # type: OlapTable
306
- res_id = f"{table.name}_{table.config.version}" if table.config.version else table.name
532
+ res_id = (
533
+ f"{table.name}_{table.config.version}"
534
+ if table.config.version
535
+ else table.name
536
+ )
307
537
  return InfrastructureSignatureJson(id=res_id, kind="Table")
308
538
  elif r.kind == "SqlResource":
309
539
  # Explicitly cast for type hint checking if needed
@@ -316,50 +546,220 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
316
546
  raise TypeError(f"Object {r} lacks a 'kind' attribute for dependency mapping.")
317
547
 
318
548
 
319
- def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig],
320
- table: OlapTable) -> EngineConfigDict:
549
+ def _convert_basic_engine_instance(
550
+ engine: "EngineConfig",
551
+ ) -> Optional[EngineConfigDict]:
552
+ """Convert basic MergeTree engine instances to config dict.
553
+
554
+ Args:
555
+ engine: An EngineConfig instance
556
+
557
+ Returns:
558
+ EngineConfigDict if matched, None otherwise
559
+ """
560
+ from moose_lib.blocks import (
561
+ MergeTreeEngine,
562
+ ReplacingMergeTreeEngine,
563
+ AggregatingMergeTreeEngine,
564
+ SummingMergeTreeEngine,
565
+ CollapsingMergeTreeEngine,
566
+ VersionedCollapsingMergeTreeEngine,
567
+ )
568
+
569
+ if isinstance(engine, MergeTreeEngine):
570
+ return MergeTreeConfigDict()
571
+ elif isinstance(engine, ReplacingMergeTreeEngine):
572
+ return ReplacingMergeTreeConfigDict(
573
+ ver=engine.ver, is_deleted=engine.is_deleted
574
+ )
575
+ elif isinstance(engine, AggregatingMergeTreeEngine):
576
+ return AggregatingMergeTreeConfigDict()
577
+ elif isinstance(engine, SummingMergeTreeEngine):
578
+ return SummingMergeTreeConfigDict(columns=engine.columns)
579
+ elif isinstance(engine, CollapsingMergeTreeEngine):
580
+ return CollapsingMergeTreeConfigDict(sign=engine.sign)
581
+ elif isinstance(engine, VersionedCollapsingMergeTreeEngine):
582
+ return VersionedCollapsingMergeTreeConfigDict(sign=engine.sign, ver=engine.ver)
583
+ return None
584
+
585
+
586
+ def _convert_replicated_engine_instance(
587
+ engine: "EngineConfig",
588
+ ) -> Optional[EngineConfigDict]:
589
+ """Convert replicated MergeTree engine instances to config dict.
590
+
591
+ Args:
592
+ engine: An EngineConfig instance
593
+
594
+ Returns:
595
+ EngineConfigDict if matched, None otherwise
596
+ """
597
+ from moose_lib.blocks import (
598
+ ReplicatedMergeTreeEngine,
599
+ ReplicatedReplacingMergeTreeEngine,
600
+ ReplicatedAggregatingMergeTreeEngine,
601
+ ReplicatedSummingMergeTreeEngine,
602
+ ReplicatedCollapsingMergeTreeEngine,
603
+ ReplicatedVersionedCollapsingMergeTreeEngine,
604
+ )
605
+
606
+ if isinstance(engine, ReplicatedMergeTreeEngine):
607
+ return ReplicatedMergeTreeConfigDict(
608
+ keeper_path=engine.keeper_path, replica_name=engine.replica_name
609
+ )
610
+ elif isinstance(engine, ReplicatedReplacingMergeTreeEngine):
611
+ return ReplicatedReplacingMergeTreeConfigDict(
612
+ keeper_path=engine.keeper_path,
613
+ replica_name=engine.replica_name,
614
+ ver=engine.ver,
615
+ is_deleted=engine.is_deleted,
616
+ )
617
+ elif isinstance(engine, ReplicatedAggregatingMergeTreeEngine):
618
+ return ReplicatedAggregatingMergeTreeConfigDict(
619
+ keeper_path=engine.keeper_path, replica_name=engine.replica_name
620
+ )
621
+ elif isinstance(engine, ReplicatedSummingMergeTreeEngine):
622
+ return ReplicatedSummingMergeTreeConfigDict(
623
+ keeper_path=engine.keeper_path,
624
+ replica_name=engine.replica_name,
625
+ columns=engine.columns,
626
+ )
627
+ elif isinstance(engine, ReplicatedCollapsingMergeTreeEngine):
628
+ return ReplicatedCollapsingMergeTreeConfigDict(
629
+ keeper_path=engine.keeper_path,
630
+ replica_name=engine.replica_name,
631
+ sign=engine.sign,
632
+ )
633
+ elif isinstance(engine, ReplicatedVersionedCollapsingMergeTreeEngine):
634
+ return ReplicatedVersionedCollapsingMergeTreeConfigDict(
635
+ keeper_path=engine.keeper_path,
636
+ replica_name=engine.replica_name,
637
+ sign=engine.sign,
638
+ ver=engine.ver,
639
+ )
640
+ return None
641
+
642
+
643
+ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineConfigDict:
644
+ """Convert an EngineConfig instance to config dict format.
645
+
646
+ Args:
647
+ engine: An EngineConfig instance
648
+
649
+ Returns:
650
+ EngineConfigDict with engine-specific configuration
651
+ """
652
+ from moose_lib.blocks import (
653
+ S3QueueEngine,
654
+ S3Engine,
655
+ BufferEngine,
656
+ DistributedEngine,
657
+ IcebergS3Engine,
658
+ KafkaEngine,
659
+ )
660
+
661
+ # Try S3Queue first
662
+ if isinstance(engine, S3QueueEngine):
663
+ return S3QueueConfigDict(
664
+ s3_path=engine.s3_path,
665
+ format=engine.format,
666
+ aws_access_key_id=engine.aws_access_key_id,
667
+ aws_secret_access_key=engine.aws_secret_access_key,
668
+ compression=engine.compression,
669
+ headers=engine.headers,
670
+ )
671
+
672
+ # Try S3
673
+ if isinstance(engine, S3Engine):
674
+ return S3ConfigDict(
675
+ path=engine.path,
676
+ format=engine.format,
677
+ aws_access_key_id=engine.aws_access_key_id,
678
+ aws_secret_access_key=engine.aws_secret_access_key,
679
+ compression=engine.compression,
680
+ partition_strategy=engine.partition_strategy,
681
+ partition_columns_in_data_file=engine.partition_columns_in_data_file,
682
+ )
683
+
684
+ # Try Buffer
685
+ if isinstance(engine, BufferEngine):
686
+ return BufferConfigDict(
687
+ target_database=engine.target_database,
688
+ target_table=engine.target_table,
689
+ num_layers=engine.num_layers,
690
+ min_time=engine.min_time,
691
+ max_time=engine.max_time,
692
+ min_rows=engine.min_rows,
693
+ max_rows=engine.max_rows,
694
+ min_bytes=engine.min_bytes,
695
+ max_bytes=engine.max_bytes,
696
+ flush_time=engine.flush_time,
697
+ flush_rows=engine.flush_rows,
698
+ flush_bytes=engine.flush_bytes,
699
+ )
700
+
701
+ # Try Distributed
702
+ if isinstance(engine, DistributedEngine):
703
+ return DistributedConfigDict(
704
+ cluster=engine.cluster,
705
+ target_database=engine.target_database,
706
+ target_table=engine.target_table,
707
+ sharding_key=engine.sharding_key,
708
+ policy_name=engine.policy_name,
709
+ )
710
+
711
+ # Try IcebergS3
712
+ if isinstance(engine, IcebergS3Engine):
713
+ return IcebergS3ConfigDict(
714
+ path=engine.path,
715
+ format=engine.format,
716
+ aws_access_key_id=engine.aws_access_key_id,
717
+ aws_secret_access_key=engine.aws_secret_access_key,
718
+ compression=engine.compression,
719
+ )
720
+
721
+ # Try Kafka
722
+ if isinstance(engine, KafkaEngine):
723
+ return KafkaConfigDict(
724
+ broker_list=engine.broker_list,
725
+ topic_list=engine.topic_list,
726
+ group_name=engine.group_name,
727
+ format=engine.format,
728
+ )
729
+
730
+ # Try basic engines
731
+ basic_config = _convert_basic_engine_instance(engine)
732
+ if basic_config:
733
+ return basic_config
734
+
735
+ # Try replicated engines
736
+ replicated_config = _convert_replicated_engine_instance(engine)
737
+ if replicated_config:
738
+ return replicated_config
739
+
740
+ # Fallback for any other EngineConfig subclass
741
+ return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
742
+
743
+
744
+ def _convert_engine_to_config_dict(
745
+ engine: Union[ClickHouseEngines, EngineConfig], table: OlapTable
746
+ ) -> EngineConfigDict:
321
747
  """Convert engine enum or EngineConfig instance to new engine config format.
322
-
748
+
323
749
  Args:
324
750
  engine: Either a ClickHouseEngines enum value or an EngineConfig instance
325
751
  table: The OlapTable instance with configuration
326
-
752
+
327
753
  Returns:
328
754
  EngineConfigDict with engine-specific configuration
329
755
  """
330
756
  from moose_lib import ClickHouseEngines
331
- from moose_lib.blocks import (
332
- EngineConfig, S3QueueEngine, MergeTreeEngine,
333
- ReplacingMergeTreeEngine, AggregatingMergeTreeEngine,
334
- SummingMergeTreeEngine
335
- )
757
+ from moose_lib.blocks import EngineConfig
336
758
  from moose_lib.commons import Logger
337
759
 
338
760
  # Check if engine is an EngineConfig instance (new API)
339
761
  if isinstance(engine, EngineConfig):
340
- if isinstance(engine, S3QueueEngine):
341
- return S3QueueConfigDict(
342
- s3_path=engine.s3_path,
343
- format=engine.format,
344
- aws_access_key_id=engine.aws_access_key_id,
345
- aws_secret_access_key=engine.aws_secret_access_key,
346
- compression=engine.compression,
347
- headers=engine.headers
348
- )
349
- elif isinstance(engine, ReplacingMergeTreeEngine):
350
- return ReplacingMergeTreeConfigDict(
351
- ver=engine.ver,
352
- is_deleted=engine.is_deleted
353
- )
354
- elif isinstance(engine, AggregatingMergeTreeEngine):
355
- return AggregatingMergeTreeConfigDict()
356
- elif isinstance(engine, SummingMergeTreeEngine):
357
- return SummingMergeTreeConfigDict()
358
- elif isinstance(engine, MergeTreeEngine):
359
- return MergeTreeConfigDict()
360
- else:
361
- # Fallback for any other EngineConfig subclass - use base class
362
- return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
762
+ return _convert_engine_instance_to_config_dict(engine)
363
763
 
364
764
  # Handle legacy enum-based engine configuration
365
765
  if isinstance(engine, ClickHouseEngines):
@@ -368,7 +768,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
368
768
  engine_name = str(engine)
369
769
 
370
770
  # For S3Queue with legacy configuration, check for s3_queue_engine_config
371
- if engine_name == "S3Queue" and hasattr(table.config, 's3_queue_engine_config'):
771
+ if engine_name == "S3Queue" and hasattr(table.config, "s3_queue_engine_config"):
372
772
  s3_config = table.config.s3_queue_engine_config
373
773
  if s3_config:
374
774
  logger = Logger(action="S3QueueConfig")
@@ -382,7 +782,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
382
782
  aws_access_key_id=s3_config.aws_access_key_id,
383
783
  aws_secret_access_key=s3_config.aws_secret_access_key,
384
784
  compression=s3_config.compression,
385
- headers=s3_config.headers
785
+ headers=s3_config.headers,
386
786
  )
387
787
 
388
788
  # Map engine names to specific config classes
@@ -391,6 +791,10 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
391
791
  "ReplacingMergeTree": ReplacingMergeTreeConfigDict,
392
792
  "AggregatingMergeTree": AggregatingMergeTreeConfigDict,
393
793
  "SummingMergeTree": SummingMergeTreeConfigDict,
794
+ "ReplicatedMergeTree": ReplicatedMergeTreeConfigDict,
795
+ "ReplicatedReplacingMergeTree": ReplicatedReplacingMergeTreeConfigDict,
796
+ "ReplicatedAggregatingMergeTree": ReplicatedAggregatingMergeTreeConfigDict,
797
+ "ReplicatedSummingMergeTree": ReplicatedSummingMergeTreeConfigDict,
394
798
  }
395
799
 
396
800
  config_class = engine_map.get(engine_name)
@@ -418,8 +822,9 @@ def to_infra_map() -> dict:
418
822
  apis = {}
419
823
  sql_resources = {}
420
824
  workflows = {}
825
+ web_apps = {}
421
826
 
422
- for name, table in get_tables().items():
827
+ for _registry_key, table in get_tables().items():
423
828
  # Convert engine configuration to new format
424
829
  engine_config = None
425
830
  if table.config.engine:
@@ -434,17 +839,45 @@ def to_infra_map() -> dict:
434
839
  if "mode" not in table_settings:
435
840
  table_settings["mode"] = "unordered"
436
841
 
437
- tables[name] = TableConfig(
438
- name=name,
842
+ id_key = (
843
+ f"{table.name}_{table.config.version}"
844
+ if table.config.version
845
+ else table.name
846
+ )
847
+
848
+ # Determine ORDER BY: list of fields or single expression
849
+ has_fields = bool(table.config.order_by_fields)
850
+ has_expr = table.config.order_by_expression is not None
851
+ if has_fields and has_expr:
852
+ raise ValueError(
853
+ f"Table {table.name}: Provide either order_by_fields or order_by_expression, not both."
854
+ )
855
+
856
+ order_by_value = (
857
+ table.config.order_by_expression
858
+ if has_expr
859
+ else table.config.order_by_fields
860
+ )
861
+
862
+ tables[id_key] = TableConfig(
863
+ name=table.name,
439
864
  columns=table._column_list,
440
- order_by=table.config.order_by_fields,
865
+ order_by=order_by_value,
441
866
  partition_by=table.config.partition_by,
867
+ sample_by_expression=table.config.sample_by_expression,
868
+ primary_key_expression=table.config.primary_key_expression,
442
869
  engine_config=engine_config,
443
870
  version=table.config.version,
444
871
  metadata=getattr(table, "metadata", None),
445
- life_cycle=table.config.life_cycle.value if table.config.life_cycle else None,
872
+ life_cycle=(
873
+ table.config.life_cycle.value if table.config.life_cycle else None
874
+ ),
446
875
  # Map 'settings' to 'table_settings' for internal use
447
876
  table_settings=table_settings if table_settings else None,
877
+ indexes=table.config.indexes,
878
+ ttl=table.config.ttl,
879
+ database=table.config.database,
880
+ cluster=table.config.cluster,
448
881
  )
449
882
 
450
883
  for name, stream in get_streams().items():
@@ -460,15 +893,20 @@ def to_infra_map() -> dict:
460
893
  ]
461
894
 
462
895
  consumers = [
463
- Consumer(version=consumer.config.version)
464
- for consumer in stream.consumers
896
+ Consumer(version=consumer.config.version) for consumer in stream.consumers
465
897
  ]
466
898
 
467
899
  topics[name] = TopicConfig(
468
900
  name=name,
469
901
  columns=_to_columns(stream._t),
470
- target_table=stream.config.destination.name if stream.config.destination else None,
471
- target_table_version=stream.config.destination.config.version if stream.config.destination else None,
902
+ target_table=(
903
+ stream.config.destination.name if stream.config.destination else None
904
+ ),
905
+ target_table_version=(
906
+ stream.config.destination.config.version
907
+ if stream.config.destination
908
+ else None
909
+ ),
472
910
  retention_period=stream.config.retention_period,
473
911
  partition_count=stream.config.parallelism,
474
912
  version=stream.config.version,
@@ -476,24 +914,33 @@ def to_infra_map() -> dict:
476
914
  has_multi_transform=stream._multipleTransformations is not None,
477
915
  consumers=consumers,
478
916
  metadata=getattr(stream, "metadata", None),
479
- life_cycle=stream.config.life_cycle.value if stream.config.life_cycle else None,
917
+ life_cycle=(
918
+ stream.config.life_cycle.value if stream.config.life_cycle else None
919
+ ),
920
+ schema_config=stream.config.schema_config,
480
921
  )
481
922
 
482
923
  for name, api in get_ingest_apis().items():
924
+ # Check if the Pydantic model allows extra fields (extra='allow')
925
+ # This is the Python equivalent of TypeScript's index signatures
926
+ model_allows_extra = api._t.model_config.get("extra") == "allow"
927
+
483
928
  ingest_apis[name] = IngestApiConfig(
484
929
  name=name,
485
930
  columns=_to_columns(api._t),
486
931
  version=api.config.version,
487
932
  path=api.config.path,
488
- write_to=Target(
489
- kind="stream",
490
- name=api.config.destination.name
491
- ),
933
+ write_to=Target(kind="stream", name=api.config.destination.name),
492
934
  metadata=getattr(api, "metadata", None),
493
935
  json_schema=api._t.model_json_schema(
494
- ref_template='#/components/schemas/{model}'
936
+ ref_template="#/components/schemas/{model}"
495
937
  ),
496
- dead_letter_queue=api.config.dead_letter_queue.name if api.config.dead_letter_queue else None
938
+ dead_letter_queue=(
939
+ api.config.dead_letter_queue.name
940
+ if api.config.dead_letter_queue
941
+ else None
942
+ ),
943
+ allow_extra_fields=model_allows_extra,
497
944
  )
498
945
 
499
946
  for name, api in get_apis().items():
@@ -511,8 +958,13 @@ def to_infra_map() -> dict:
511
958
  name=resource.name,
512
959
  setup=resource.setup,
513
960
  teardown=resource.teardown,
514
- pulls_data_from=[_map_sql_resource_ref(dep) for dep in resource.pulls_data_from],
515
- pushes_data_to=[_map_sql_resource_ref(dep) for dep in resource.pushes_data_to],
961
+ pulls_data_from=[
962
+ _map_sql_resource_ref(dep) for dep in resource.pulls_data_from
963
+ ],
964
+ pushes_data_to=[
965
+ _map_sql_resource_ref(dep) for dep in resource.pushes_data_to
966
+ ],
967
+ source_file=getattr(resource, "source_file", None),
516
968
  metadata=getattr(resource, "metadata", None),
517
969
  )
518
970
 
@@ -524,16 +976,30 @@ def to_infra_map() -> dict:
524
976
  schedule=workflow.config.schedule,
525
977
  )
526
978
 
979
+ for name, web_app in get_web_apps().items():
980
+ mount_path = web_app.config.mount_path or "/"
981
+ metadata = None
982
+ if web_app.config.metadata:
983
+ metadata = WebAppMetadataJson(
984
+ description=web_app.config.metadata.description
985
+ )
986
+ web_apps[name] = WebAppJson(
987
+ name=web_app.name,
988
+ mount_path=mount_path,
989
+ metadata=metadata,
990
+ )
991
+
527
992
  infra_map = InfrastructureMap(
528
993
  tables=tables,
529
994
  topics=topics,
530
995
  ingest_apis=ingest_apis,
531
996
  apis=apis,
532
997
  sql_resources=sql_resources,
533
- workflows=workflows
998
+ workflows=workflows,
999
+ web_apps=web_apps,
534
1000
  )
535
1001
 
536
- return infra_map.model_dump(by_alias=True)
1002
+ return infra_map.model_dump(by_alias=True, exclude_none=False)
537
1003
 
538
1004
 
539
1005
  def load_models():
@@ -549,7 +1015,10 @@ def load_models():
549
1015
  (`___MOOSE_STUFF___start` and `end___MOOSE_STUFF___`), which the
550
1016
  calling system uses to extract the configuration.
551
1017
  """
552
- import_module("app.main")
1018
+ import os
1019
+
1020
+ source_dir = os.environ.get("MOOSE_SOURCE_DIR", "app")
1021
+ import_module(f"{source_dir}.main")
553
1022
 
554
1023
  # Generate the infrastructure map
555
1024
  infra_map = to_infra_map()