moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +34 -3
- moose_lib/blocks.py +416 -52
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +37 -30
- moose_lib/config/config_file.py +5 -1
- moose_lib/config/runtime.py +73 -34
- moose_lib/data_models.py +331 -61
- moose_lib/dmv2/__init__.py +69 -73
- moose_lib/dmv2/_registry.py +2 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +35 -16
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +299 -151
- moose_lib/dmv2/registry.py +18 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +75 -23
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +11 -6
- moose_lib/dmv2/web_app_helpers.py +5 -1
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +340 -56
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +233 -117
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +6 -5
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +52 -58
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +42 -40
- tests/test_web_app.py +11 -5
- moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/internal.py
CHANGED
|
@@ -6,6 +6,7 @@ of various Moose resources (tables, streams/topics, APIs) and functions
|
|
|
6
6
|
to convert the user-defined resources (from `dmv2.py`) into a serializable
|
|
7
7
|
JSON format expected by the Moose infrastructure management system.
|
|
8
8
|
"""
|
|
9
|
+
|
|
9
10
|
from importlib import import_module
|
|
10
11
|
from typing import Literal, Optional, List, Any, Dict, Union, TYPE_CHECKING
|
|
11
12
|
from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
|
|
@@ -22,15 +23,17 @@ from moose_lib.dmv2 import (
|
|
|
22
23
|
get_web_apps,
|
|
23
24
|
OlapTable,
|
|
24
25
|
OlapConfig,
|
|
25
|
-
SqlResource
|
|
26
|
+
SqlResource,
|
|
26
27
|
)
|
|
27
28
|
from moose_lib.dmv2.stream import KafkaSchemaConfig
|
|
28
29
|
from pydantic.alias_generators import to_camel
|
|
29
30
|
from pydantic.json_schema import JsonSchemaValue
|
|
30
31
|
|
|
31
|
-
model_config = ConfigDict(
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
model_config = ConfigDict(
|
|
33
|
+
alias_generator=AliasGenerator(
|
|
34
|
+
serialization_alias=to_camel,
|
|
35
|
+
)
|
|
36
|
+
)
|
|
34
37
|
|
|
35
38
|
|
|
36
39
|
class Target(BaseModel):
|
|
@@ -42,6 +45,7 @@ class Target(BaseModel):
|
|
|
42
45
|
version: Optional version of the target stream configuration.
|
|
43
46
|
metadata: Optional metadata for the target stream.
|
|
44
47
|
"""
|
|
48
|
+
|
|
45
49
|
kind: Literal["stream"]
|
|
46
50
|
name: str
|
|
47
51
|
version: Optional[str] = None
|
|
@@ -54,22 +58,26 @@ class Consumer(BaseModel):
|
|
|
54
58
|
Attributes:
|
|
55
59
|
version: Optional version of the consumer configuration.
|
|
56
60
|
"""
|
|
61
|
+
|
|
57
62
|
version: Optional[str] = None
|
|
58
63
|
|
|
59
64
|
|
|
60
65
|
class BaseEngineConfigDict(BaseModel):
|
|
61
66
|
"""Base engine configuration for all ClickHouse table engines."""
|
|
67
|
+
|
|
62
68
|
model_config = model_config
|
|
63
69
|
engine: str
|
|
64
70
|
|
|
65
71
|
|
|
66
72
|
class MergeTreeConfigDict(BaseEngineConfigDict):
|
|
67
73
|
"""Configuration for MergeTree engine."""
|
|
74
|
+
|
|
68
75
|
engine: Literal["MergeTree"] = "MergeTree"
|
|
69
76
|
|
|
70
77
|
|
|
71
78
|
class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
72
79
|
"""Configuration for ReplacingMergeTree engine."""
|
|
80
|
+
|
|
73
81
|
engine: Literal["ReplacingMergeTree"] = "ReplacingMergeTree"
|
|
74
82
|
ver: Optional[str] = None
|
|
75
83
|
is_deleted: Optional[str] = None
|
|
@@ -77,17 +85,35 @@ class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
|
77
85
|
|
|
78
86
|
class AggregatingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
79
87
|
"""Configuration for AggregatingMergeTree engine."""
|
|
88
|
+
|
|
80
89
|
engine: Literal["AggregatingMergeTree"] = "AggregatingMergeTree"
|
|
81
90
|
|
|
82
91
|
|
|
83
92
|
class SummingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
84
93
|
"""Configuration for SummingMergeTree engine."""
|
|
94
|
+
|
|
85
95
|
engine: Literal["SummingMergeTree"] = "SummingMergeTree"
|
|
86
96
|
columns: Optional[List[str]] = None
|
|
87
97
|
|
|
88
98
|
|
|
99
|
+
class CollapsingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
100
|
+
"""Configuration for CollapsingMergeTree engine."""
|
|
101
|
+
|
|
102
|
+
engine: Literal["CollapsingMergeTree"] = "CollapsingMergeTree"
|
|
103
|
+
sign: str
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class VersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
107
|
+
"""Configuration for VersionedCollapsingMergeTree engine."""
|
|
108
|
+
|
|
109
|
+
engine: Literal["VersionedCollapsingMergeTree"] = "VersionedCollapsingMergeTree"
|
|
110
|
+
sign: str
|
|
111
|
+
ver: str
|
|
112
|
+
|
|
113
|
+
|
|
89
114
|
class ReplicatedMergeTreeConfigDict(BaseEngineConfigDict):
|
|
90
115
|
"""Configuration for ReplicatedMergeTree engine."""
|
|
116
|
+
|
|
91
117
|
engine: Literal["ReplicatedMergeTree"] = "ReplicatedMergeTree"
|
|
92
118
|
keeper_path: Optional[str] = None
|
|
93
119
|
replica_name: Optional[str] = None
|
|
@@ -95,6 +121,7 @@ class ReplicatedMergeTreeConfigDict(BaseEngineConfigDict):
|
|
|
95
121
|
|
|
96
122
|
class ReplicatedReplacingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
97
123
|
"""Configuration for ReplicatedReplacingMergeTree engine."""
|
|
124
|
+
|
|
98
125
|
engine: Literal["ReplicatedReplacingMergeTree"] = "ReplicatedReplacingMergeTree"
|
|
99
126
|
keeper_path: Optional[str] = None
|
|
100
127
|
replica_name: Optional[str] = None
|
|
@@ -104,6 +131,7 @@ class ReplicatedReplacingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
|
104
131
|
|
|
105
132
|
class ReplicatedAggregatingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
106
133
|
"""Configuration for ReplicatedAggregatingMergeTree engine."""
|
|
134
|
+
|
|
107
135
|
engine: Literal["ReplicatedAggregatingMergeTree"] = "ReplicatedAggregatingMergeTree"
|
|
108
136
|
keeper_path: Optional[str] = None
|
|
109
137
|
replica_name: Optional[str] = None
|
|
@@ -111,14 +139,37 @@ class ReplicatedAggregatingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
|
111
139
|
|
|
112
140
|
class ReplicatedSummingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
113
141
|
"""Configuration for ReplicatedSummingMergeTree engine."""
|
|
142
|
+
|
|
114
143
|
engine: Literal["ReplicatedSummingMergeTree"] = "ReplicatedSummingMergeTree"
|
|
115
144
|
keeper_path: Optional[str] = None
|
|
116
145
|
replica_name: Optional[str] = None
|
|
117
146
|
columns: Optional[List[str]] = None
|
|
118
147
|
|
|
119
148
|
|
|
149
|
+
class ReplicatedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
150
|
+
"""Configuration for ReplicatedCollapsingMergeTree engine."""
|
|
151
|
+
|
|
152
|
+
engine: Literal["ReplicatedCollapsingMergeTree"] = "ReplicatedCollapsingMergeTree"
|
|
153
|
+
keeper_path: Optional[str] = None
|
|
154
|
+
replica_name: Optional[str] = None
|
|
155
|
+
sign: str
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class ReplicatedVersionedCollapsingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
159
|
+
"""Configuration for ReplicatedVersionedCollapsingMergeTree engine."""
|
|
160
|
+
|
|
161
|
+
engine: Literal["ReplicatedVersionedCollapsingMergeTree"] = (
|
|
162
|
+
"ReplicatedVersionedCollapsingMergeTree"
|
|
163
|
+
)
|
|
164
|
+
keeper_path: Optional[str] = None
|
|
165
|
+
replica_name: Optional[str] = None
|
|
166
|
+
sign: str
|
|
167
|
+
ver: str
|
|
168
|
+
|
|
169
|
+
|
|
120
170
|
class S3QueueConfigDict(BaseEngineConfigDict):
|
|
121
171
|
"""Configuration for S3Queue engine with all specific fields."""
|
|
172
|
+
|
|
122
173
|
engine: Literal["S3Queue"] = "S3Queue"
|
|
123
174
|
s3_path: str
|
|
124
175
|
format: str
|
|
@@ -128,17 +179,95 @@ class S3QueueConfigDict(BaseEngineConfigDict):
|
|
|
128
179
|
headers: Optional[Dict[str, str]] = None
|
|
129
180
|
|
|
130
181
|
|
|
182
|
+
class S3ConfigDict(BaseEngineConfigDict):
|
|
183
|
+
"""Configuration for S3 engine."""
|
|
184
|
+
|
|
185
|
+
engine: Literal["S3"] = "S3"
|
|
186
|
+
path: str
|
|
187
|
+
format: str
|
|
188
|
+
aws_access_key_id: Optional[str] = None
|
|
189
|
+
aws_secret_access_key: Optional[str] = None
|
|
190
|
+
compression: Optional[str] = None
|
|
191
|
+
partition_strategy: Optional[str] = None
|
|
192
|
+
partition_columns_in_data_file: Optional[str] = None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class BufferConfigDict(BaseEngineConfigDict):
|
|
196
|
+
"""Configuration for Buffer engine."""
|
|
197
|
+
|
|
198
|
+
engine: Literal["Buffer"] = "Buffer"
|
|
199
|
+
target_database: str
|
|
200
|
+
target_table: str
|
|
201
|
+
num_layers: int
|
|
202
|
+
min_time: int
|
|
203
|
+
max_time: int
|
|
204
|
+
min_rows: int
|
|
205
|
+
max_rows: int
|
|
206
|
+
min_bytes: int
|
|
207
|
+
max_bytes: int
|
|
208
|
+
flush_time: Optional[int] = None
|
|
209
|
+
flush_rows: Optional[int] = None
|
|
210
|
+
flush_bytes: Optional[int] = None
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class DistributedConfigDict(BaseEngineConfigDict):
|
|
214
|
+
"""Configuration for Distributed engine."""
|
|
215
|
+
|
|
216
|
+
engine: Literal["Distributed"] = "Distributed"
|
|
217
|
+
cluster: str
|
|
218
|
+
target_database: str
|
|
219
|
+
target_table: str
|
|
220
|
+
sharding_key: Optional[str] = None
|
|
221
|
+
policy_name: Optional[str] = None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class IcebergS3ConfigDict(BaseEngineConfigDict):
|
|
225
|
+
"""Configuration for IcebergS3 engine."""
|
|
226
|
+
|
|
227
|
+
engine: Literal["IcebergS3"] = "IcebergS3"
|
|
228
|
+
path: str
|
|
229
|
+
format: str
|
|
230
|
+
aws_access_key_id: Optional[str] = None
|
|
231
|
+
aws_secret_access_key: Optional[str] = None
|
|
232
|
+
compression: Optional[str] = None
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class KafkaConfigDict(BaseEngineConfigDict):
|
|
236
|
+
"""Configuration for Kafka engine.
|
|
237
|
+
|
|
238
|
+
Constructor: ENGINE = Kafka('broker', 'topic', 'group', 'format')
|
|
239
|
+
Settings (kafka_schema, kafka_num_consumers, security, etc.) go in table settings.
|
|
240
|
+
|
|
241
|
+
Reference: https://clickhouse.com/docs/engines/table-engines/integrations/kafka
|
|
242
|
+
"""
|
|
243
|
+
|
|
244
|
+
engine: Literal["Kafka"] = "Kafka"
|
|
245
|
+
broker_list: str
|
|
246
|
+
topic_list: str
|
|
247
|
+
group_name: str
|
|
248
|
+
format: str
|
|
249
|
+
|
|
250
|
+
|
|
131
251
|
# Discriminated union of all engine configurations
|
|
132
252
|
EngineConfigDict = Union[
|
|
133
253
|
MergeTreeConfigDict,
|
|
134
254
|
ReplacingMergeTreeConfigDict,
|
|
135
255
|
AggregatingMergeTreeConfigDict,
|
|
136
256
|
SummingMergeTreeConfigDict,
|
|
257
|
+
CollapsingMergeTreeConfigDict,
|
|
258
|
+
VersionedCollapsingMergeTreeConfigDict,
|
|
137
259
|
ReplicatedMergeTreeConfigDict,
|
|
138
260
|
ReplicatedReplacingMergeTreeConfigDict,
|
|
139
261
|
ReplicatedAggregatingMergeTreeConfigDict,
|
|
140
262
|
ReplicatedSummingMergeTreeConfigDict,
|
|
141
|
-
|
|
263
|
+
ReplicatedCollapsingMergeTreeConfigDict,
|
|
264
|
+
ReplicatedVersionedCollapsingMergeTreeConfigDict,
|
|
265
|
+
S3QueueConfigDict,
|
|
266
|
+
S3ConfigDict,
|
|
267
|
+
BufferConfigDict,
|
|
268
|
+
DistributedConfigDict,
|
|
269
|
+
IcebergS3ConfigDict,
|
|
270
|
+
KafkaConfigDict,
|
|
142
271
|
]
|
|
143
272
|
|
|
144
273
|
|
|
@@ -151,12 +280,15 @@ class TableConfig(BaseModel):
|
|
|
151
280
|
order_by: List of columns used for the ORDER BY clause.
|
|
152
281
|
partition_by: The column name used for the PARTITION BY clause.
|
|
153
282
|
sample_by_expression: Optional SAMPLE BY expression for data sampling.
|
|
283
|
+
primary_key_expression: Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified).
|
|
154
284
|
engine_config: Engine configuration with type-safe, engine-specific parameters.
|
|
155
285
|
version: Optional version string of the table configuration.
|
|
156
286
|
metadata: Optional metadata for the table.
|
|
157
287
|
life_cycle: Lifecycle management setting for the table.
|
|
158
288
|
table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
|
|
289
|
+
cluster: Optional cluster name for ON CLUSTER support in ClickHouse.
|
|
159
290
|
"""
|
|
291
|
+
|
|
160
292
|
model_config = model_config
|
|
161
293
|
|
|
162
294
|
name: str
|
|
@@ -164,13 +296,16 @@ class TableConfig(BaseModel):
|
|
|
164
296
|
order_by: List[str] | str
|
|
165
297
|
partition_by: Optional[str]
|
|
166
298
|
sample_by_expression: Optional[str] = None
|
|
167
|
-
|
|
299
|
+
primary_key_expression: Optional[str] = None
|
|
300
|
+
engine_config: Optional[EngineConfigDict] = Field(None, discriminator="engine")
|
|
168
301
|
version: Optional[str] = None
|
|
169
302
|
metadata: Optional[dict] = None
|
|
170
303
|
life_cycle: Optional[str] = None
|
|
171
304
|
table_settings: Optional[dict[str, str]] = None
|
|
172
305
|
indexes: list[OlapConfig.TableIndex] = []
|
|
173
306
|
ttl: Optional[str] = None
|
|
307
|
+
database: Optional[str] = None
|
|
308
|
+
cluster: Optional[str] = None
|
|
174
309
|
|
|
175
310
|
|
|
176
311
|
class TopicConfig(BaseModel):
|
|
@@ -190,6 +325,7 @@ class TopicConfig(BaseModel):
|
|
|
190
325
|
metadata: Optional metadata for the topic.
|
|
191
326
|
life_cycle: Lifecycle management setting for the topic.
|
|
192
327
|
"""
|
|
328
|
+
|
|
193
329
|
model_config = model_config
|
|
194
330
|
|
|
195
331
|
name: str
|
|
@@ -218,7 +354,10 @@ class IngestApiConfig(BaseModel):
|
|
|
218
354
|
version: Optional version string of the API configuration.
|
|
219
355
|
path: Optional custom path for the ingestion endpoint.
|
|
220
356
|
metadata: Optional metadata for the API.
|
|
357
|
+
allow_extra_fields: Whether this API allows extra fields beyond the defined columns.
|
|
358
|
+
When true, extra fields in payloads are passed through to streaming functions.
|
|
221
359
|
"""
|
|
360
|
+
|
|
222
361
|
model_config = model_config
|
|
223
362
|
|
|
224
363
|
name: str
|
|
@@ -229,6 +368,7 @@ class IngestApiConfig(BaseModel):
|
|
|
229
368
|
path: Optional[str] = None
|
|
230
369
|
metadata: Optional[dict] = None
|
|
231
370
|
json_schema: dict[str, Any] = Field(serialization_alias="schema")
|
|
371
|
+
allow_extra_fields: bool = False
|
|
232
372
|
|
|
233
373
|
|
|
234
374
|
class InternalApiConfig(BaseModel):
|
|
@@ -242,6 +382,7 @@ class InternalApiConfig(BaseModel):
|
|
|
242
382
|
path: Optional custom path for the API endpoint.
|
|
243
383
|
metadata: Optional metadata for the API.
|
|
244
384
|
"""
|
|
385
|
+
|
|
245
386
|
model_config = model_config
|
|
246
387
|
|
|
247
388
|
name: str
|
|
@@ -261,6 +402,7 @@ class WorkflowJson(BaseModel):
|
|
|
261
402
|
timeout: Optional timeout string for the entire workflow.
|
|
262
403
|
schedule: Optional cron-like schedule string for recurring execution.
|
|
263
404
|
"""
|
|
405
|
+
|
|
264
406
|
model_config = model_config
|
|
265
407
|
|
|
266
408
|
name: str
|
|
@@ -275,6 +417,7 @@ class WebAppMetadataJson(BaseModel):
|
|
|
275
417
|
Attributes:
|
|
276
418
|
description: Optional description of the WebApp.
|
|
277
419
|
"""
|
|
420
|
+
|
|
278
421
|
model_config = model_config
|
|
279
422
|
|
|
280
423
|
description: Optional[str] = None
|
|
@@ -288,6 +431,7 @@ class WebAppJson(BaseModel):
|
|
|
288
431
|
mount_path: The URL path where the WebApp is mounted.
|
|
289
432
|
metadata: Optional metadata for documentation purposes.
|
|
290
433
|
"""
|
|
434
|
+
|
|
291
435
|
model_config = model_config
|
|
292
436
|
|
|
293
437
|
name: str
|
|
@@ -304,8 +448,16 @@ class InfrastructureSignatureJson(BaseModel):
|
|
|
304
448
|
id: A unique identifier for the resource instance (often name + version).
|
|
305
449
|
kind: The type of the infrastructure component.
|
|
306
450
|
"""
|
|
451
|
+
|
|
307
452
|
id: str
|
|
308
|
-
kind: Literal[
|
|
453
|
+
kind: Literal[
|
|
454
|
+
"Table",
|
|
455
|
+
"Topic",
|
|
456
|
+
"ApiEndpoint",
|
|
457
|
+
"TopicToTableSyncProcess",
|
|
458
|
+
"View",
|
|
459
|
+
"SqlResource",
|
|
460
|
+
]
|
|
309
461
|
|
|
310
462
|
|
|
311
463
|
class SqlResourceConfig(BaseModel):
|
|
@@ -317,8 +469,10 @@ class SqlResourceConfig(BaseModel):
|
|
|
317
469
|
teardown: List of SQL commands required to drop the resource.
|
|
318
470
|
pulls_data_from: List of infrastructure components this resource reads from.
|
|
319
471
|
pushes_data_to: List of infrastructure components this resource writes to.
|
|
472
|
+
source_file: Optional path to the source file where this resource is defined.
|
|
320
473
|
metadata: Optional metadata for the resource.
|
|
321
474
|
"""
|
|
475
|
+
|
|
322
476
|
model_config = model_config
|
|
323
477
|
|
|
324
478
|
name: str
|
|
@@ -326,6 +480,7 @@ class SqlResourceConfig(BaseModel):
|
|
|
326
480
|
teardown: list[str]
|
|
327
481
|
pulls_data_from: list[InfrastructureSignatureJson]
|
|
328
482
|
pushes_data_to: list[InfrastructureSignatureJson]
|
|
483
|
+
source_file: Optional[str] = None
|
|
329
484
|
metadata: Optional[dict] = None
|
|
330
485
|
|
|
331
486
|
|
|
@@ -343,6 +498,7 @@ class InfrastructureMap(BaseModel):
|
|
|
343
498
|
workflows: Dictionary mapping workflow names to their configurations.
|
|
344
499
|
web_apps: Dictionary mapping WebApp names to their configurations.
|
|
345
500
|
"""
|
|
501
|
+
|
|
346
502
|
model_config = model_config
|
|
347
503
|
|
|
348
504
|
tables: dict[str, TableConfig]
|
|
@@ -369,11 +525,15 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
|
|
|
369
525
|
Raises:
|
|
370
526
|
TypeError: If the input object is not a recognized SQL resource type.
|
|
371
527
|
"""
|
|
372
|
-
if hasattr(r,
|
|
528
|
+
if hasattr(r, "kind"):
|
|
373
529
|
if r.kind == "OlapTable":
|
|
374
530
|
# Explicitly cast for type hint checking if needed, though Python is dynamic
|
|
375
531
|
table = r # type: OlapTable
|
|
376
|
-
res_id =
|
|
532
|
+
res_id = (
|
|
533
|
+
f"{table.name}_{table.config.version}"
|
|
534
|
+
if table.config.version
|
|
535
|
+
else table.name
|
|
536
|
+
)
|
|
377
537
|
return InfrastructureSignatureJson(id=res_id, kind="Table")
|
|
378
538
|
elif r.kind == "SqlResource":
|
|
379
539
|
# Explicitly cast for type hint checking if needed
|
|
@@ -386,84 +546,117 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
|
|
|
386
546
|
raise TypeError(f"Object {r} lacks a 'kind' attribute for dependency mapping.")
|
|
387
547
|
|
|
388
548
|
|
|
389
|
-
def _convert_basic_engine_instance(
|
|
549
|
+
def _convert_basic_engine_instance(
|
|
550
|
+
engine: "EngineConfig",
|
|
551
|
+
) -> Optional[EngineConfigDict]:
|
|
390
552
|
"""Convert basic MergeTree engine instances to config dict.
|
|
391
|
-
|
|
553
|
+
|
|
392
554
|
Args:
|
|
393
555
|
engine: An EngineConfig instance
|
|
394
|
-
|
|
556
|
+
|
|
395
557
|
Returns:
|
|
396
558
|
EngineConfigDict if matched, None otherwise
|
|
397
559
|
"""
|
|
398
560
|
from moose_lib.blocks import (
|
|
399
|
-
MergeTreeEngine,
|
|
400
|
-
|
|
561
|
+
MergeTreeEngine,
|
|
562
|
+
ReplacingMergeTreeEngine,
|
|
563
|
+
AggregatingMergeTreeEngine,
|
|
564
|
+
SummingMergeTreeEngine,
|
|
565
|
+
CollapsingMergeTreeEngine,
|
|
566
|
+
VersionedCollapsingMergeTreeEngine,
|
|
401
567
|
)
|
|
402
568
|
|
|
403
569
|
if isinstance(engine, MergeTreeEngine):
|
|
404
570
|
return MergeTreeConfigDict()
|
|
405
571
|
elif isinstance(engine, ReplacingMergeTreeEngine):
|
|
406
572
|
return ReplacingMergeTreeConfigDict(
|
|
407
|
-
ver=engine.ver,
|
|
408
|
-
is_deleted=engine.is_deleted
|
|
573
|
+
ver=engine.ver, is_deleted=engine.is_deleted
|
|
409
574
|
)
|
|
410
575
|
elif isinstance(engine, AggregatingMergeTreeEngine):
|
|
411
576
|
return AggregatingMergeTreeConfigDict()
|
|
412
577
|
elif isinstance(engine, SummingMergeTreeEngine):
|
|
413
578
|
return SummingMergeTreeConfigDict(columns=engine.columns)
|
|
579
|
+
elif isinstance(engine, CollapsingMergeTreeEngine):
|
|
580
|
+
return CollapsingMergeTreeConfigDict(sign=engine.sign)
|
|
581
|
+
elif isinstance(engine, VersionedCollapsingMergeTreeEngine):
|
|
582
|
+
return VersionedCollapsingMergeTreeConfigDict(sign=engine.sign, ver=engine.ver)
|
|
414
583
|
return None
|
|
415
584
|
|
|
416
585
|
|
|
417
|
-
def _convert_replicated_engine_instance(
|
|
586
|
+
def _convert_replicated_engine_instance(
|
|
587
|
+
engine: "EngineConfig",
|
|
588
|
+
) -> Optional[EngineConfigDict]:
|
|
418
589
|
"""Convert replicated MergeTree engine instances to config dict.
|
|
419
|
-
|
|
590
|
+
|
|
420
591
|
Args:
|
|
421
592
|
engine: An EngineConfig instance
|
|
422
|
-
|
|
593
|
+
|
|
423
594
|
Returns:
|
|
424
595
|
EngineConfigDict if matched, None otherwise
|
|
425
596
|
"""
|
|
426
597
|
from moose_lib.blocks import (
|
|
427
|
-
ReplicatedMergeTreeEngine,
|
|
428
|
-
|
|
598
|
+
ReplicatedMergeTreeEngine,
|
|
599
|
+
ReplicatedReplacingMergeTreeEngine,
|
|
600
|
+
ReplicatedAggregatingMergeTreeEngine,
|
|
601
|
+
ReplicatedSummingMergeTreeEngine,
|
|
602
|
+
ReplicatedCollapsingMergeTreeEngine,
|
|
603
|
+
ReplicatedVersionedCollapsingMergeTreeEngine,
|
|
429
604
|
)
|
|
430
605
|
|
|
431
606
|
if isinstance(engine, ReplicatedMergeTreeEngine):
|
|
432
607
|
return ReplicatedMergeTreeConfigDict(
|
|
433
|
-
keeper_path=engine.keeper_path,
|
|
434
|
-
replica_name=engine.replica_name
|
|
608
|
+
keeper_path=engine.keeper_path, replica_name=engine.replica_name
|
|
435
609
|
)
|
|
436
610
|
elif isinstance(engine, ReplicatedReplacingMergeTreeEngine):
|
|
437
611
|
return ReplicatedReplacingMergeTreeConfigDict(
|
|
438
612
|
keeper_path=engine.keeper_path,
|
|
439
613
|
replica_name=engine.replica_name,
|
|
440
614
|
ver=engine.ver,
|
|
441
|
-
is_deleted=engine.is_deleted
|
|
615
|
+
is_deleted=engine.is_deleted,
|
|
442
616
|
)
|
|
443
617
|
elif isinstance(engine, ReplicatedAggregatingMergeTreeEngine):
|
|
444
618
|
return ReplicatedAggregatingMergeTreeConfigDict(
|
|
445
|
-
keeper_path=engine.keeper_path,
|
|
446
|
-
replica_name=engine.replica_name
|
|
619
|
+
keeper_path=engine.keeper_path, replica_name=engine.replica_name
|
|
447
620
|
)
|
|
448
621
|
elif isinstance(engine, ReplicatedSummingMergeTreeEngine):
|
|
449
622
|
return ReplicatedSummingMergeTreeConfigDict(
|
|
450
623
|
keeper_path=engine.keeper_path,
|
|
451
624
|
replica_name=engine.replica_name,
|
|
452
|
-
columns=engine.columns
|
|
625
|
+
columns=engine.columns,
|
|
626
|
+
)
|
|
627
|
+
elif isinstance(engine, ReplicatedCollapsingMergeTreeEngine):
|
|
628
|
+
return ReplicatedCollapsingMergeTreeConfigDict(
|
|
629
|
+
keeper_path=engine.keeper_path,
|
|
630
|
+
replica_name=engine.replica_name,
|
|
631
|
+
sign=engine.sign,
|
|
632
|
+
)
|
|
633
|
+
elif isinstance(engine, ReplicatedVersionedCollapsingMergeTreeEngine):
|
|
634
|
+
return ReplicatedVersionedCollapsingMergeTreeConfigDict(
|
|
635
|
+
keeper_path=engine.keeper_path,
|
|
636
|
+
replica_name=engine.replica_name,
|
|
637
|
+
sign=engine.sign,
|
|
638
|
+
ver=engine.ver,
|
|
453
639
|
)
|
|
454
640
|
return None
|
|
455
641
|
|
|
456
642
|
|
|
457
643
|
def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineConfigDict:
|
|
458
644
|
"""Convert an EngineConfig instance to config dict format.
|
|
459
|
-
|
|
645
|
+
|
|
460
646
|
Args:
|
|
461
647
|
engine: An EngineConfig instance
|
|
462
|
-
|
|
648
|
+
|
|
463
649
|
Returns:
|
|
464
650
|
EngineConfigDict with engine-specific configuration
|
|
465
651
|
"""
|
|
466
|
-
from moose_lib.blocks import
|
|
652
|
+
from moose_lib.blocks import (
|
|
653
|
+
S3QueueEngine,
|
|
654
|
+
S3Engine,
|
|
655
|
+
BufferEngine,
|
|
656
|
+
DistributedEngine,
|
|
657
|
+
IcebergS3Engine,
|
|
658
|
+
KafkaEngine,
|
|
659
|
+
)
|
|
467
660
|
|
|
468
661
|
# Try S3Queue first
|
|
469
662
|
if isinstance(engine, S3QueueEngine):
|
|
@@ -473,7 +666,65 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
|
|
|
473
666
|
aws_access_key_id=engine.aws_access_key_id,
|
|
474
667
|
aws_secret_access_key=engine.aws_secret_access_key,
|
|
475
668
|
compression=engine.compression,
|
|
476
|
-
headers=engine.headers
|
|
669
|
+
headers=engine.headers,
|
|
670
|
+
)
|
|
671
|
+
|
|
672
|
+
# Try S3
|
|
673
|
+
if isinstance(engine, S3Engine):
|
|
674
|
+
return S3ConfigDict(
|
|
675
|
+
path=engine.path,
|
|
676
|
+
format=engine.format,
|
|
677
|
+
aws_access_key_id=engine.aws_access_key_id,
|
|
678
|
+
aws_secret_access_key=engine.aws_secret_access_key,
|
|
679
|
+
compression=engine.compression,
|
|
680
|
+
partition_strategy=engine.partition_strategy,
|
|
681
|
+
partition_columns_in_data_file=engine.partition_columns_in_data_file,
|
|
682
|
+
)
|
|
683
|
+
|
|
684
|
+
# Try Buffer
|
|
685
|
+
if isinstance(engine, BufferEngine):
|
|
686
|
+
return BufferConfigDict(
|
|
687
|
+
target_database=engine.target_database,
|
|
688
|
+
target_table=engine.target_table,
|
|
689
|
+
num_layers=engine.num_layers,
|
|
690
|
+
min_time=engine.min_time,
|
|
691
|
+
max_time=engine.max_time,
|
|
692
|
+
min_rows=engine.min_rows,
|
|
693
|
+
max_rows=engine.max_rows,
|
|
694
|
+
min_bytes=engine.min_bytes,
|
|
695
|
+
max_bytes=engine.max_bytes,
|
|
696
|
+
flush_time=engine.flush_time,
|
|
697
|
+
flush_rows=engine.flush_rows,
|
|
698
|
+
flush_bytes=engine.flush_bytes,
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
# Try Distributed
|
|
702
|
+
if isinstance(engine, DistributedEngine):
|
|
703
|
+
return DistributedConfigDict(
|
|
704
|
+
cluster=engine.cluster,
|
|
705
|
+
target_database=engine.target_database,
|
|
706
|
+
target_table=engine.target_table,
|
|
707
|
+
sharding_key=engine.sharding_key,
|
|
708
|
+
policy_name=engine.policy_name,
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
# Try IcebergS3
|
|
712
|
+
if isinstance(engine, IcebergS3Engine):
|
|
713
|
+
return IcebergS3ConfigDict(
|
|
714
|
+
path=engine.path,
|
|
715
|
+
format=engine.format,
|
|
716
|
+
aws_access_key_id=engine.aws_access_key_id,
|
|
717
|
+
aws_secret_access_key=engine.aws_secret_access_key,
|
|
718
|
+
compression=engine.compression,
|
|
719
|
+
)
|
|
720
|
+
|
|
721
|
+
# Try Kafka
|
|
722
|
+
if isinstance(engine, KafkaEngine):
|
|
723
|
+
return KafkaConfigDict(
|
|
724
|
+
broker_list=engine.broker_list,
|
|
725
|
+
topic_list=engine.topic_list,
|
|
726
|
+
group_name=engine.group_name,
|
|
727
|
+
format=engine.format,
|
|
477
728
|
)
|
|
478
729
|
|
|
479
730
|
# Try basic engines
|
|
@@ -490,14 +741,15 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
|
|
|
490
741
|
return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
|
|
491
742
|
|
|
492
743
|
|
|
493
|
-
def _convert_engine_to_config_dict(
|
|
494
|
-
|
|
744
|
+
def _convert_engine_to_config_dict(
|
|
745
|
+
engine: Union[ClickHouseEngines, EngineConfig], table: OlapTable
|
|
746
|
+
) -> EngineConfigDict:
|
|
495
747
|
"""Convert engine enum or EngineConfig instance to new engine config format.
|
|
496
|
-
|
|
748
|
+
|
|
497
749
|
Args:
|
|
498
750
|
engine: Either a ClickHouseEngines enum value or an EngineConfig instance
|
|
499
751
|
table: The OlapTable instance with configuration
|
|
500
|
-
|
|
752
|
+
|
|
501
753
|
Returns:
|
|
502
754
|
EngineConfigDict with engine-specific configuration
|
|
503
755
|
"""
|
|
@@ -516,7 +768,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
|
|
|
516
768
|
engine_name = str(engine)
|
|
517
769
|
|
|
518
770
|
# For S3Queue with legacy configuration, check for s3_queue_engine_config
|
|
519
|
-
if engine_name == "S3Queue" and hasattr(table.config,
|
|
771
|
+
if engine_name == "S3Queue" and hasattr(table.config, "s3_queue_engine_config"):
|
|
520
772
|
s3_config = table.config.s3_queue_engine_config
|
|
521
773
|
if s3_config:
|
|
522
774
|
logger = Logger(action="S3QueueConfig")
|
|
@@ -530,7 +782,7 @@ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig
|
|
|
530
782
|
aws_access_key_id=s3_config.aws_access_key_id,
|
|
531
783
|
aws_secret_access_key=s3_config.aws_secret_access_key,
|
|
532
784
|
compression=s3_config.compression,
|
|
533
|
-
headers=s3_config.headers
|
|
785
|
+
headers=s3_config.headers,
|
|
534
786
|
)
|
|
535
787
|
|
|
536
788
|
# Map engine names to specific config classes
|
|
@@ -588,16 +840,24 @@ def to_infra_map() -> dict:
|
|
|
588
840
|
table_settings["mode"] = "unordered"
|
|
589
841
|
|
|
590
842
|
id_key = (
|
|
591
|
-
f"{table.name}_{table.config.version}"
|
|
843
|
+
f"{table.name}_{table.config.version}"
|
|
844
|
+
if table.config.version
|
|
845
|
+
else table.name
|
|
592
846
|
)
|
|
593
847
|
|
|
594
848
|
# Determine ORDER BY: list of fields or single expression
|
|
595
849
|
has_fields = bool(table.config.order_by_fields)
|
|
596
850
|
has_expr = table.config.order_by_expression is not None
|
|
597
851
|
if has_fields and has_expr:
|
|
598
|
-
raise ValueError(
|
|
852
|
+
raise ValueError(
|
|
853
|
+
f"Table {table.name}: Provide either order_by_fields or order_by_expression, not both."
|
|
854
|
+
)
|
|
599
855
|
|
|
600
|
-
order_by_value =
|
|
856
|
+
order_by_value = (
|
|
857
|
+
table.config.order_by_expression
|
|
858
|
+
if has_expr
|
|
859
|
+
else table.config.order_by_fields
|
|
860
|
+
)
|
|
601
861
|
|
|
602
862
|
tables[id_key] = TableConfig(
|
|
603
863
|
name=table.name,
|
|
@@ -605,14 +865,19 @@ def to_infra_map() -> dict:
|
|
|
605
865
|
order_by=order_by_value,
|
|
606
866
|
partition_by=table.config.partition_by,
|
|
607
867
|
sample_by_expression=table.config.sample_by_expression,
|
|
868
|
+
primary_key_expression=table.config.primary_key_expression,
|
|
608
869
|
engine_config=engine_config,
|
|
609
870
|
version=table.config.version,
|
|
610
871
|
metadata=getattr(table, "metadata", None),
|
|
611
|
-
life_cycle=
|
|
872
|
+
life_cycle=(
|
|
873
|
+
table.config.life_cycle.value if table.config.life_cycle else None
|
|
874
|
+
),
|
|
612
875
|
# Map 'settings' to 'table_settings' for internal use
|
|
613
876
|
table_settings=table_settings if table_settings else None,
|
|
614
877
|
indexes=table.config.indexes,
|
|
615
878
|
ttl=table.config.ttl,
|
|
879
|
+
database=table.config.database,
|
|
880
|
+
cluster=table.config.cluster,
|
|
616
881
|
)
|
|
617
882
|
|
|
618
883
|
for name, stream in get_streams().items():
|
|
@@ -628,15 +893,20 @@ def to_infra_map() -> dict:
|
|
|
628
893
|
]
|
|
629
894
|
|
|
630
895
|
consumers = [
|
|
631
|
-
Consumer(version=consumer.config.version)
|
|
632
|
-
for consumer in stream.consumers
|
|
896
|
+
Consumer(version=consumer.config.version) for consumer in stream.consumers
|
|
633
897
|
]
|
|
634
898
|
|
|
635
899
|
topics[name] = TopicConfig(
|
|
636
900
|
name=name,
|
|
637
901
|
columns=_to_columns(stream._t),
|
|
638
|
-
target_table=
|
|
639
|
-
|
|
902
|
+
target_table=(
|
|
903
|
+
stream.config.destination.name if stream.config.destination else None
|
|
904
|
+
),
|
|
905
|
+
target_table_version=(
|
|
906
|
+
stream.config.destination.config.version
|
|
907
|
+
if stream.config.destination
|
|
908
|
+
else None
|
|
909
|
+
),
|
|
640
910
|
retention_period=stream.config.retention_period,
|
|
641
911
|
partition_count=stream.config.parallelism,
|
|
642
912
|
version=stream.config.version,
|
|
@@ -644,25 +914,33 @@ def to_infra_map() -> dict:
|
|
|
644
914
|
has_multi_transform=stream._multipleTransformations is not None,
|
|
645
915
|
consumers=consumers,
|
|
646
916
|
metadata=getattr(stream, "metadata", None),
|
|
647
|
-
life_cycle=
|
|
917
|
+
life_cycle=(
|
|
918
|
+
stream.config.life_cycle.value if stream.config.life_cycle else None
|
|
919
|
+
),
|
|
648
920
|
schema_config=stream.config.schema_config,
|
|
649
921
|
)
|
|
650
922
|
|
|
651
923
|
for name, api in get_ingest_apis().items():
|
|
924
|
+
# Check if the Pydantic model allows extra fields (extra='allow')
|
|
925
|
+
# This is the Python equivalent of TypeScript's index signatures
|
|
926
|
+
model_allows_extra = api._t.model_config.get("extra") == "allow"
|
|
927
|
+
|
|
652
928
|
ingest_apis[name] = IngestApiConfig(
|
|
653
929
|
name=name,
|
|
654
930
|
columns=_to_columns(api._t),
|
|
655
931
|
version=api.config.version,
|
|
656
932
|
path=api.config.path,
|
|
657
|
-
write_to=Target(
|
|
658
|
-
kind="stream",
|
|
659
|
-
name=api.config.destination.name
|
|
660
|
-
),
|
|
933
|
+
write_to=Target(kind="stream", name=api.config.destination.name),
|
|
661
934
|
metadata=getattr(api, "metadata", None),
|
|
662
935
|
json_schema=api._t.model_json_schema(
|
|
663
|
-
ref_template=
|
|
936
|
+
ref_template="#/components/schemas/{model}"
|
|
937
|
+
),
|
|
938
|
+
dead_letter_queue=(
|
|
939
|
+
api.config.dead_letter_queue.name
|
|
940
|
+
if api.config.dead_letter_queue
|
|
941
|
+
else None
|
|
664
942
|
),
|
|
665
|
-
|
|
943
|
+
allow_extra_fields=model_allows_extra,
|
|
666
944
|
)
|
|
667
945
|
|
|
668
946
|
for name, api in get_apis().items():
|
|
@@ -680,8 +958,13 @@ def to_infra_map() -> dict:
|
|
|
680
958
|
name=resource.name,
|
|
681
959
|
setup=resource.setup,
|
|
682
960
|
teardown=resource.teardown,
|
|
683
|
-
pulls_data_from=[
|
|
684
|
-
|
|
961
|
+
pulls_data_from=[
|
|
962
|
+
_map_sql_resource_ref(dep) for dep in resource.pulls_data_from
|
|
963
|
+
],
|
|
964
|
+
pushes_data_to=[
|
|
965
|
+
_map_sql_resource_ref(dep) for dep in resource.pushes_data_to
|
|
966
|
+
],
|
|
967
|
+
source_file=getattr(resource, "source_file", None),
|
|
685
968
|
metadata=getattr(resource, "metadata", None),
|
|
686
969
|
)
|
|
687
970
|
|
|
@@ -713,10 +996,10 @@ def to_infra_map() -> dict:
|
|
|
713
996
|
apis=apis,
|
|
714
997
|
sql_resources=sql_resources,
|
|
715
998
|
workflows=workflows,
|
|
716
|
-
web_apps=web_apps
|
|
999
|
+
web_apps=web_apps,
|
|
717
1000
|
)
|
|
718
1001
|
|
|
719
|
-
return infra_map.model_dump(by_alias=True)
|
|
1002
|
+
return infra_map.model_dump(by_alias=True, exclude_none=False)
|
|
720
1003
|
|
|
721
1004
|
|
|
722
1005
|
def load_models():
|
|
@@ -733,6 +1016,7 @@ def load_models():
|
|
|
733
1016
|
calling system uses to extract the configuration.
|
|
734
1017
|
"""
|
|
735
1018
|
import os
|
|
1019
|
+
|
|
736
1020
|
source_dir = os.environ.get("MOOSE_SOURCE_DIR", "app")
|
|
737
1021
|
import_module(f"{source_dir}.main")
|
|
738
1022
|
|