moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/blocks.py CHANGED
@@ -14,78 +14,135 @@ class ClickHouseEngines(Enum):
     VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
     GraphiteMergeTree = "GraphiteMergeTree"
     S3Queue = "S3Queue"
+    S3 = "S3"
+    Buffer = "Buffer"
+    Distributed = "Distributed"
+    IcebergS3 = "IcebergS3"
+    Kafka = "Kafka"
     ReplicatedMergeTree = "ReplicatedMergeTree"
     ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
     ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"
     ReplicatedSummingMergeTree = "ReplicatedSummingMergeTree"
+    ReplicatedCollapsingMergeTree = "ReplicatedCollapsingMergeTree"
+    ReplicatedVersionedCollapsingMergeTree = "ReplicatedVersionedCollapsingMergeTree"
+
 
 # ==========================
 # New Engine Configuration Classes
 # ==========================
 
+
 @dataclass
 class EngineConfig(ABC):
     """Base class for engine configurations"""
+
     pass
 
+
 @dataclass
 class MergeTreeEngine(EngineConfig):
     """Configuration for MergeTree engine"""
+
     pass
 
-@dataclass
+
+@dataclass
 class ReplacingMergeTreeEngine(EngineConfig):
     """Configuration for ReplacingMergeTree engine (with deduplication)
-
+
     Args:
         ver: Optional column name for version tracking
         is_deleted: Optional column name for deletion marking (requires ver)
     """
+
     ver: Optional[str] = None
     is_deleted: Optional[str] = None
-
+
     def __post_init__(self):
         if self.is_deleted and not self.ver:
             raise ValueError("is_deleted requires ver to be specified")
 
+
 @dataclass
 class AggregatingMergeTreeEngine(EngineConfig):
     """Configuration for AggregatingMergeTree engine"""
+
     pass
 
+
 @dataclass
 class SummingMergeTreeEngine(EngineConfig):
     """Configuration for SummingMergeTree engine
-
+
     Args:
         columns: Optional list of column names to sum
     """
+
     columns: Optional[List[str]] = None
 
+
+@dataclass
+class CollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for CollapsingMergeTree engine
+
+    Args:
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+    """
+
+    sign: str
+
+    def __post_init__(self):
+        if not self.sign:
+            raise ValueError("sign column is required for CollapsingMergeTree")
+
+
+@dataclass
+class VersionedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for VersionedCollapsingMergeTree engine
+
+    Args:
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+        ver: Column name for object state versioning
+    """
+
+    sign: str
+    ver: str
+
+    def __post_init__(self):
+        if not self.sign:
+            raise ValueError("sign column is required for VersionedCollapsingMergeTree")
+        if not self.ver:
+            raise ValueError("ver column is required for VersionedCollapsingMergeTree")
+
+
 @dataclass
 class ReplicatedMergeTreeEngine(EngineConfig):
     """Configuration for ReplicatedMergeTree engine (replicated version of MergeTree)
-
+
     Args:
         keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
             Optional: omit for ClickHouse Cloud which manages replication automatically
         replica_name: Replica name (e.g., '{replica}')
             Optional: omit for ClickHouse Cloud which manages replication automatically
-
+
     Note: Both keeper_path and replica_name must be provided together, or both omitted.
     """
+
     keeper_path: Optional[str] = None
     replica_name: Optional[str] = None
-
+
     def __post_init__(self):
         # Both must be provided or both must be None
         if (self.keeper_path is None) != (self.replica_name is None):
-            raise ValueError("keeper_path and replica_name must both be provided or both be None")
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
 
 @dataclass
 class ReplicatedReplacingMergeTreeEngine(EngineConfig):
     """Configuration for ReplicatedReplacingMergeTree engine (replicated version with deduplication)
-
+
     Args:
         keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
             Optional: omit for ClickHouse Cloud which manages replication automatically
@@ -93,83 +150,161 @@ class ReplicatedReplacingMergeTreeEngine(EngineConfig):
             Optional: omit for ClickHouse Cloud which manages replication automatically
         ver: Optional column name for version tracking
         is_deleted: Optional column name for deletion marking (requires ver)
-
+
     Note: Both keeper_path and replica_name must be provided together, or both omitted.
     """
+
     keeper_path: Optional[str] = None
     replica_name: Optional[str] = None
     ver: Optional[str] = None
     is_deleted: Optional[str] = None
-
+
     def __post_init__(self):
         # Both must be provided or both must be None
         if (self.keeper_path is None) != (self.replica_name is None):
-            raise ValueError("keeper_path and replica_name must both be provided or both be None")
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
         if self.is_deleted and not self.ver:
             raise ValueError("is_deleted requires ver to be specified")
 
+
 @dataclass
 class ReplicatedAggregatingMergeTreeEngine(EngineConfig):
     """Configuration for ReplicatedAggregatingMergeTree engine (replicated version for aggregations)
-
+
     Args:
         keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
             Optional: omit for ClickHouse Cloud which manages replication automatically
         replica_name: Replica name (e.g., '{replica}')
             Optional: omit for ClickHouse Cloud which manages replication automatically
-
+
     Note: Both keeper_path and replica_name must be provided together, or both omitted.
     """
+
     keeper_path: Optional[str] = None
     replica_name: Optional[str] = None
-
+
     def __post_init__(self):
         # Both must be provided or both must be None
         if (self.keeper_path is None) != (self.replica_name is None):
-            raise ValueError("keeper_path and replica_name must both be provided or both be None")
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
 
 @dataclass
 class ReplicatedSummingMergeTreeEngine(EngineConfig):
     """Configuration for ReplicatedSummingMergeTree engine (replicated version for summation)
-
+
     Args:
         keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
             Optional: omit for ClickHouse Cloud which manages replication automatically
         replica_name: Replica name (e.g., '{replica}')
            Optional: omit for ClickHouse Cloud which manages replication automatically
         columns: Optional list of column names to sum
-
+
     Note: Both keeper_path and replica_name must be provided together, or both omitted.
     """
+
     keeper_path: Optional[str] = None
     replica_name: Optional[str] = None
     columns: Optional[List[str]] = None
-
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
+
+@dataclass
+class ReplicatedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedCollapsingMergeTree engine (replicated version with collapsing)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    sign: str = field(default=None)
+
     def __post_init__(self):
         # Both must be provided or both must be None
         if (self.keeper_path is None) != (self.replica_name is None):
-            raise ValueError("keeper_path and replica_name must both be provided or both be None")
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+        if not self.sign:
+            raise ValueError(
+                "sign column is required for ReplicatedCollapsingMergeTree"
+            )
+
+
+@dataclass
+class ReplicatedVersionedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedVersionedCollapsingMergeTree engine (replicated version with versioned collapsing)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+        ver: Column name for object state versioning
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    sign: str = field(default=None)
+    ver: str = field(default=None)
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+        if not self.sign:
+            raise ValueError(
+                "sign column is required for ReplicatedVersionedCollapsingMergeTree"
+            )
+        if not self.ver:
+            raise ValueError(
+                "ver column is required for ReplicatedVersionedCollapsingMergeTree"
+            )
+
 
 @dataclass
 class S3QueueEngine(EngineConfig):
     """Configuration for S3Queue engine - only non-alterable constructor parameters.
-
+
     S3Queue-specific settings like 'mode', 'keeper_path', etc. should be specified
     in the settings field of OlapConfig, not here.
     """
-
+
     # Required fields
     s3_path: str  # S3 bucket path with wildcards (e.g., 's3://bucket/prefix/*.json')
-    format: str # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
-
+    format: str  # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+
     # Optional AWS credentials
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
-
+
     # Optional configuration
     compression: Optional[str] = None  # e.g., 'gzip', 'zstd'
     headers: Optional[Dict[str, str]] = None
-
+
     def __post_init__(self):
         """Validate required fields"""
         if not self.s3_path:
@@ -177,34 +312,233 @@ class S3QueueEngine(EngineConfig):
         if not self.format:
             raise ValueError("S3Queue engine requires 'format'")
 
+
+@dataclass
+class S3Engine(EngineConfig):
+    """Configuration for S3 engine - direct read/write from S3 storage.
+
+    Args:
+        path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
+        format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets)
+        aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
+        compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
+        partition_strategy: Optional partition strategy
+        partition_columns_in_data_file: Optional partition columns in data file
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("S3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("S3 engine requires 'format'")
+
+
+@dataclass
+class BufferEngine(EngineConfig):
+    """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
+
+    Args:
+        target_database: Target database name for the destination table
+        target_table: Target table name where data will be flushed
+        num_layers: Number of buffer layers (typically 16)
+        min_time: Minimum time in seconds before flushing
+        max_time: Maximum time in seconds before flushing
+        min_rows: Minimum number of rows before flushing
+        max_rows: Maximum number of rows before flushing
+        min_bytes: Minimum bytes before flushing
+        max_bytes: Maximum bytes before flushing
+        flush_time: Optional flush time in seconds
+        flush_rows: Optional flush number of rows
+        flush_bytes: Optional flush number of bytes
+    """
+
+    # Required fields
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+
+    # Optional fields
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.target_database:
+            raise ValueError("Buffer engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Buffer engine requires 'target_table'")
+
+
+@dataclass
+class DistributedEngine(EngineConfig):
+    """Configuration for Distributed engine - distributed table across a cluster.
+
+    Args:
+        cluster: Cluster name from the ClickHouse configuration
+        target_database: Database name on the cluster
+        target_table: Table name on the cluster
+        sharding_key: Optional sharding key expression for data distribution
+        policy_name: Optional policy name for data distribution
+    """
+
+    # Required fields
+    cluster: str
+    target_database: str
+    target_table: str
+
+    # Optional fields
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.cluster:
+            raise ValueError("Distributed engine requires 'cluster'")
+        if not self.target_database:
+            raise ValueError("Distributed engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Distributed engine requires 'target_table'")
+
+
+@dataclass
+class IcebergS3Engine(EngineConfig):
+    """Configuration for IcebergS3 engine - read-only Iceberg table access.
+
+    Provides direct querying of Apache Iceberg tables stored on S3.
+    Data is not copied; queries stream directly from Parquet/ORC files.
+
+    Args:
+        path: S3 path to Iceberg table root (e.g., 's3://bucket/warehouse/events/')
+        format: Data format - 'Parquet' or 'ORC'
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets or IAM roles)
+        aws_secret_access_key: AWS secret access key (optional)
+        compression: Compression type (optional: 'gzip', 'zstd', 'auto')
+
+    Example:
+        >>> from moose_lib import OlapTable, OlapConfig, moose_runtime_env
+        >>> from moose_lib.blocks import IcebergS3Engine
+        >>>
+        >>> lake_events = OlapTable[Event](
+        ...     "lake_events",
+        ...     OlapConfig(
+        ...         engine=IcebergS3Engine(
+        ...             path="s3://datalake/events/",
+        ...             format="Parquet",
+        ...             aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+        ...             aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+        ...         )
+        ...     )
+        ... )
+
+    Note:
+        - IcebergS3 engine is read-only
+        - Does not support ORDER BY, PARTITION BY, or SAMPLE BY clauses
+        - Queries always see the latest Iceberg snapshot (with metadata cache)
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("IcebergS3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("IcebergS3 engine requires 'format'")
+        if self.format not in ["Parquet", "ORC"]:
+            raise ValueError(
+                f"IcebergS3 format must be 'Parquet' or 'ORC', got '{self.format}'"
+            )
+
+
+@dataclass
+class KafkaEngine(EngineConfig):
+    """Kafka engine for streaming data from Kafka topics.
+
+    Args:
+        broker_list: Kafka broker addresses (e.g., 'kafka:9092')
+        topic_list: Topics to consume from
+        group_name: Consumer group identifier
+        format: Message format (e.g., 'JSONEachRow')
+
+    Additional settings (kafka_num_consumers, security) go in OlapConfig.settings.
+    """
+
+    broker_list: str
+    topic_list: str
+    group_name: str
+    format: str
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.broker_list:
+            raise ValueError("Kafka engine requires 'broker_list'")
+        if not self.topic_list:
+            raise ValueError("Kafka engine requires 'topic_list'")
+        if not self.group_name:
+            raise ValueError("Kafka engine requires 'group_name'")
+        if not self.format:
+            raise ValueError("Kafka engine requires 'format'")
+
+
 # ==========================
 # New Table Configuration (Recommended API)
 # ==========================
 
+
 @dataclass
 class TableConfig:
     """Modern table configuration with engine-specific settings"""
-
+
     # Engine configuration (required in new API)
     engine: EngineConfig
-
+
     # Common settings
     name: str
     columns: Dict[str, str]
     order_by: Optional[str] = None
-
+
     # Note: Factory methods (with_s3_queue, with_merge_tree, with_replacing_merge_tree)
     # were removed in ENG-856. Use direct configuration instead, e.g.:
     # TableConfig(name="table", columns={...}, engine=S3QueueEngine(s3_path="...", format="..."))
     # TableConfig(name="table", columns={...}, engine=ReplacingMergeTreeEngine(ver="updated_at"))
 
+
 # ==========================
 # Legacy API Support (Deprecated)
 # ==========================
 
+
 @dataclass
 class S3QueueEngineConfig:
     """Legacy S3Queue configuration (deprecated - use S3QueueEngine instead)"""
+
     path: str  # S3 path pattern (e.g., 's3://bucket/data/*.json')
     format: str  # Data format (e.g., 'JSONEachRow', 'CSV', etc.)
     # Optional S3 access credentials - can be NOSIGN for public buckets
@@ -215,37 +549,48 @@ class S3QueueEngineConfig:
     # Optional headers
     headers: Optional[Dict[str, str]] = None
 
+
 @dataclass
 class TableCreateOptions:
     name: str
     columns: Dict[str, str]
     engine: Optional[ClickHouseEngines] = ClickHouseEngines.MergeTree
     order_by: Optional[str] = None
-    s3_queue_engine_config: Optional[S3QueueEngineConfig] = None # Required when engine is S3Queue
+    s3_queue_engine_config: Optional[S3QueueEngineConfig] = (
+        None  # Required when engine is S3Queue
+    )
 
     def __post_init__(self):
         """Validate S3Queue configuration"""
-        if self.engine == ClickHouseEngines.S3Queue and self.s3_queue_engine_config is None:
+        if (
+            self.engine == ClickHouseEngines.S3Queue
+            and self.s3_queue_engine_config is None
+        ):
             raise ValueError(
                 "s3_queue_engine_config is required when using ClickHouseEngines.S3Queue engine. "
                 "Please provide s3_queue_engine_config with path, format, and optional settings."
             )
 
+
 # ==========================
 # Backward Compatibility Layer
 # ==========================
 
+
 def is_new_config(config: Any) -> bool:
     """Check if configuration uses new API"""
     if isinstance(config, TableConfig):
         return True
-    if hasattr(config, 'engine') and isinstance(getattr(config, 'engine'), EngineConfig):
+    if hasattr(config, "engine") and isinstance(
+        getattr(config, "engine"), EngineConfig
+    ):
         return True
     return False
 
+
 def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
     """Convert legacy configuration to new format"""
-
+
     # Show deprecation warning
     warnings.warn(
         "Using deprecated TableCreateOptions. Please migrate to TableConfig:\n"
@@ -253,9 +598,9 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
         "- For deduplication: Use TableConfig(name='table', columns={...}, engine=ReplacingMergeTreeEngine())\n"
         "See documentation for examples.",
         DeprecationWarning,
-        stacklevel=2
+        stacklevel=2,
     )
-
+
     # Handle S3Queue with separate config
     if legacy.engine == ClickHouseEngines.S3Queue and legacy.s3_queue_engine_config:
         s3_config = legacy.s3_queue_engine_config
@@ -268,11 +613,11 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
             aws_access_key_id=s3_config.aws_access_key_id,
             aws_secret_access_key=s3_config.aws_secret_access_key,
             compression=s3_config.compression,
-            headers=s3_config.headers
+            headers=s3_config.headers,
         ),
-        order_by=legacy.order_by
+        order_by=legacy.order_by,
     )
-
+
     # Map legacy engine enum to new engine classes
     engine_map = {
         ClickHouseEngines.MergeTree: MergeTreeEngine(),
@@ -280,97 +625,115 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
         ClickHouseEngines.AggregatingMergeTree: AggregatingMergeTreeEngine(),
         ClickHouseEngines.SummingMergeTree: SummingMergeTreeEngine(),
     }
-
+
     engine = engine_map.get(legacy.engine) if legacy.engine else MergeTreeEngine()
     if engine is None:
         engine = MergeTreeEngine()
-
+
     return TableConfig(
         name=legacy.name,
        columns=legacy.columns,
         engine=engine,
-        order_by=legacy.order_by
+        order_by=legacy.order_by,
     )
 
+
 def normalize_config(config: Union[TableConfig, TableCreateOptions]) -> TableConfig:
     """Normalize any configuration format to new API"""
     if is_new_config(config):
         return config  # type: ignore
     return migrate_legacy_config(config)  # type: ignore
 
+
 @dataclass
 class AggregationCreateOptions:
     table_create_options: TableCreateOptions
     materialized_view_name: str
     select: str
 
+
 @dataclass
 class AggregationDropOptions:
     view_name: str
     table_name: str
 
+
 @dataclass
 class MaterializedViewCreateOptions:
     name: str
     destination_table: str
     select: str
 
+
 @dataclass
 class PopulateTableOptions:
     destination_table: str
     select: str
 
+
 @dataclass
 class Blocks:
     teardown: list[str]
     setup: list[str]
 
+
 def drop_aggregation(options: AggregationDropOptions) -> list[str]:
     """
     Drops an aggregation's view & underlying table.
     """
     return [drop_view(options.view_name), drop_table(options.table_name)]
 
+
 def drop_table(name: str) -> str:
     """
     Drops an existing table if it exists.
     """
     return f"DROP TABLE IF EXISTS {name}".strip()
 
+
 def drop_view(name: str) -> str:
     """
     Drops an existing view if it exists.
     """
     return f"DROP VIEW IF EXISTS {name}".strip()
 
+
 def create_aggregation(options: AggregationCreateOptions) -> list[str]:
     """
     Creates an aggregation which includes a table, materialized view, and initial data load.
     """
     return [
         create_table(options.table_create_options),
-        create_materialized_view(MaterializedViewCreateOptions(
-            name=options.materialized_view_name,
-            destination_table=options.table_create_options.name,
-            select=options.select
-        )),
-        populate_table(PopulateTableOptions(
-            destination_table=options.table_create_options.name,
-            select=options.select
-        )),
+        create_materialized_view(
+            MaterializedViewCreateOptions(
+                name=options.materialized_view_name,
+                destination_table=options.table_create_options.name,
+                select=options.select,
+            )
+        ),
+        populate_table(
+            PopulateTableOptions(
+                destination_table=options.table_create_options.name,
+                select=options.select,
+            )
+        ),
     ]
 
+
 def create_materialized_view(options: MaterializedViewCreateOptions) -> str:
     """
     Creates a materialized view.
     """
     return f"CREATE MATERIALIZED VIEW IF NOT EXISTS {options.name} \nTO {options.destination_table}\nAS {options.select}".strip()
 
+
 def create_table(options: TableCreateOptions) -> str:
     """
     Creates a new table with default MergeTree engine.
     """
-    column_definitions = ",\n".join([f"{name} {type}" for name, type in options.columns.items()])
+    column_definitions = ",\n".join(
+        [f"{name} {type}" for name, type in options.columns.items()]
    )
     order_by_clause = f"ORDER BY {options.order_by}" if options.order_by else ""
     engine = options.engine.value if options.engine else "MergeTree"
 
@@ -383,8 +746,9 @@ def create_table(options: TableCreateOptions) -> str:
     {order_by_clause}
     """.strip()
 
+
 def populate_table(options: PopulateTableOptions) -> str:
     """
     Populates a table with data.
     """
-    return f"INSERT INTO {options.destination_table}\n{options.select}".strip()
+    return f"INSERT INTO {options.destination_table}\n{options.select}".strip()