moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +38 -3
  2. moose_lib/blocks.py +497 -37
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +94 -5
  5. moose_lib/config/config_file.py +44 -2
  6. moose_lib/config/runtime.py +137 -5
  7. moose_lib/data_models.py +451 -46
  8. moose_lib/dmv2/__init__.py +88 -60
  9. moose_lib/dmv2/_registry.py +3 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +56 -13
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +304 -119
  18. moose_lib/dmv2/registry.py +28 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +241 -21
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +175 -0
  24. moose_lib/dmv2/web_app_helpers.py +96 -0
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +537 -68
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +266 -156
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +38 -1
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +210 -0
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +114 -0
  56. tests/test_web_app.py +227 -0
  57. moose_lib-0.6.90.dist-info/RECORD +0 -42
  58. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/blocks.py CHANGED
@@ -14,66 +14,297 @@ class ClickHouseEngines(Enum):
     VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
     GraphiteMergeTree = "GraphiteMergeTree"
     S3Queue = "S3Queue"
+    S3 = "S3"
+    Buffer = "Buffer"
+    Distributed = "Distributed"
+    IcebergS3 = "IcebergS3"
+    Kafka = "Kafka"
+    ReplicatedMergeTree = "ReplicatedMergeTree"
+    ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
+    ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"
+    ReplicatedSummingMergeTree = "ReplicatedSummingMergeTree"
+    ReplicatedCollapsingMergeTree = "ReplicatedCollapsingMergeTree"
+    ReplicatedVersionedCollapsingMergeTree = "ReplicatedVersionedCollapsingMergeTree"
+
 
 # ==========================
 # New Engine Configuration Classes
 # ==========================
 
+
 @dataclass
 class EngineConfig(ABC):
     """Base class for engine configurations"""
+
     pass
 
+
 @dataclass
 class MergeTreeEngine(EngineConfig):
     """Configuration for MergeTree engine"""
+
     pass
 
-@dataclass
+
+@dataclass
 class ReplacingMergeTreeEngine(EngineConfig):
     """Configuration for ReplacingMergeTree engine (with deduplication)
-
+
     Args:
         ver: Optional column name for version tracking
         is_deleted: Optional column name for deletion marking (requires ver)
     """
+
     ver: Optional[str] = None
     is_deleted: Optional[str] = None
-
+
     def __post_init__(self):
         if self.is_deleted and not self.ver:
             raise ValueError("is_deleted requires ver to be specified")
 
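For illustration, a minimal sketch of the deduplication rule enforced above (assuming only the moose_lib.blocks module shown in this diff):

    from moose_lib.blocks import ReplacingMergeTreeEngine

    # ver alone is valid: the newest version per sorting key wins.
    engine = ReplacingMergeTreeEngine(ver="updated_at")

    # is_deleted without ver trips the __post_init__ check.
    try:
        ReplacingMergeTreeEngine(is_deleted="deleted")
    except ValueError as err:
        print(err)  # is_deleted requires ver to be specified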
+
 @dataclass
 class AggregatingMergeTreeEngine(EngineConfig):
     """Configuration for AggregatingMergeTree engine"""
+
     pass
 
+
 @dataclass
 class SummingMergeTreeEngine(EngineConfig):
-    """Configuration for SummingMergeTree engine"""
-    pass
+    """Configuration for SummingMergeTree engine
+
+    Args:
+        columns: Optional list of column names to sum
+    """
+
+    columns: Optional[List[str]] = None
+
+
+@dataclass
+class CollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for CollapsingMergeTree engine
+
+    Args:
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+    """
+
+    sign: str
+
+    def __post_init__(self):
+        if not self.sign:
+            raise ValueError("sign column is required for CollapsingMergeTree")
+
+
+@dataclass
+class VersionedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for VersionedCollapsingMergeTree engine
+
+    Args:
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+        ver: Column name for object state versioning
+    """
+
+    sign: str
+    ver: str
+
+    def __post_init__(self):
+        if not self.sign:
+            raise ValueError("sign column is required for VersionedCollapsingMergeTree")
+        if not self.ver:
+            raise ValueError("ver column is required for VersionedCollapsingMergeTree")
+
+
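A short sketch of how the two collapsing configs above are constructed (column names are illustrative):

    from moose_lib.blocks import (
        CollapsingMergeTreeEngine,
        VersionedCollapsingMergeTreeEngine,
    )

    # sign holds 1 for a state row and -1 for its cancelling row.
    collapsing = CollapsingMergeTreeEngine(sign="sign")

    # The versioned variant additionally orders state changes by ver,
    # so out-of-order state/cancel pairs still collapse correctly.
    versioned = VersionedCollapsingMergeTreeEngine(sign="sign", ver="version")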
+@dataclass
+class ReplicatedMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedMergeTree engine (replicated version of MergeTree)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
+
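The XOR check above encodes a simple pairing rule; a sketch assuming a self-managed cluster with the standard Keeper macros from the docstring:

    from moose_lib.blocks import ReplicatedMergeTreeEngine

    # Self-managed cluster: supply both Keeper settings together.
    self_managed = ReplicatedMergeTreeEngine(
        keeper_path="/clickhouse/tables/{database}/{shard}/events",
        replica_name="{replica}",
    )

    # ClickHouse Cloud: omit both and let the service manage replication.
    cloud = ReplicatedMergeTreeEngine()

    # Passing only one of the pair raises ValueError in __post_init__.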
+@dataclass
+class ReplicatedReplacingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedReplacingMergeTree engine (replicated version with deduplication)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        ver: Optional column name for version tracking
+        is_deleted: Optional column name for deletion marking (requires ver)
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    ver: Optional[str] = None
+    is_deleted: Optional[str] = None
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+        if self.is_deleted and not self.ver:
+            raise ValueError("is_deleted requires ver to be specified")
+
+
+@dataclass
+class ReplicatedAggregatingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedAggregatingMergeTree engine (replicated version for aggregations)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
+
+@dataclass
+class ReplicatedSummingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedSummingMergeTree engine (replicated version for summation)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        columns: Optional list of column names to sum
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    columns: Optional[List[str]] = None
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+
+
+@dataclass
+class ReplicatedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedCollapsingMergeTree engine (replicated version with collapsing)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    sign: str = field(default=None)
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+        if not self.sign:
+            raise ValueError(
+                "sign column is required for ReplicatedCollapsingMergeTree"
+            )
+
+
+@dataclass
+class ReplicatedVersionedCollapsingMergeTreeEngine(EngineConfig):
+    """Configuration for ReplicatedVersionedCollapsingMergeTree engine (replicated version with versioned collapsing)
+
+    Args:
+        keeper_path: Keeper path for replication (e.g., '/clickhouse/tables/{database}/{shard}/table_name')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        replica_name: Replica name (e.g., '{replica}')
+            Optional: omit for ClickHouse Cloud which manages replication automatically
+        sign: Column name indicating row type (1 = state, -1 = cancel)
+        ver: Column name for object state versioning
+
+    Note: Both keeper_path and replica_name must be provided together, or both omitted.
+    """
+
+    keeper_path: Optional[str] = None
+    replica_name: Optional[str] = None
+    sign: str = field(default=None)
+    ver: str = field(default=None)
+
+    def __post_init__(self):
+        # Both must be provided or both must be None
+        if (self.keeper_path is None) != (self.replica_name is None):
+            raise ValueError(
+                "keeper_path and replica_name must both be provided or both be None"
+            )
+        if not self.sign:
+            raise ValueError(
+                "sign column is required for ReplicatedVersionedCollapsingMergeTree"
+            )
+        if not self.ver:
+            raise ValueError(
+                "ver column is required for ReplicatedVersionedCollapsingMergeTree"
+            )
+
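Note that sign and ver above are declared with field(default=None) only because dataclass fields with defaults (keeper_path, replica_name) precede them; __post_init__ still makes them effectively required. A sketch with illustrative names:

    from moose_lib.blocks import ReplicatedCollapsingMergeTreeEngine

    engine = ReplicatedCollapsingMergeTreeEngine(
        keeper_path="/clickhouse/tables/{database}/{shard}/page_views",
        replica_name="{replica}",
        sign="sign",  # omitting sign raises ValueError despite the None default
    )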
 
 @dataclass
 class S3QueueEngine(EngineConfig):
     """Configuration for S3Queue engine - only non-alterable constructor parameters.
-
+
     S3Queue-specific settings like 'mode', 'keeper_path', etc. should be specified
     in the settings field of OlapConfig, not here.
     """
-
+
     # Required fields
     s3_path: str  # S3 bucket path with wildcards (e.g., 's3://bucket/prefix/*.json')
-    format: str # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
-
+    format: str  # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+
     # Optional AWS credentials
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
-
+
     # Optional configuration
     compression: Optional[str] = None  # e.g., 'gzip', 'zstd'
     headers: Optional[Dict[str, str]] = None
-
+
     def __post_init__(self):
         """Validate required fields"""
         if not self.s3_path:
@@ -81,34 +312,233 @@ class S3QueueEngine(EngineConfig):
         if not self.format:
             raise ValueError("S3Queue engine requires 'format'")
 
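A minimal construction sketch (the bucket path is hypothetical; per the docstring, mode, keeper_path, and other S3Queue settings belong in OlapConfig's settings field):

    from moose_lib.blocks import S3QueueEngine

    engine = S3QueueEngine(
        s3_path="s3://my-bucket/events/*.json",
        format="JSONEachRow",
        compression="gzip",
    )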
+
+@dataclass
+class S3Engine(EngineConfig):
+    """Configuration for S3 engine - direct read/write from S3 storage.
+
+    Args:
+        path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
+        format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets)
+        aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
+        compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
+        partition_strategy: Optional partition strategy
+        partition_columns_in_data_file: Optional partition columns in data file
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("S3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("S3 engine requires 'format'")
+
+
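A sketch of the simplest case (a public object needs neither credential, matching the docstring):

    from moose_lib.blocks import S3Engine

    engine = S3Engine(
        path="s3://public-data/trips.csv",  # hypothetical public object
        format="CSV",
    )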
+@dataclass
+class BufferEngine(EngineConfig):
+    """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
+
+    Args:
+        target_database: Target database name for the destination table
+        target_table: Target table name where data will be flushed
+        num_layers: Number of buffer layers (typically 16)
+        min_time: Minimum time in seconds before flushing
+        max_time: Maximum time in seconds before flushing
+        min_rows: Minimum number of rows before flushing
+        max_rows: Maximum number of rows before flushing
+        min_bytes: Minimum bytes before flushing
+        max_bytes: Maximum bytes before flushing
+        flush_time: Optional flush time in seconds
+        flush_rows: Optional flush number of rows
+        flush_bytes: Optional flush number of bytes
+    """
+
+    # Required fields
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+
+    # Optional fields
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.target_database:
+            raise ValueError("Buffer engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Buffer engine requires 'target_table'")
+
+
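A construction sketch with illustrative thresholds (per ClickHouse Buffer semantics, data is flushed once all min_* conditions are met or any max_* condition is exceeded):

    from moose_lib.blocks import BufferEngine

    engine = BufferEngine(
        target_database="analytics",  # hypothetical destination
        target_table="events",
        num_layers=16,  # the docstring's typical value
        min_time=10,
        max_time=100,
        min_rows=10_000,
        max_rows=1_000_000,
        min_bytes=10_000_000,
        max_bytes=100_000_000,
    )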
+@dataclass
+class DistributedEngine(EngineConfig):
+    """Configuration for Distributed engine - distributed table across a cluster.
+
+    Args:
+        cluster: Cluster name from the ClickHouse configuration
+        target_database: Database name on the cluster
+        target_table: Table name on the cluster
+        sharding_key: Optional sharding key expression for data distribution
+        policy_name: Optional policy name for data distribution
+    """
+
+    # Required fields
+    cluster: str
+    target_database: str
+    target_table: str
+
+    # Optional fields
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.cluster:
+            raise ValueError("Distributed engine requires 'cluster'")
+        if not self.target_database:
+            raise ValueError("Distributed engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Distributed engine requires 'target_table'")
+
+
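A sketch with a hypothetical cluster; sharding_key accepts any ClickHouse expression:

    from moose_lib.blocks import DistributedEngine

    engine = DistributedEngine(
        cluster="my_cluster",
        target_database="analytics",
        target_table="events_local",
        sharding_key="cityHash64(user_id)",  # illustrative expression
    )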
+@dataclass
+class IcebergS3Engine(EngineConfig):
+    """Configuration for IcebergS3 engine - read-only Iceberg table access.
+
+    Provides direct querying of Apache Iceberg tables stored on S3.
+    Data is not copied; queries stream directly from Parquet/ORC files.
+
+    Args:
+        path: S3 path to Iceberg table root (e.g., 's3://bucket/warehouse/events/')
+        format: Data format - 'Parquet' or 'ORC'
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets or IAM roles)
+        aws_secret_access_key: AWS secret access key (optional)
+        compression: Compression type (optional: 'gzip', 'zstd', 'auto')
+
+    Example:
+        >>> from moose_lib import OlapTable, OlapConfig, moose_runtime_env
+        >>> from moose_lib.blocks import IcebergS3Engine
+        >>>
+        >>> lake_events = OlapTable[Event](
+        ...     "lake_events",
+        ...     OlapConfig(
+        ...         engine=IcebergS3Engine(
+        ...             path="s3://datalake/events/",
+        ...             format="Parquet",
+        ...             aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+        ...             aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+        ...         )
+        ...     )
+        ... )
+
+    Note:
+        - IcebergS3 engine is read-only
+        - Does not support ORDER BY, PARTITION BY, or SAMPLE BY clauses
+        - Queries always see the latest Iceberg snapshot (with metadata cache)
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("IcebergS3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("IcebergS3 engine requires 'format'")
+        if self.format not in ["Parquet", "ORC"]:
+            raise ValueError(
+                f"IcebergS3 format must be 'Parquet' or 'ORC', got '{self.format}'"
+            )
+
+
+@dataclass
+class KafkaEngine(EngineConfig):
+    """Kafka engine for streaming data from Kafka topics.
+
+    Args:
+        broker_list: Kafka broker addresses (e.g., 'kafka:9092')
+        topic_list: Topics to consume from
+        group_name: Consumer group identifier
+        format: Message format (e.g., 'JSONEachRow')
+
+    Additional settings (kafka_num_consumers, security) go in OlapConfig.settings.
+    """
+
+    broker_list: str
+    topic_list: str
+    group_name: str
+    format: str
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.broker_list:
+            raise ValueError("Kafka engine requires 'broker_list'")
+        if not self.topic_list:
+            raise ValueError("Kafka engine requires 'topic_list'")
+        if not self.group_name:
+            raise ValueError("Kafka engine requires 'group_name'")
+        if not self.format:
+            raise ValueError("Kafka engine requires 'format'")
+
+
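A construction sketch with hypothetical broker and topic names (consumer tuning such as kafka_num_consumers goes in OlapConfig.settings, per the docstring):

    from moose_lib.blocks import KafkaEngine

    engine = KafkaEngine(
        broker_list="kafka:9092",
        topic_list="events",
        group_name="moose_consumer",
        format="JSONEachRow",
    )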
 # ==========================
 # New Table Configuration (Recommended API)
 # ==========================
 
+
 @dataclass
 class TableConfig:
     """Modern table configuration with engine-specific settings"""
-
+
     # Engine configuration (required in new API)
     engine: EngineConfig
-
+
     # Common settings
     name: str
     columns: Dict[str, str]
     order_by: Optional[str] = None
-
+
     # Note: Factory methods (with_s3_queue, with_merge_tree, with_replacing_merge_tree)
     # were removed in ENG-856. Use direct configuration instead, e.g.:
     # TableConfig(name="table", columns={...}, engine=S3QueueEngine(s3_path="...", format="..."))
     # TableConfig(name="table", columns={...}, engine=ReplacingMergeTreeEngine(ver="updated_at"))
 
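Putting the note above into a complete example (table and column names are illustrative):

    from moose_lib.blocks import ReplacingMergeTreeEngine, TableConfig

    config = TableConfig(
        name="user_events",
        columns={"id": "String", "updated_at": "DateTime"},
        engine=ReplacingMergeTreeEngine(ver="updated_at"),
        order_by="id",
    )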
+
 # ==========================
 # Legacy API Support (Deprecated)
 # ==========================
 
+
 @dataclass
 class S3QueueEngineConfig:
     """Legacy S3Queue configuration (deprecated - use S3QueueEngine instead)"""
+
     path: str  # S3 path pattern (e.g., 's3://bucket/data/*.json')
     format: str  # Data format (e.g., 'JSONEachRow', 'CSV', etc.)
     # Optional S3 access credentials - can be NOSIGN for public buckets
@@ -119,37 +549,48 @@ class S3QueueEngineConfig:
     # Optional headers
     headers: Optional[Dict[str, str]] = None
 
+
 @dataclass
 class TableCreateOptions:
     name: str
     columns: Dict[str, str]
     engine: Optional[ClickHouseEngines] = ClickHouseEngines.MergeTree
     order_by: Optional[str] = None
-    s3_queue_engine_config: Optional[S3QueueEngineConfig] = None # Required when engine is S3Queue
+    s3_queue_engine_config: Optional[S3QueueEngineConfig] = (
+        None  # Required when engine is S3Queue
+    )
 
     def __post_init__(self):
         """Validate S3Queue configuration"""
-        if self.engine == ClickHouseEngines.S3Queue and self.s3_queue_engine_config is None:
+        if (
+            self.engine == ClickHouseEngines.S3Queue
+            and self.s3_queue_engine_config is None
+        ):
             raise ValueError(
                 "s3_queue_engine_config is required when using ClickHouseEngines.S3Queue engine. "
                 "Please provide s3_queue_engine_config with path, format, and optional settings."
             )
 
+
 # ==========================
 # Backward Compatibility Layer
 # ==========================
 
+
 def is_new_config(config: Any) -> bool:
     """Check if configuration uses new API"""
     if isinstance(config, TableConfig):
         return True
-    if hasattr(config, 'engine') and isinstance(getattr(config, 'engine'), EngineConfig):
+    if hasattr(config, "engine") and isinstance(
+        getattr(config, "engine"), EngineConfig
+    ):
         return True
     return False
 
+
 def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
     """Convert legacy configuration to new format"""
-
+
     # Show deprecation warning
     warnings.warn(
         "Using deprecated TableCreateOptions. Please migrate to TableConfig:\n"
@@ -157,9 +598,9 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
         "- For deduplication: Use TableConfig(name='table', columns={...}, engine=ReplacingMergeTreeEngine())\n"
         "See documentation for examples.",
         DeprecationWarning,
-        stacklevel=2
+        stacklevel=2,
     )
-
+
     # Handle S3Queue with separate config
     if legacy.engine == ClickHouseEngines.S3Queue and legacy.s3_queue_engine_config:
         s3_config = legacy.s3_queue_engine_config
@@ -172,11 +613,11 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
                 aws_access_key_id=s3_config.aws_access_key_id,
                 aws_secret_access_key=s3_config.aws_secret_access_key,
                 compression=s3_config.compression,
-                headers=s3_config.headers
+                headers=s3_config.headers,
             ),
-            order_by=legacy.order_by
+            order_by=legacy.order_by,
         )
-
+
     # Map legacy engine enum to new engine classes
     engine_map = {
         ClickHouseEngines.MergeTree: MergeTreeEngine(),
@@ -184,97 +625,115 @@ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
         ClickHouseEngines.AggregatingMergeTree: AggregatingMergeTreeEngine(),
         ClickHouseEngines.SummingMergeTree: SummingMergeTreeEngine(),
     }
-
+
     engine = engine_map.get(legacy.engine) if legacy.engine else MergeTreeEngine()
     if engine is None:
         engine = MergeTreeEngine()
-
+
     return TableConfig(
         name=legacy.name,
         columns=legacy.columns,
        engine=engine,
-        order_by=legacy.order_by
+        order_by=legacy.order_by,
    )
 
+
 def normalize_config(config: Union[TableConfig, TableCreateOptions]) -> TableConfig:
     """Normalize any configuration format to new API"""
     if is_new_config(config):
         return config  # type: ignore
     return migrate_legacy_config(config)  # type: ignore
 
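A sketch of the compatibility path end to end (names are illustrative): legacy options emit a DeprecationWarning and are migrated, while new-style configs pass through untouched:

    from moose_lib.blocks import (
        ClickHouseEngines,
        TableCreateOptions,
        normalize_config,
    )

    legacy = TableCreateOptions(
        name="user_events",
        columns={"id": "String"},
        engine=ClickHouseEngines.ReplacingMergeTree,
        order_by="id",
    )

    # Warns, then returns a TableConfig with ReplacingMergeTreeEngine().
    config = normalize_config(legacy)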
+
 @dataclass
 class AggregationCreateOptions:
     table_create_options: TableCreateOptions
     materialized_view_name: str
     select: str
 
+
 @dataclass
 class AggregationDropOptions:
     view_name: str
     table_name: str
 
+
 @dataclass
 class MaterializedViewCreateOptions:
     name: str
     destination_table: str
     select: str
 
+
 @dataclass
 class PopulateTableOptions:
     destination_table: str
     select: str
 
+
 @dataclass
 class Blocks:
     teardown: list[str]
     setup: list[str]
 
+
 def drop_aggregation(options: AggregationDropOptions) -> list[str]:
     """
     Drops an aggregation's view & underlying table.
     """
     return [drop_view(options.view_name), drop_table(options.table_name)]
 
+
 def drop_table(name: str) -> str:
     """
     Drops an existing table if it exists.
     """
     return f"DROP TABLE IF EXISTS {name}".strip()
 
+
 def drop_view(name: str) -> str:
     """
     Drops an existing view if it exists.
     """
     return f"DROP VIEW IF EXISTS {name}".strip()
 
+
 def create_aggregation(options: AggregationCreateOptions) -> list[str]:
     """
     Creates an aggregation which includes a table, materialized view, and initial data load.
     """
     return [
         create_table(options.table_create_options),
-        create_materialized_view(MaterializedViewCreateOptions(
-            name=options.materialized_view_name,
-            destination_table=options.table_create_options.name,
-            select=options.select
-        )),
-        populate_table(PopulateTableOptions(
-            destination_table=options.table_create_options.name,
-            select=options.select
-        )),
+        create_materialized_view(
+            MaterializedViewCreateOptions(
+                name=options.materialized_view_name,
+                destination_table=options.table_create_options.name,
+                select=options.select,
+            )
+        ),
+        populate_table(
+            PopulateTableOptions(
+                destination_table=options.table_create_options.name,
+                select=options.select,
+            )
+        ),
     ]
 
+
 def create_materialized_view(options: MaterializedViewCreateOptions) -> str:
     """
     Creates a materialized view.
     """
     return f"CREATE MATERIALIZED VIEW IF NOT EXISTS {options.name} \nTO {options.destination_table}\nAS {options.select}".strip()
 
+
 def create_table(options: TableCreateOptions) -> str:
     """
     Creates a new table with default MergeTree engine.
     """
-    column_definitions = ",\n".join([f"{name} {type}" for name, type in options.columns.items()])
+    column_definitions = ",\n".join(
+        [f"{name} {type}" for name, type in options.columns.items()]
    )
     order_by_clause = f"ORDER BY {options.order_by}" if options.order_by else ""
     engine = options.engine.value if options.engine else "MergeTree"
 
@@ -287,8 +746,9 @@ def create_table(options: TableCreateOptions) -> str:
     {order_by_clause}
     """.strip()
 
+
 def populate_table(options: PopulateTableOptions) -> str:
     """
     Populates a table with data.
     """
-    return f"INSERT INTO {options.destination_table}\n{options.select}".strip()
+    return f"INSERT INTO {options.destination_table}\n{options.select}".strip()
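Tying the helpers together, a sketch of the three statements create_aggregation emits (CREATE TABLE, CREATE MATERIALIZED VIEW ... TO, and a backfilling INSERT); all names are illustrative:

    from moose_lib.blocks import (
        AggregationCreateOptions,
        ClickHouseEngines,
        TableCreateOptions,
        create_aggregation,
    )

    statements = create_aggregation(
        AggregationCreateOptions(
            table_create_options=TableCreateOptions(
                name="daily_counts",
                columns={"day": "Date", "n": "UInt64"},
                engine=ClickHouseEngines.AggregatingMergeTree,
                order_by="day",
            ),
            materialized_view_name="daily_counts_mv",
            select="SELECT toDate(ts) AS day, count() AS n FROM events GROUP BY day",
        )
    )
    for sql in statements:
        print(sql)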