moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
@@ -8,16 +8,17 @@ import warnings
8
8
  from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
9
9
  from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
10
10
  from moose_lib.internal import (
11
- _convert_engine_to_config_dict,
11
+ _convert_engine_to_config_dict,
12
12
  EngineConfigDict,
13
13
  S3QueueConfigDict,
14
14
  MergeTreeConfigDict,
15
- ReplacingMergeTreeConfigDict
15
+ ReplacingMergeTreeConfigDict,
16
16
  )
17
17
 
18
18
 
19
19
  class SampleEvent(BaseModel):
20
20
  """Sample model for S3Queue table tests."""
21
+
21
22
  id: str
22
23
  timestamp: datetime
23
24
  message: str
@@ -25,10 +26,7 @@ class SampleEvent(BaseModel):
25
26
 
26
27
  def test_olap_config_accepts_enum():
27
28
  """Test that OlapConfig accepts ClickHouseEngines enum values."""
28
- config = OlapConfig(
29
- engine=ClickHouseEngines.MergeTree,
30
- order_by_fields=["id"]
31
- )
29
+ config = OlapConfig(engine=ClickHouseEngines.MergeTree, order_by_fields=["id"])
32
30
  assert config.engine == ClickHouseEngines.MergeTree
33
31
 
34
32
 
@@ -38,11 +36,11 @@ def test_olap_config_accepts_engine_config():
38
36
  s3_path="s3://bucket/data/*.json",
39
37
  format="JSONEachRow",
40
38
  aws_access_key_id="AKIA123",
41
- aws_secret_access_key="secret123"
39
+ aws_secret_access_key="secret123",
42
40
  )
43
41
  config = OlapConfig(
44
- engine=s3_engine,
45
- order_by_fields=["timestamp"]
42
+ engine=s3_engine
43
+ # Note: S3QueueEngine does not support order_by_fields
46
44
  )
47
45
  assert isinstance(config.engine, S3QueueEngine)
48
46
  assert config.engine.s3_path == "s3://bucket/data/*.json"
@@ -56,16 +54,16 @@ def test_olap_table_with_s3queue_engine():
56
54
  engine=S3QueueEngine(
57
55
  s3_path="s3://test-bucket/logs/*.json",
58
56
  format="JSONEachRow",
59
- compression="gzip"
57
+ compression="gzip",
60
58
  ),
61
- order_by_fields=["timestamp", "id"],
59
+ # Note: S3QueueEngine does not support order_by_fields
62
60
  settings={
63
61
  "s3queue_mode": "unordered",
64
- "s3queue_keeper_path": "/clickhouse/s3queue/test"
65
- }
66
- )
62
+ "s3queue_keeper_path": "/clickhouse/s3queue/test",
63
+ },
64
+ ),
67
65
  )
68
-
66
+
69
67
  assert table.name == "TestS3Table"
70
68
  assert isinstance(table.config.engine, S3QueueEngine)
71
69
  assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
@@ -76,21 +74,14 @@ def test_olap_table_with_mergetree_engines():
76
74
  """Test creating OlapTable with various MergeTree engine configs."""
77
75
  # Test with MergeTreeEngine
78
76
  table1 = OlapTable[SampleEvent](
79
- "MergeTreeTable",
80
- OlapConfig(
81
- engine=MergeTreeEngine(),
82
- order_by_fields=["id"]
83
- )
77
+ "MergeTreeTable", OlapConfig(engine=MergeTreeEngine(), order_by_fields=["id"])
84
78
  )
85
79
  assert isinstance(table1.config.engine, MergeTreeEngine)
86
-
80
+
87
81
  # Test with ReplacingMergeTreeEngine
88
82
  table2 = OlapTable[SampleEvent](
89
83
  "ReplacingTable",
90
- OlapConfig(
91
- engine=ReplacingMergeTreeEngine(),
92
- order_by_fields=["id"]
93
- )
84
+ OlapConfig(engine=ReplacingMergeTreeEngine(), order_by_fields=["id"]),
94
85
  )
95
86
  assert isinstance(table2.config.engine, ReplacingMergeTreeEngine)
96
87
 
@@ -107,14 +98,14 @@ def test_engine_conversion_to_dict():
107
98
  aws_access_key_id="AKIA456",
108
99
  aws_secret_access_key="secret456",
109
100
  compression="zstd",
110
- headers={"X-Custom": "value"}
101
+ headers={"X-Custom": "value"},
111
102
  )
112
- )
103
+ ),
113
104
  )
114
-
105
+
115
106
  # Convert engine to dict
116
107
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
117
-
108
+
118
109
  assert engine_dict.engine == "S3Queue"
119
110
  assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
120
111
  assert engine_dict.format == "Parquet"
@@ -128,15 +119,12 @@ def test_engine_conversion_with_enum():
128
119
  """Test conversion of enum engines to EngineConfigDict."""
129
120
  # Create a mock table with enum engine
130
121
  table = OlapTable[SampleEvent](
131
- "TestTable",
132
- OlapConfig(
133
- engine=ClickHouseEngines.ReplacingMergeTree
134
- )
122
+ "TestTable", OlapConfig(engine=ClickHouseEngines.ReplacingMergeTree)
135
123
  )
136
-
124
+
137
125
  # Convert engine to dict
138
126
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
139
-
127
+
140
128
  assert engine_dict.engine == "ReplacingMergeTree"
141
129
 
142
130
 
@@ -145,21 +133,14 @@ def test_backward_compatibility():
145
133
  # Old API with enum
146
134
  old_table = OlapTable[SampleEvent](
147
135
  "OldTable",
148
- OlapConfig(
149
- engine=ClickHouseEngines.MergeTree,
150
- order_by_fields=["id"]
151
- )
136
+ OlapConfig(engine=ClickHouseEngines.MergeTree, order_by_fields=["id"]),
152
137
  )
153
-
138
+
154
139
  # New API with EngineConfig
155
140
  new_table = OlapTable[SampleEvent](
156
- "NewTable",
157
- OlapConfig(
158
- engine=MergeTreeEngine(),
159
- order_by_fields=["id"]
160
- )
141
+ "NewTable", OlapConfig(engine=MergeTreeEngine(), order_by_fields=["id"])
161
142
  )
162
-
143
+
163
144
  # Both should work
164
145
  assert old_table.config.engine == ClickHouseEngines.MergeTree
165
146
  assert isinstance(new_table.config.engine, MergeTreeEngine)
@@ -169,15 +150,12 @@ def test_deprecation_warning_for_enum():
169
150
  """Test that using enum engine triggers deprecation warning."""
170
151
  with warnings.catch_warnings(record=True) as w:
171
152
  warnings.simplefilter("always")
172
-
153
+
173
154
  table = OlapTable[SampleEvent](
174
155
  "LegacyTable",
175
- OlapConfig(
176
- engine=ClickHouseEngines.S3Queue,
177
- order_by_fields=["id"]
178
- )
156
+ OlapConfig(engine=ClickHouseEngines.S3Queue, order_by_fields=["id"]),
179
157
  )
180
-
158
+
181
159
  # Check that a deprecation warning was issued
182
160
  assert len(w) == 1
183
161
  assert issubclass(w[0].category, DeprecationWarning)
@@ -193,26 +171,23 @@ def test_s3queue_with_all_options():
193
171
  aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
194
172
  aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
195
173
  compression="gzip",
196
- headers={
197
- "X-Custom-Header": "value1",
198
- "Authorization": "Bearer token"
199
- }
174
+ headers={"X-Custom-Header": "value1", "Authorization": "Bearer token"},
200
175
  )
201
-
176
+
202
177
  table = OlapTable[SampleEvent](
203
178
  "FullConfigTable",
204
179
  OlapConfig(
205
180
  engine=engine,
206
- order_by_fields=["timestamp"],
181
+ # Note: S3QueueEngine does not support order_by_fields
207
182
  settings={
208
183
  "s3queue_mode": "ordered",
209
184
  "s3queue_keeper_path": "/clickhouse/s3queue/full",
210
185
  "s3queue_loading_retries": "5",
211
- "s3queue_processing_threads_num": "8"
212
- }
213
- )
186
+ "s3queue_processing_threads_num": "8",
187
+ },
188
+ ),
214
189
  )
215
-
190
+
216
191
  assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
217
192
  assert table.config.engine.format == "JSONEachRow"
218
193
  assert table.config.engine.compression == "gzip"
@@ -223,88 +198,214 @@ def test_s3queue_public_bucket():
223
198
  """Test S3QueueEngine for public bucket (no credentials)."""
224
199
  engine = S3QueueEngine(
225
200
  s3_path="s3://public-bucket/open-data/*.parquet",
226
- format="Parquet"
201
+ format="Parquet",
227
202
  # No AWS credentials needed for public buckets
228
203
  )
229
-
204
+
230
205
  table = OlapTable[SampleEvent](
231
206
  "PublicBucketTable",
232
207
  OlapConfig(
233
- engine=engine,
234
- order_by_fields=["id"]
235
- )
208
+ engine=engine
209
+ # Note: S3QueueEngine does not support order_by_fields
210
+ ),
236
211
  )
237
-
212
+
238
213
  assert table.config.engine.aws_access_key_id is None
239
214
  assert table.config.engine.aws_secret_access_key is None
240
215
 
241
216
 
242
217
  def test_migration_from_legacy_to_new():
243
218
  """Test migration path from legacy to new API."""
244
- # Legacy approach
219
+ # Legacy approach (with MergeTree, which supports order_by_fields)
245
220
  legacy_config = OlapConfig(
246
- engine=ClickHouseEngines.S3Queue,
247
- order_by_fields=["timestamp"]
248
- )
249
-
250
- # New approach - equivalent configuration
251
- new_config = OlapConfig(
252
- engine=S3QueueEngine(
253
- s3_path="s3://bucket/data/*.json",
254
- format="JSONEachRow"
255
- ),
256
- order_by_fields=["timestamp"]
221
+ engine=ClickHouseEngines.MergeTree, order_by_fields=["timestamp"]
257
222
  )
258
-
223
+
224
+ # New approach - equivalent configuration for MergeTree
225
+ new_config = OlapConfig(engine=MergeTreeEngine(), order_by_fields=["timestamp"])
226
+
259
227
  # Both should have the same order_by_fields
260
228
  assert legacy_config.order_by_fields == new_config.order_by_fields
261
-
229
+
262
230
  # Engine types should be different
263
231
  assert isinstance(legacy_config.engine, ClickHouseEngines)
264
- assert isinstance(new_config.engine, S3QueueEngine)
232
+ assert isinstance(new_config.engine, MergeTreeEngine)
233
+
234
+ # For S3Queue, the new API correctly prevents unsupported clauses
235
+ s3queue_config = OlapConfig(
236
+ engine=S3QueueEngine(s3_path="s3://bucket/data/*.json", format="JSONEachRow")
237
+ # Note: order_by_fields is not supported for S3QueueEngine
238
+ )
239
+ assert isinstance(s3queue_config.engine, S3QueueEngine)
265
240
 
266
241
 
267
242
  def test_engine_config_validation():
268
243
  """Test that S3QueueEngine validates required fields."""
269
244
  # Test missing required fields
270
245
  with pytest.raises(ValueError, match="S3Queue engine requires 's3_path'"):
271
- S3QueueEngine(
272
- s3_path="", # Empty path should fail
273
- format="JSONEachRow"
274
- )
275
-
246
+ S3QueueEngine(s3_path="", format="JSONEachRow") # Empty path should fail
247
+
276
248
  with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
277
249
  S3QueueEngine(
278
- s3_path="s3://bucket/data/*.json",
279
- format="" # Empty format should fail
250
+ s3_path="s3://bucket/data/*.json", format="" # Empty format should fail
280
251
  )
281
252
 
282
253
 
254
+ def test_non_mergetree_engines_reject_unsupported_clauses():
255
+ """Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
256
+ from moose_lib.blocks import (
257
+ S3Engine,
258
+ S3QueueEngine,
259
+ BufferEngine,
260
+ DistributedEngine,
261
+ )
262
+
263
+ # Test S3Engine DOES support ORDER BY (should not raise)
264
+ config_s3_with_order_by = OlapConfig(
265
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
266
+ order_by_fields=["id"],
267
+ )
268
+ assert config_s3_with_order_by.order_by_fields == ["id"]
269
+
270
+ # Test S3Engine rejects SAMPLE BY
271
+ with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
272
+ OlapConfig(
273
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
274
+ sample_by_expression="cityHash64(id)",
275
+ )
276
+
277
+ # Test S3Engine DOES support PARTITION BY (should not raise)
278
+ config_s3_with_partition = OlapConfig(
279
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
280
+ partition_by="toYYYYMM(timestamp)",
281
+ )
282
+ assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
283
+
284
+ # Test S3QueueEngine rejects ORDER BY
285
+ with pytest.raises(
286
+ ValueError, match="S3QueueEngine does not support ORDER BY clauses"
287
+ ):
288
+ OlapConfig(
289
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
290
+ order_by_fields=["id"],
291
+ )
292
+
293
+ # Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
294
+ with pytest.raises(
295
+ ValueError, match="S3QueueEngine does not support PARTITION BY clause"
296
+ ):
297
+ OlapConfig(
298
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
299
+ partition_by="toYYYYMM(timestamp)",
300
+ )
301
+
302
+ # Test BufferEngine rejects ORDER BY
303
+ with pytest.raises(
304
+ ValueError, match="BufferEngine does not support ORDER BY clauses"
305
+ ):
306
+ OlapConfig(
307
+ engine=BufferEngine(
308
+ target_database="default",
309
+ target_table="dest",
310
+ num_layers=16,
311
+ min_time=10,
312
+ max_time=100,
313
+ min_rows=10000,
314
+ max_rows=100000,
315
+ min_bytes=10000000,
316
+ max_bytes=100000000,
317
+ ),
318
+ order_by_fields=["id"],
319
+ )
320
+
321
+ # Test BufferEngine rejects PARTITION BY
322
+ with pytest.raises(
323
+ ValueError, match="BufferEngine does not support PARTITION BY clause"
324
+ ):
325
+ OlapConfig(
326
+ engine=BufferEngine(
327
+ target_database="default",
328
+ target_table="dest",
329
+ num_layers=16,
330
+ min_time=10,
331
+ max_time=100,
332
+ min_rows=10000,
333
+ max_rows=100000,
334
+ min_bytes=10000000,
335
+ max_bytes=100000000,
336
+ ),
337
+ partition_by="date",
338
+ )
339
+
340
+ # Test DistributedEngine rejects PARTITION BY
341
+ with pytest.raises(
342
+ ValueError, match="DistributedEngine does not support PARTITION BY clause"
343
+ ):
344
+ OlapConfig(
345
+ engine=DistributedEngine(
346
+ cluster="my_cluster",
347
+ target_database="default",
348
+ target_table="local_table",
349
+ ),
350
+ partition_by="date",
351
+ )
352
+
353
+ # Verify that S3Engine works without unsupported clauses
354
+ config = OlapConfig(
355
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
356
+ )
357
+ assert isinstance(config.engine, S3Engine)
358
+
359
+
360
+ def test_mergetree_engines_still_accept_clauses():
361
+ """Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
362
+ from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
363
+
364
+ # MergeTree should accept all clauses
365
+ config1 = OlapConfig(
366
+ engine=MergeTreeEngine(),
367
+ order_by_fields=["id", "timestamp"],
368
+ partition_by="toYYYYMM(timestamp)",
369
+ sample_by_expression="cityHash64(id)",
370
+ )
371
+ assert config1.order_by_fields == ["id", "timestamp"]
372
+ assert config1.partition_by == "toYYYYMM(timestamp)"
373
+ assert config1.sample_by_expression == "cityHash64(id)"
374
+
375
+ # ReplacingMergeTree should also accept these clauses
376
+ config2 = OlapConfig(
377
+ engine=ReplacingMergeTreeEngine(ver="updated_at"),
378
+ order_by_expression="(id, name)",
379
+ partition_by="date",
380
+ )
381
+ assert config2.order_by_expression == "(id, name)"
382
+ assert config2.partition_by == "date"
383
+
384
+
283
385
  def test_multiple_engine_types():
284
386
  """Test that different engine types can be used in the same application."""
285
387
  tables = []
286
-
388
+
287
389
  # Create tables with different engine types
288
- tables.append(OlapTable[SampleEvent](
289
- "MergeTreeTable",
290
- OlapConfig(engine=MergeTreeEngine())
291
- ))
292
-
293
- tables.append(OlapTable[SampleEvent](
294
- "ReplacingTreeTable",
295
- OlapConfig(engine=ReplacingMergeTreeEngine())
296
- ))
297
-
298
- tables.append(OlapTable[SampleEvent](
299
- "S3QueueTable",
300
- OlapConfig(
301
- engine=S3QueueEngine(
302
- s3_path="s3://bucket/*.json",
303
- format="JSONEachRow"
304
- )
390
+ tables.append(
391
+ OlapTable[SampleEvent]("MergeTreeTable", OlapConfig(engine=MergeTreeEngine()))
392
+ )
393
+
394
+ tables.append(
395
+ OlapTable[SampleEvent](
396
+ "ReplacingTreeTable", OlapConfig(engine=ReplacingMergeTreeEngine())
397
+ )
398
+ )
399
+
400
+ tables.append(
401
+ OlapTable[SampleEvent](
402
+ "S3QueueTable",
403
+ OlapConfig(
404
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow")
405
+ ),
305
406
  )
306
- ))
307
-
407
+ )
408
+
308
409
  # Verify all tables were created with correct engine types
309
410
  assert isinstance(tables[0].config.engine, MergeTreeEngine)
310
411
  assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)