moose-lib 0.6.160.tar.gz → 0.6.162.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of moose-lib might be problematic. Click here for more details.

Files changed (54)
  1. {moose_lib-0.6.160 → moose_lib-0.6.162}/PKG-INFO +1 -1
  2. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/blocks.py +107 -0
  3. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/olap_table.py +33 -0
  4. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/internal.py +83 -2
  5. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib.egg-info/PKG-INFO +1 -1
  6. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_s3queue_config.py +167 -38
  7. {moose_lib-0.6.160 → moose_lib-0.6.162}/README.md +0 -0
  8. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/__init__.py +0 -0
  9. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/clients/__init__.py +0 -0
  10. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/clients/redis_client.py +0 -0
  11. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/commons.py +0 -0
  12. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/config/__init__.py +0 -0
  13. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/config/config_file.py +0 -0
  14. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/config/runtime.py +0 -0
  15. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/data_models.py +0 -0
  16. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/__init__.py +0 -0
  17. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/_registry.py +0 -0
  18. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/consumption.py +0 -0
  19. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/ingest_api.py +0 -0
  20. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/ingest_pipeline.py +0 -0
  21. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/life_cycle.py +0 -0
  22. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/materialized_view.py +0 -0
  23. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/registry.py +0 -0
  24. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/sql_resource.py +0 -0
  25. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/stream.py +0 -0
  26. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/types.py +0 -0
  27. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/view.py +0 -0
  28. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/web_app.py +0 -0
  29. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/web_app_helpers.py +0 -0
  30. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2/workflow.py +0 -0
  31. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/dmv2_serializer.py +0 -0
  32. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/main.py +0 -0
  33. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/query_builder.py +0 -0
  34. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/query_param.py +0 -0
  35. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/secrets.py +0 -0
  36. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/streaming/__init__.py +0 -0
  37. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/streaming/streaming_function_runner.py +0 -0
  38. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/utilities/__init__.py +0 -0
  39. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib/utilities/sql.py +0 -0
  40. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib.egg-info/SOURCES.txt +0 -0
  41. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib.egg-info/dependency_links.txt +0 -0
  42. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib.egg-info/requires.txt +0 -0
  43. {moose_lib-0.6.160 → moose_lib-0.6.162}/moose_lib.egg-info/top_level.txt +0 -0
  44. {moose_lib-0.6.160 → moose_lib-0.6.162}/setup.cfg +0 -0
  45. {moose_lib-0.6.160 → moose_lib-0.6.162}/setup.py +0 -0
  46. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/__init__.py +0 -0
  47. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/conftest.py +0 -0
  48. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_moose.py +0 -0
  49. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_olap_table_versioning.py +0 -0
  50. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_query_builder.py +0 -0
  51. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_redis_client.py +0 -0
  52. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_secrets.py +0 -0
  53. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_simple_aggregate.py +0 -0
  54. {moose_lib-0.6.160 → moose_lib-0.6.162}/tests/test_web_app.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.6.160
3
+ Version: 0.6.162
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -14,6 +14,9 @@ class ClickHouseEngines(Enum):
14
14
  VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
15
15
  GraphiteMergeTree = "GraphiteMergeTree"
16
16
  S3Queue = "S3Queue"
17
+ S3 = "S3"
18
+ Buffer = "Buffer"
19
+ Distributed = "Distributed"
17
20
  ReplicatedMergeTree = "ReplicatedMergeTree"
18
21
  ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
19
22
  ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"
@@ -177,6 +180,110 @@ class S3QueueEngine(EngineConfig):
177
180
  if not self.format:
178
181
  raise ValueError("S3Queue engine requires 'format'")
179
182
 
183
+ @dataclass
184
+ class S3Engine(EngineConfig):
185
+ """Configuration for S3 engine - direct read/write from S3 storage.
186
+
187
+ Args:
188
+ path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
189
+ format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
190
+ aws_access_key_id: AWS access key ID (optional, omit for public buckets)
191
+ aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
192
+ compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
193
+ partition_strategy: Optional partition strategy
194
+ partition_columns_in_data_file: Optional partition columns in data file
195
+ """
196
+
197
+ # Required fields
198
+ path: str
199
+ format: str
200
+
201
+ # Optional fields
202
+ aws_access_key_id: Optional[str] = None
203
+ aws_secret_access_key: Optional[str] = None
204
+ compression: Optional[str] = None
205
+ partition_strategy: Optional[str] = None
206
+ partition_columns_in_data_file: Optional[str] = None
207
+
208
+ def __post_init__(self):
209
+ """Validate required fields"""
210
+ if not self.path:
211
+ raise ValueError("S3 engine requires 'path'")
212
+ if not self.format:
213
+ raise ValueError("S3 engine requires 'format'")
214
+
215
+ @dataclass
216
+ class BufferEngine(EngineConfig):
217
+ """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
218
+
219
+ Args:
220
+ target_database: Target database name for the destination table
221
+ target_table: Target table name where data will be flushed
222
+ num_layers: Number of buffer layers (typically 16)
223
+ min_time: Minimum time in seconds before flushing
224
+ max_time: Maximum time in seconds before flushing
225
+ min_rows: Minimum number of rows before flushing
226
+ max_rows: Maximum number of rows before flushing
227
+ min_bytes: Minimum bytes before flushing
228
+ max_bytes: Maximum bytes before flushing
229
+ flush_time: Optional flush time in seconds
230
+ flush_rows: Optional flush number of rows
231
+ flush_bytes: Optional flush number of bytes
232
+ """
233
+
234
+ # Required fields
235
+ target_database: str
236
+ target_table: str
237
+ num_layers: int
238
+ min_time: int
239
+ max_time: int
240
+ min_rows: int
241
+ max_rows: int
242
+ min_bytes: int
243
+ max_bytes: int
244
+
245
+ # Optional fields
246
+ flush_time: Optional[int] = None
247
+ flush_rows: Optional[int] = None
248
+ flush_bytes: Optional[int] = None
249
+
250
+ def __post_init__(self):
251
+ """Validate required fields"""
252
+ if not self.target_database:
253
+ raise ValueError("Buffer engine requires 'target_database'")
254
+ if not self.target_table:
255
+ raise ValueError("Buffer engine requires 'target_table'")
256
+
257
+ @dataclass
258
+ class DistributedEngine(EngineConfig):
259
+ """Configuration for Distributed engine - distributed table across a cluster.
260
+
261
+ Args:
262
+ cluster: Cluster name from the ClickHouse configuration
263
+ target_database: Database name on the cluster
264
+ target_table: Table name on the cluster
265
+ sharding_key: Optional sharding key expression for data distribution
266
+ policy_name: Optional policy name for data distribution
267
+ """
268
+
269
+ # Required fields
270
+ cluster: str
271
+ target_database: str
272
+ target_table: str
273
+
274
+ # Optional fields
275
+ sharding_key: Optional[str] = None
276
+ policy_name: Optional[str] = None
277
+
278
+ def __post_init__(self):
279
+ """Validate required fields"""
280
+ if not self.cluster:
281
+ raise ValueError("Distributed engine requires 'cluster'")
282
+ if not self.target_database:
283
+ raise ValueError("Distributed engine requires 'target_database'")
284
+ if not self.target_table:
285
+ raise ValueError("Distributed engine requires 'target_table'")
286
+
180
287
  # ==========================
181
288
  # New Table Configuration (Recommended API)
182
289
  # ==========================
@@ -151,6 +151,39 @@ class OlapConfig(BaseModel):
151
151
  if has_fields and has_expr:
152
152
  raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
153
153
 
154
+ # Validate that non-MergeTree engines don't have unsupported clauses
155
+ if self.engine:
156
+ from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
157
+
158
+ # All non-MergeTree engines don't support ORDER BY
159
+ non_mergetree_engines = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine)
160
+ if isinstance(self.engine, non_mergetree_engines):
161
+ engine_name = type(self.engine).__name__
162
+
163
+ if has_fields or has_expr:
164
+ raise ValueError(
165
+ f"{engine_name} does not support ORDER BY clauses. "
166
+ f"Remove order_by_fields or order_by_expression from your configuration."
167
+ )
168
+
169
+ if self.sample_by_expression:
170
+ raise ValueError(
171
+ f"{engine_name} does not support SAMPLE BY clause. "
172
+ f"Remove sample_by_expression from your configuration."
173
+ )
174
+
175
+ # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY
176
+ # S3Engine DOES support PARTITION BY
177
+ engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine)
178
+ if isinstance(self.engine, engines_without_partition_by):
179
+ engine_name = type(self.engine).__name__
180
+
181
+ if self.partition_by:
182
+ raise ValueError(
183
+ f"{engine_name} does not support PARTITION BY clause. "
184
+ f"Remove partition_by from your configuration."
185
+ )
186
+
154
187
 
155
188
  class OlapTable(TypedMooseResource, Generic[T]):
156
189
  """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
@@ -128,6 +128,45 @@ class S3QueueConfigDict(BaseEngineConfigDict):
128
128
  headers: Optional[Dict[str, str]] = None
129
129
 
130
130
 
131
+ class S3ConfigDict(BaseEngineConfigDict):
132
+ """Configuration for S3 engine."""
133
+ engine: Literal["S3"] = "S3"
134
+ path: str
135
+ format: str
136
+ aws_access_key_id: Optional[str] = None
137
+ aws_secret_access_key: Optional[str] = None
138
+ compression: Optional[str] = None
139
+ partition_strategy: Optional[str] = None
140
+ partition_columns_in_data_file: Optional[str] = None
141
+
142
+
143
+ class BufferConfigDict(BaseEngineConfigDict):
144
+ """Configuration for Buffer engine."""
145
+ engine: Literal["Buffer"] = "Buffer"
146
+ target_database: str
147
+ target_table: str
148
+ num_layers: int
149
+ min_time: int
150
+ max_time: int
151
+ min_rows: int
152
+ max_rows: int
153
+ min_bytes: int
154
+ max_bytes: int
155
+ flush_time: Optional[int] = None
156
+ flush_rows: Optional[int] = None
157
+ flush_bytes: Optional[int] = None
158
+
159
+
160
+ class DistributedConfigDict(BaseEngineConfigDict):
161
+ """Configuration for Distributed engine."""
162
+ engine: Literal["Distributed"] = "Distributed"
163
+ cluster: str
164
+ target_database: str
165
+ target_table: str
166
+ sharding_key: Optional[str] = None
167
+ policy_name: Optional[str] = None
168
+
169
+
131
170
  # Discriminated union of all engine configurations
132
171
  EngineConfigDict = Union[
133
172
  MergeTreeConfigDict,
@@ -138,7 +177,10 @@ EngineConfigDict = Union[
138
177
  ReplicatedReplacingMergeTreeConfigDict,
139
178
  ReplicatedAggregatingMergeTreeConfigDict,
140
179
  ReplicatedSummingMergeTreeConfigDict,
141
- S3QueueConfigDict
180
+ S3QueueConfigDict,
181
+ S3ConfigDict,
182
+ BufferConfigDict,
183
+ DistributedConfigDict
142
184
  ]
143
185
 
144
186
 
@@ -464,7 +506,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
464
506
  Returns:
465
507
  EngineConfigDict with engine-specific configuration
466
508
  """
467
- from moose_lib.blocks import S3QueueEngine
509
+ from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine
468
510
 
469
511
  # Try S3Queue first
470
512
  if isinstance(engine, S3QueueEngine):
@@ -476,6 +518,45 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
476
518
  compression=engine.compression,
477
519
  headers=engine.headers
478
520
  )
521
+
522
+ # Try S3
523
+ if isinstance(engine, S3Engine):
524
+ return S3ConfigDict(
525
+ path=engine.path,
526
+ format=engine.format,
527
+ aws_access_key_id=engine.aws_access_key_id,
528
+ aws_secret_access_key=engine.aws_secret_access_key,
529
+ compression=engine.compression,
530
+ partition_strategy=engine.partition_strategy,
531
+ partition_columns_in_data_file=engine.partition_columns_in_data_file
532
+ )
533
+
534
+ # Try Buffer
535
+ if isinstance(engine, BufferEngine):
536
+ return BufferConfigDict(
537
+ target_database=engine.target_database,
538
+ target_table=engine.target_table,
539
+ num_layers=engine.num_layers,
540
+ min_time=engine.min_time,
541
+ max_time=engine.max_time,
542
+ min_rows=engine.min_rows,
543
+ max_rows=engine.max_rows,
544
+ min_bytes=engine.min_bytes,
545
+ max_bytes=engine.max_bytes,
546
+ flush_time=engine.flush_time,
547
+ flush_rows=engine.flush_rows,
548
+ flush_bytes=engine.flush_bytes
549
+ )
550
+
551
+ # Try Distributed
552
+ if isinstance(engine, DistributedEngine):
553
+ return DistributedConfigDict(
554
+ cluster=engine.cluster,
555
+ target_database=engine.target_database,
556
+ target_table=engine.target_table,
557
+ sharding_key=engine.sharding_key,
558
+ policy_name=engine.policy_name
559
+ )
479
560
 
480
561
  # Try basic engines
481
562
  basic_config = _convert_basic_engine_instance(engine)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.6.160
3
+ Version: 0.6.162
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -8,7 +8,7 @@ import warnings
8
8
  from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
9
9
  from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
10
10
  from moose_lib.internal import (
11
- _convert_engine_to_config_dict,
11
+ _convert_engine_to_config_dict,
12
12
  EngineConfigDict,
13
13
  S3QueueConfigDict,
14
14
  MergeTreeConfigDict,
@@ -41,8 +41,8 @@ def test_olap_config_accepts_engine_config():
41
41
  aws_secret_access_key="secret123"
42
42
  )
43
43
  config = OlapConfig(
44
- engine=s3_engine,
45
- order_by_fields=["timestamp"]
44
+ engine=s3_engine
45
+ # Note: S3QueueEngine does not support order_by_fields
46
46
  )
47
47
  assert isinstance(config.engine, S3QueueEngine)
48
48
  assert config.engine.s3_path == "s3://bucket/data/*.json"
@@ -58,14 +58,14 @@ def test_olap_table_with_s3queue_engine():
58
58
  format="JSONEachRow",
59
59
  compression="gzip"
60
60
  ),
61
- order_by_fields=["timestamp", "id"],
61
+ # Note: S3QueueEngine does not support order_by_fields
62
62
  settings={
63
63
  "s3queue_mode": "unordered",
64
64
  "s3queue_keeper_path": "/clickhouse/s3queue/test"
65
65
  }
66
66
  )
67
67
  )
68
-
68
+
69
69
  assert table.name == "TestS3Table"
70
70
  assert isinstance(table.config.engine, S3QueueEngine)
71
71
  assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
@@ -83,7 +83,7 @@ def test_olap_table_with_mergetree_engines():
83
83
  )
84
84
  )
85
85
  assert isinstance(table1.config.engine, MergeTreeEngine)
86
-
86
+
87
87
  # Test with ReplacingMergeTreeEngine
88
88
  table2 = OlapTable[SampleEvent](
89
89
  "ReplacingTable",
@@ -111,10 +111,10 @@ def test_engine_conversion_to_dict():
111
111
  )
112
112
  )
113
113
  )
114
-
114
+
115
115
  # Convert engine to dict
116
116
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
117
-
117
+
118
118
  assert engine_dict.engine == "S3Queue"
119
119
  assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
120
120
  assert engine_dict.format == "Parquet"
@@ -133,10 +133,10 @@ def test_engine_conversion_with_enum():
133
133
  engine=ClickHouseEngines.ReplacingMergeTree
134
134
  )
135
135
  )
136
-
136
+
137
137
  # Convert engine to dict
138
138
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
139
-
139
+
140
140
  assert engine_dict.engine == "ReplacingMergeTree"
141
141
 
142
142
 
@@ -150,16 +150,16 @@ def test_backward_compatibility():
150
150
  order_by_fields=["id"]
151
151
  )
152
152
  )
153
-
153
+
154
154
  # New API with EngineConfig
155
155
  new_table = OlapTable[SampleEvent](
156
- "NewTable",
156
+ "NewTable",
157
157
  OlapConfig(
158
158
  engine=MergeTreeEngine(),
159
159
  order_by_fields=["id"]
160
160
  )
161
161
  )
162
-
162
+
163
163
  # Both should work
164
164
  assert old_table.config.engine == ClickHouseEngines.MergeTree
165
165
  assert isinstance(new_table.config.engine, MergeTreeEngine)
@@ -169,7 +169,7 @@ def test_deprecation_warning_for_enum():
169
169
  """Test that using enum engine triggers deprecation warning."""
170
170
  with warnings.catch_warnings(record=True) as w:
171
171
  warnings.simplefilter("always")
172
-
172
+
173
173
  table = OlapTable[SampleEvent](
174
174
  "LegacyTable",
175
175
  OlapConfig(
@@ -177,7 +177,7 @@ def test_deprecation_warning_for_enum():
177
177
  order_by_fields=["id"]
178
178
  )
179
179
  )
180
-
180
+
181
181
  # Check that a deprecation warning was issued
182
182
  assert len(w) == 1
183
183
  assert issubclass(w[0].category, DeprecationWarning)
@@ -198,12 +198,12 @@ def test_s3queue_with_all_options():
198
198
  "Authorization": "Bearer token"
199
199
  }
200
200
  )
201
-
201
+
202
202
  table = OlapTable[SampleEvent](
203
203
  "FullConfigTable",
204
204
  OlapConfig(
205
205
  engine=engine,
206
- order_by_fields=["timestamp"],
206
+ # Note: S3QueueEngine does not support order_by_fields
207
207
  settings={
208
208
  "s3queue_mode": "ordered",
209
209
  "s3queue_keeper_path": "/clickhouse/s3queue/full",
@@ -212,7 +212,7 @@ def test_s3queue_with_all_options():
212
212
  }
213
213
  )
214
214
  )
215
-
215
+
216
216
  assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
217
217
  assert table.config.engine.format == "JSONEachRow"
218
218
  assert table.config.engine.compression == "gzip"
@@ -226,42 +226,49 @@ def test_s3queue_public_bucket():
226
226
  format="Parquet"
227
227
  # No AWS credentials needed for public buckets
228
228
  )
229
-
229
+
230
230
  table = OlapTable[SampleEvent](
231
231
  "PublicBucketTable",
232
232
  OlapConfig(
233
- engine=engine,
234
- order_by_fields=["id"]
233
+ engine=engine
234
+ # Note: S3QueueEngine does not support order_by_fields
235
235
  )
236
236
  )
237
-
237
+
238
238
  assert table.config.engine.aws_access_key_id is None
239
239
  assert table.config.engine.aws_secret_access_key is None
240
240
 
241
241
 
242
242
  def test_migration_from_legacy_to_new():
243
243
  """Test migration path from legacy to new API."""
244
- # Legacy approach
244
+ # Legacy approach (with MergeTree, which supports order_by_fields)
245
245
  legacy_config = OlapConfig(
246
- engine=ClickHouseEngines.S3Queue,
246
+ engine=ClickHouseEngines.MergeTree,
247
247
  order_by_fields=["timestamp"]
248
248
  )
249
-
250
- # New approach - equivalent configuration
249
+
250
+ # New approach - equivalent configuration for MergeTree
251
251
  new_config = OlapConfig(
252
- engine=S3QueueEngine(
253
- s3_path="s3://bucket/data/*.json",
254
- format="JSONEachRow"
255
- ),
252
+ engine=MergeTreeEngine(),
256
253
  order_by_fields=["timestamp"]
257
254
  )
258
-
255
+
259
256
  # Both should have the same order_by_fields
260
257
  assert legacy_config.order_by_fields == new_config.order_by_fields
261
-
258
+
262
259
  # Engine types should be different
263
260
  assert isinstance(legacy_config.engine, ClickHouseEngines)
264
- assert isinstance(new_config.engine, S3QueueEngine)
261
+ assert isinstance(new_config.engine, MergeTreeEngine)
262
+
263
+ # For S3Queue, the new API correctly prevents unsupported clauses
264
+ s3queue_config = OlapConfig(
265
+ engine=S3QueueEngine(
266
+ s3_path="s3://bucket/data/*.json",
267
+ format="JSONEachRow"
268
+ )
269
+ # Note: order_by_fields is not supported for S3QueueEngine
270
+ )
271
+ assert isinstance(s3queue_config.engine, S3QueueEngine)
265
272
 
266
273
 
267
274
  def test_engine_config_validation():
@@ -272,7 +279,7 @@ def test_engine_config_validation():
272
279
  s3_path="", # Empty path should fail
273
280
  format="JSONEachRow"
274
281
  )
275
-
282
+
276
283
  with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
277
284
  S3QueueEngine(
278
285
  s3_path="s3://bucket/data/*.json",
@@ -280,21 +287,143 @@ def test_engine_config_validation():
280
287
  )
281
288
 
282
289
 
290
+ def test_non_mergetree_engines_reject_unsupported_clauses():
291
+ """Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
292
+ from moose_lib.blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
293
+
294
+ # Test S3Engine rejects ORDER BY
295
+ with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
296
+ OlapConfig(
297
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
298
+ order_by_fields=["id"]
299
+ )
300
+
301
+ with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
302
+ OlapConfig(
303
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
304
+ order_by_expression="(id, name)"
305
+ )
306
+
307
+ # Test S3Engine rejects SAMPLE BY
308
+ with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
309
+ OlapConfig(
310
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
311
+ sample_by_expression="cityHash64(id)"
312
+ )
313
+
314
+ # Test S3Engine DOES support PARTITION BY (should not raise)
315
+ config_s3_with_partition = OlapConfig(
316
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
317
+ partition_by="toYYYYMM(timestamp)"
318
+ )
319
+ assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
320
+
321
+ # Test S3QueueEngine rejects ORDER BY
322
+ with pytest.raises(ValueError, match="S3QueueEngine does not support ORDER BY clauses"):
323
+ OlapConfig(
324
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
325
+ order_by_fields=["id"]
326
+ )
327
+
328
+ # Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
329
+ with pytest.raises(ValueError, match="S3QueueEngine does not support PARTITION BY clause"):
330
+ OlapConfig(
331
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
332
+ partition_by="toYYYYMM(timestamp)"
333
+ )
334
+
335
+ # Test BufferEngine rejects ORDER BY
336
+ with pytest.raises(ValueError, match="BufferEngine does not support ORDER BY clauses"):
337
+ OlapConfig(
338
+ engine=BufferEngine(
339
+ target_database="default",
340
+ target_table="dest",
341
+ num_layers=16,
342
+ min_time=10,
343
+ max_time=100,
344
+ min_rows=10000,
345
+ max_rows=100000,
346
+ min_bytes=10000000,
347
+ max_bytes=100000000
348
+ ),
349
+ order_by_fields=["id"]
350
+ )
351
+
352
+ # Test BufferEngine rejects PARTITION BY
353
+ with pytest.raises(ValueError, match="BufferEngine does not support PARTITION BY clause"):
354
+ OlapConfig(
355
+ engine=BufferEngine(
356
+ target_database="default",
357
+ target_table="dest",
358
+ num_layers=16,
359
+ min_time=10,
360
+ max_time=100,
361
+ min_rows=10000,
362
+ max_rows=100000,
363
+ min_bytes=10000000,
364
+ max_bytes=100000000
365
+ ),
366
+ partition_by="date"
367
+ )
368
+
369
+ # Test DistributedEngine rejects PARTITION BY
370
+ with pytest.raises(ValueError, match="DistributedEngine does not support PARTITION BY clause"):
371
+ OlapConfig(
372
+ engine=DistributedEngine(
373
+ cluster="my_cluster",
374
+ target_database="default",
375
+ target_table="local_table"
376
+ ),
377
+ partition_by="date"
378
+ )
379
+
380
+ # Verify that S3Engine works without unsupported clauses
381
+ config = OlapConfig(
382
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
383
+ )
384
+ assert isinstance(config.engine, S3Engine)
385
+
386
+
387
+ def test_mergetree_engines_still_accept_clauses():
388
+ """Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
389
+ from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
390
+
391
+ # MergeTree should accept all clauses
392
+ config1 = OlapConfig(
393
+ engine=MergeTreeEngine(),
394
+ order_by_fields=["id", "timestamp"],
395
+ partition_by="toYYYYMM(timestamp)",
396
+ sample_by_expression="cityHash64(id)"
397
+ )
398
+ assert config1.order_by_fields == ["id", "timestamp"]
399
+ assert config1.partition_by == "toYYYYMM(timestamp)"
400
+ assert config1.sample_by_expression == "cityHash64(id)"
401
+
402
+ # ReplacingMergeTree should also accept these clauses
403
+ config2 = OlapConfig(
404
+ engine=ReplacingMergeTreeEngine(ver="updated_at"),
405
+ order_by_expression="(id, name)",
406
+ partition_by="date"
407
+ )
408
+ assert config2.order_by_expression == "(id, name)"
409
+ assert config2.partition_by == "date"
410
+
411
+
283
412
  def test_multiple_engine_types():
284
413
  """Test that different engine types can be used in the same application."""
285
414
  tables = []
286
-
415
+
287
416
  # Create tables with different engine types
288
417
  tables.append(OlapTable[SampleEvent](
289
418
  "MergeTreeTable",
290
419
  OlapConfig(engine=MergeTreeEngine())
291
420
  ))
292
-
421
+
293
422
  tables.append(OlapTable[SampleEvent](
294
423
  "ReplacingTreeTable",
295
424
  OlapConfig(engine=ReplacingMergeTreeEngine())
296
425
  ))
297
-
426
+
298
427
  tables.append(OlapTable[SampleEvent](
299
428
  "S3QueueTable",
300
429
  OlapConfig(
@@ -304,7 +433,7 @@ def test_multiple_engine_types():
304
433
  )
305
434
  )
306
435
  ))
307
-
436
+
308
437
  # Verify all tables were created with correct engine types
309
438
  assert isinstance(tables[0].config.engine, MergeTreeEngine)
310
439
  assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
File without changes
File without changes
File without changes