moose-lib 0.6.161__tar.gz → 0.6.162__tar.gz
This diff compares the contents of two package versions as published to one of the supported public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of moose-lib might be problematic. Click here for more details.
- {moose_lib-0.6.161 → moose_lib-0.6.162}/PKG-INFO +1 -1
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/blocks.py +107 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/olap_table.py +33 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/internal.py +83 -2
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib.egg-info/PKG-INFO +1 -1
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_s3queue_config.py +167 -38
- {moose_lib-0.6.161 → moose_lib-0.6.162}/README.md +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/clients/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/clients/redis_client.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/commons.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/config/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/config/config_file.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/config/runtime.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/data_models.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/_registry.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/consumption.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/ingest_api.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/ingest_pipeline.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/life_cycle.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/materialized_view.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/registry.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/sql_resource.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/stream.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/types.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/view.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/web_app.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/web_app_helpers.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/workflow.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2_serializer.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/main.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/query_builder.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/query_param.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/secrets.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/streaming/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/streaming/streaming_function_runner.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/utilities/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/utilities/sql.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib.egg-info/SOURCES.txt +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib.egg-info/dependency_links.txt +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib.egg-info/requires.txt +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib.egg-info/top_level.txt +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/setup.cfg +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/setup.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/__init__.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/conftest.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_moose.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_olap_table_versioning.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_query_builder.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_redis_client.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_secrets.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_simple_aggregate.py +0 -0
- {moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_web_app.py +0 -0
|
{moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/blocks.py
@@ -14,6 +14,9 @@ class ClickHouseEngines(Enum):
|
|
|
14
14
|
VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
|
|
15
15
|
GraphiteMergeTree = "GraphiteMergeTree"
|
|
16
16
|
S3Queue = "S3Queue"
|
|
17
|
+
S3 = "S3"
|
|
18
|
+
Buffer = "Buffer"
|
|
19
|
+
Distributed = "Distributed"
|
|
17
20
|
ReplicatedMergeTree = "ReplicatedMergeTree"
|
|
18
21
|
ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
|
|
19
22
|
ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"
|
|
@@ -177,6 +180,110 @@ class S3QueueEngine(EngineConfig):
|
|
|
177
180
|
if not self.format:
|
|
178
181
|
raise ValueError("S3Queue engine requires 'format'")
|
|
179
182
|
|
|
183
|
+
@dataclass
|
|
184
|
+
class S3Engine(EngineConfig):
|
|
185
|
+
"""Configuration for S3 engine - direct read/write from S3 storage.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
|
|
189
|
+
format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
|
|
190
|
+
aws_access_key_id: AWS access key ID (optional, omit for public buckets)
|
|
191
|
+
aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
|
|
192
|
+
compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
|
|
193
|
+
partition_strategy: Optional partition strategy
|
|
194
|
+
partition_columns_in_data_file: Optional partition columns in data file
|
|
195
|
+
"""
|
|
196
|
+
|
|
197
|
+
# Required fields
|
|
198
|
+
path: str
|
|
199
|
+
format: str
|
|
200
|
+
|
|
201
|
+
# Optional fields
|
|
202
|
+
aws_access_key_id: Optional[str] = None
|
|
203
|
+
aws_secret_access_key: Optional[str] = None
|
|
204
|
+
compression: Optional[str] = None
|
|
205
|
+
partition_strategy: Optional[str] = None
|
|
206
|
+
partition_columns_in_data_file: Optional[str] = None
|
|
207
|
+
|
|
208
|
+
def __post_init__(self):
|
|
209
|
+
"""Validate required fields"""
|
|
210
|
+
if not self.path:
|
|
211
|
+
raise ValueError("S3 engine requires 'path'")
|
|
212
|
+
if not self.format:
|
|
213
|
+
raise ValueError("S3 engine requires 'format'")
|
|
214
|
+
|
|
215
|
+
@dataclass
|
|
216
|
+
class BufferEngine(EngineConfig):
|
|
217
|
+
"""Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
target_database: Target database name for the destination table
|
|
221
|
+
target_table: Target table name where data will be flushed
|
|
222
|
+
num_layers: Number of buffer layers (typically 16)
|
|
223
|
+
min_time: Minimum time in seconds before flushing
|
|
224
|
+
max_time: Maximum time in seconds before flushing
|
|
225
|
+
min_rows: Minimum number of rows before flushing
|
|
226
|
+
max_rows: Maximum number of rows before flushing
|
|
227
|
+
min_bytes: Minimum bytes before flushing
|
|
228
|
+
max_bytes: Maximum bytes before flushing
|
|
229
|
+
flush_time: Optional flush time in seconds
|
|
230
|
+
flush_rows: Optional flush number of rows
|
|
231
|
+
flush_bytes: Optional flush number of bytes
|
|
232
|
+
"""
|
|
233
|
+
|
|
234
|
+
# Required fields
|
|
235
|
+
target_database: str
|
|
236
|
+
target_table: str
|
|
237
|
+
num_layers: int
|
|
238
|
+
min_time: int
|
|
239
|
+
max_time: int
|
|
240
|
+
min_rows: int
|
|
241
|
+
max_rows: int
|
|
242
|
+
min_bytes: int
|
|
243
|
+
max_bytes: int
|
|
244
|
+
|
|
245
|
+
# Optional fields
|
|
246
|
+
flush_time: Optional[int] = None
|
|
247
|
+
flush_rows: Optional[int] = None
|
|
248
|
+
flush_bytes: Optional[int] = None
|
|
249
|
+
|
|
250
|
+
def __post_init__(self):
|
|
251
|
+
"""Validate required fields"""
|
|
252
|
+
if not self.target_database:
|
|
253
|
+
raise ValueError("Buffer engine requires 'target_database'")
|
|
254
|
+
if not self.target_table:
|
|
255
|
+
raise ValueError("Buffer engine requires 'target_table'")
|
|
256
|
+
|
|
257
|
+
@dataclass
|
|
258
|
+
class DistributedEngine(EngineConfig):
|
|
259
|
+
"""Configuration for Distributed engine - distributed table across a cluster.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
cluster: Cluster name from the ClickHouse configuration
|
|
263
|
+
target_database: Database name on the cluster
|
|
264
|
+
target_table: Table name on the cluster
|
|
265
|
+
sharding_key: Optional sharding key expression for data distribution
|
|
266
|
+
policy_name: Optional policy name for data distribution
|
|
267
|
+
"""
|
|
268
|
+
|
|
269
|
+
# Required fields
|
|
270
|
+
cluster: str
|
|
271
|
+
target_database: str
|
|
272
|
+
target_table: str
|
|
273
|
+
|
|
274
|
+
# Optional fields
|
|
275
|
+
sharding_key: Optional[str] = None
|
|
276
|
+
policy_name: Optional[str] = None
|
|
277
|
+
|
|
278
|
+
def __post_init__(self):
|
|
279
|
+
"""Validate required fields"""
|
|
280
|
+
if not self.cluster:
|
|
281
|
+
raise ValueError("Distributed engine requires 'cluster'")
|
|
282
|
+
if not self.target_database:
|
|
283
|
+
raise ValueError("Distributed engine requires 'target_database'")
|
|
284
|
+
if not self.target_table:
|
|
285
|
+
raise ValueError("Distributed engine requires 'target_table'")
|
|
286
|
+
|
|
180
287
|
# ==========================
|
|
181
288
|
# New Table Configuration (Recommended API)
|
|
182
289
|
# ==========================
|
|
{moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/dmv2/olap_table.py
@@ -151,6 +151,39 @@ class OlapConfig(BaseModel):
|
|
|
151
151
|
if has_fields and has_expr:
|
|
152
152
|
raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
|
|
153
153
|
|
|
154
|
+
# Validate that non-MergeTree engines don't have unsupported clauses
|
|
155
|
+
if self.engine:
|
|
156
|
+
from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
|
|
157
|
+
|
|
158
|
+
# All non-MergeTree engines don't support ORDER BY
|
|
159
|
+
non_mergetree_engines = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine)
|
|
160
|
+
if isinstance(self.engine, non_mergetree_engines):
|
|
161
|
+
engine_name = type(self.engine).__name__
|
|
162
|
+
|
|
163
|
+
if has_fields or has_expr:
|
|
164
|
+
raise ValueError(
|
|
165
|
+
f"{engine_name} does not support ORDER BY clauses. "
|
|
166
|
+
f"Remove order_by_fields or order_by_expression from your configuration."
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
if self.sample_by_expression:
|
|
170
|
+
raise ValueError(
|
|
171
|
+
f"{engine_name} does not support SAMPLE BY clause. "
|
|
172
|
+
f"Remove sample_by_expression from your configuration."
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
# Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY
|
|
176
|
+
# S3Engine DOES support PARTITION BY
|
|
177
|
+
engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine)
|
|
178
|
+
if isinstance(self.engine, engines_without_partition_by):
|
|
179
|
+
engine_name = type(self.engine).__name__
|
|
180
|
+
|
|
181
|
+
if self.partition_by:
|
|
182
|
+
raise ValueError(
|
|
183
|
+
f"{engine_name} does not support PARTITION BY clause. "
|
|
184
|
+
f"Remove partition_by from your configuration."
|
|
185
|
+
)
|
|
186
|
+
|
|
154
187
|
|
|
155
188
|
class OlapTable(TypedMooseResource, Generic[T]):
|
|
156
189
|
"""Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
|
|
{moose_lib-0.6.161 → moose_lib-0.6.162}/moose_lib/internal.py
@@ -128,6 +128,45 @@ class S3QueueConfigDict(BaseEngineConfigDict):
|
|
|
128
128
|
headers: Optional[Dict[str, str]] = None
|
|
129
129
|
|
|
130
130
|
|
|
131
|
+
class S3ConfigDict(BaseEngineConfigDict):
|
|
132
|
+
"""Configuration for S3 engine."""
|
|
133
|
+
engine: Literal["S3"] = "S3"
|
|
134
|
+
path: str
|
|
135
|
+
format: str
|
|
136
|
+
aws_access_key_id: Optional[str] = None
|
|
137
|
+
aws_secret_access_key: Optional[str] = None
|
|
138
|
+
compression: Optional[str] = None
|
|
139
|
+
partition_strategy: Optional[str] = None
|
|
140
|
+
partition_columns_in_data_file: Optional[str] = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class BufferConfigDict(BaseEngineConfigDict):
|
|
144
|
+
"""Configuration for Buffer engine."""
|
|
145
|
+
engine: Literal["Buffer"] = "Buffer"
|
|
146
|
+
target_database: str
|
|
147
|
+
target_table: str
|
|
148
|
+
num_layers: int
|
|
149
|
+
min_time: int
|
|
150
|
+
max_time: int
|
|
151
|
+
min_rows: int
|
|
152
|
+
max_rows: int
|
|
153
|
+
min_bytes: int
|
|
154
|
+
max_bytes: int
|
|
155
|
+
flush_time: Optional[int] = None
|
|
156
|
+
flush_rows: Optional[int] = None
|
|
157
|
+
flush_bytes: Optional[int] = None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class DistributedConfigDict(BaseEngineConfigDict):
|
|
161
|
+
"""Configuration for Distributed engine."""
|
|
162
|
+
engine: Literal["Distributed"] = "Distributed"
|
|
163
|
+
cluster: str
|
|
164
|
+
target_database: str
|
|
165
|
+
target_table: str
|
|
166
|
+
sharding_key: Optional[str] = None
|
|
167
|
+
policy_name: Optional[str] = None
|
|
168
|
+
|
|
169
|
+
|
|
131
170
|
# Discriminated union of all engine configurations
|
|
132
171
|
EngineConfigDict = Union[
|
|
133
172
|
MergeTreeConfigDict,
|
|
@@ -138,7 +177,10 @@ EngineConfigDict = Union[
|
|
|
138
177
|
ReplicatedReplacingMergeTreeConfigDict,
|
|
139
178
|
ReplicatedAggregatingMergeTreeConfigDict,
|
|
140
179
|
ReplicatedSummingMergeTreeConfigDict,
|
|
141
|
-
S3QueueConfigDict
|
|
180
|
+
S3QueueConfigDict,
|
|
181
|
+
S3ConfigDict,
|
|
182
|
+
BufferConfigDict,
|
|
183
|
+
DistributedConfigDict
|
|
142
184
|
]
|
|
143
185
|
|
|
144
186
|
|
|
@@ -464,7 +506,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
|
|
|
464
506
|
Returns:
|
|
465
507
|
EngineConfigDict with engine-specific configuration
|
|
466
508
|
"""
|
|
467
|
-
from moose_lib.blocks import S3QueueEngine
|
|
509
|
+
from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine
|
|
468
510
|
|
|
469
511
|
# Try S3Queue first
|
|
470
512
|
if isinstance(engine, S3QueueEngine):
|
|
@@ -476,6 +518,45 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
|
|
|
476
518
|
compression=engine.compression,
|
|
477
519
|
headers=engine.headers
|
|
478
520
|
)
|
|
521
|
+
|
|
522
|
+
# Try S3
|
|
523
|
+
if isinstance(engine, S3Engine):
|
|
524
|
+
return S3ConfigDict(
|
|
525
|
+
path=engine.path,
|
|
526
|
+
format=engine.format,
|
|
527
|
+
aws_access_key_id=engine.aws_access_key_id,
|
|
528
|
+
aws_secret_access_key=engine.aws_secret_access_key,
|
|
529
|
+
compression=engine.compression,
|
|
530
|
+
partition_strategy=engine.partition_strategy,
|
|
531
|
+
partition_columns_in_data_file=engine.partition_columns_in_data_file
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
# Try Buffer
|
|
535
|
+
if isinstance(engine, BufferEngine):
|
|
536
|
+
return BufferConfigDict(
|
|
537
|
+
target_database=engine.target_database,
|
|
538
|
+
target_table=engine.target_table,
|
|
539
|
+
num_layers=engine.num_layers,
|
|
540
|
+
min_time=engine.min_time,
|
|
541
|
+
max_time=engine.max_time,
|
|
542
|
+
min_rows=engine.min_rows,
|
|
543
|
+
max_rows=engine.max_rows,
|
|
544
|
+
min_bytes=engine.min_bytes,
|
|
545
|
+
max_bytes=engine.max_bytes,
|
|
546
|
+
flush_time=engine.flush_time,
|
|
547
|
+
flush_rows=engine.flush_rows,
|
|
548
|
+
flush_bytes=engine.flush_bytes
|
|
549
|
+
)
|
|
550
|
+
|
|
551
|
+
# Try Distributed
|
|
552
|
+
if isinstance(engine, DistributedEngine):
|
|
553
|
+
return DistributedConfigDict(
|
|
554
|
+
cluster=engine.cluster,
|
|
555
|
+
target_database=engine.target_database,
|
|
556
|
+
target_table=engine.target_table,
|
|
557
|
+
sharding_key=engine.sharding_key,
|
|
558
|
+
policy_name=engine.policy_name
|
|
559
|
+
)
|
|
479
560
|
|
|
480
561
|
# Try basic engines
|
|
481
562
|
basic_config = _convert_basic_engine_instance(engine)
|
|
{moose_lib-0.6.161 → moose_lib-0.6.162}/tests/test_s3queue_config.py
@@ -8,7 +8,7 @@ import warnings
|
|
|
8
8
|
from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
|
|
9
9
|
from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
|
|
10
10
|
from moose_lib.internal import (
|
|
11
|
-
_convert_engine_to_config_dict,
|
|
11
|
+
_convert_engine_to_config_dict,
|
|
12
12
|
EngineConfigDict,
|
|
13
13
|
S3QueueConfigDict,
|
|
14
14
|
MergeTreeConfigDict,
|
|
@@ -41,8 +41,8 @@ def test_olap_config_accepts_engine_config():
|
|
|
41
41
|
aws_secret_access_key="secret123"
|
|
42
42
|
)
|
|
43
43
|
config = OlapConfig(
|
|
44
|
-
engine=s3_engine
|
|
45
|
-
order_by_fields
|
|
44
|
+
engine=s3_engine
|
|
45
|
+
# Note: S3QueueEngine does not support order_by_fields
|
|
46
46
|
)
|
|
47
47
|
assert isinstance(config.engine, S3QueueEngine)
|
|
48
48
|
assert config.engine.s3_path == "s3://bucket/data/*.json"
|
|
@@ -58,14 +58,14 @@ def test_olap_table_with_s3queue_engine():
|
|
|
58
58
|
format="JSONEachRow",
|
|
59
59
|
compression="gzip"
|
|
60
60
|
),
|
|
61
|
-
order_by_fields
|
|
61
|
+
# Note: S3QueueEngine does not support order_by_fields
|
|
62
62
|
settings={
|
|
63
63
|
"s3queue_mode": "unordered",
|
|
64
64
|
"s3queue_keeper_path": "/clickhouse/s3queue/test"
|
|
65
65
|
}
|
|
66
66
|
)
|
|
67
67
|
)
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
assert table.name == "TestS3Table"
|
|
70
70
|
assert isinstance(table.config.engine, S3QueueEngine)
|
|
71
71
|
assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
|
|
@@ -83,7 +83,7 @@ def test_olap_table_with_mergetree_engines():
|
|
|
83
83
|
)
|
|
84
84
|
)
|
|
85
85
|
assert isinstance(table1.config.engine, MergeTreeEngine)
|
|
86
|
-
|
|
86
|
+
|
|
87
87
|
# Test with ReplacingMergeTreeEngine
|
|
88
88
|
table2 = OlapTable[SampleEvent](
|
|
89
89
|
"ReplacingTable",
|
|
@@ -111,10 +111,10 @@ def test_engine_conversion_to_dict():
|
|
|
111
111
|
)
|
|
112
112
|
)
|
|
113
113
|
)
|
|
114
|
-
|
|
114
|
+
|
|
115
115
|
# Convert engine to dict
|
|
116
116
|
engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
|
|
117
|
-
|
|
117
|
+
|
|
118
118
|
assert engine_dict.engine == "S3Queue"
|
|
119
119
|
assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
|
|
120
120
|
assert engine_dict.format == "Parquet"
|
|
@@ -133,10 +133,10 @@ def test_engine_conversion_with_enum():
|
|
|
133
133
|
engine=ClickHouseEngines.ReplacingMergeTree
|
|
134
134
|
)
|
|
135
135
|
)
|
|
136
|
-
|
|
136
|
+
|
|
137
137
|
# Convert engine to dict
|
|
138
138
|
engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
|
|
139
|
-
|
|
139
|
+
|
|
140
140
|
assert engine_dict.engine == "ReplacingMergeTree"
|
|
141
141
|
|
|
142
142
|
|
|
@@ -150,16 +150,16 @@ def test_backward_compatibility():
|
|
|
150
150
|
order_by_fields=["id"]
|
|
151
151
|
)
|
|
152
152
|
)
|
|
153
|
-
|
|
153
|
+
|
|
154
154
|
# New API with EngineConfig
|
|
155
155
|
new_table = OlapTable[SampleEvent](
|
|
156
|
-
"NewTable",
|
|
156
|
+
"NewTable",
|
|
157
157
|
OlapConfig(
|
|
158
158
|
engine=MergeTreeEngine(),
|
|
159
159
|
order_by_fields=["id"]
|
|
160
160
|
)
|
|
161
161
|
)
|
|
162
|
-
|
|
162
|
+
|
|
163
163
|
# Both should work
|
|
164
164
|
assert old_table.config.engine == ClickHouseEngines.MergeTree
|
|
165
165
|
assert isinstance(new_table.config.engine, MergeTreeEngine)
|
|
@@ -169,7 +169,7 @@ def test_deprecation_warning_for_enum():
|
|
|
169
169
|
"""Test that using enum engine triggers deprecation warning."""
|
|
170
170
|
with warnings.catch_warnings(record=True) as w:
|
|
171
171
|
warnings.simplefilter("always")
|
|
172
|
-
|
|
172
|
+
|
|
173
173
|
table = OlapTable[SampleEvent](
|
|
174
174
|
"LegacyTable",
|
|
175
175
|
OlapConfig(
|
|
@@ -177,7 +177,7 @@ def test_deprecation_warning_for_enum():
|
|
|
177
177
|
order_by_fields=["id"]
|
|
178
178
|
)
|
|
179
179
|
)
|
|
180
|
-
|
|
180
|
+
|
|
181
181
|
# Check that a deprecation warning was issued
|
|
182
182
|
assert len(w) == 1
|
|
183
183
|
assert issubclass(w[0].category, DeprecationWarning)
|
|
@@ -198,12 +198,12 @@ def test_s3queue_with_all_options():
|
|
|
198
198
|
"Authorization": "Bearer token"
|
|
199
199
|
}
|
|
200
200
|
)
|
|
201
|
-
|
|
201
|
+
|
|
202
202
|
table = OlapTable[SampleEvent](
|
|
203
203
|
"FullConfigTable",
|
|
204
204
|
OlapConfig(
|
|
205
205
|
engine=engine,
|
|
206
|
-
order_by_fields
|
|
206
|
+
# Note: S3QueueEngine does not support order_by_fields
|
|
207
207
|
settings={
|
|
208
208
|
"s3queue_mode": "ordered",
|
|
209
209
|
"s3queue_keeper_path": "/clickhouse/s3queue/full",
|
|
@@ -212,7 +212,7 @@ def test_s3queue_with_all_options():
|
|
|
212
212
|
}
|
|
213
213
|
)
|
|
214
214
|
)
|
|
215
|
-
|
|
215
|
+
|
|
216
216
|
assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
|
|
217
217
|
assert table.config.engine.format == "JSONEachRow"
|
|
218
218
|
assert table.config.engine.compression == "gzip"
|
|
@@ -226,42 +226,49 @@ def test_s3queue_public_bucket():
|
|
|
226
226
|
format="Parquet"
|
|
227
227
|
# No AWS credentials needed for public buckets
|
|
228
228
|
)
|
|
229
|
-
|
|
229
|
+
|
|
230
230
|
table = OlapTable[SampleEvent](
|
|
231
231
|
"PublicBucketTable",
|
|
232
232
|
OlapConfig(
|
|
233
|
-
engine=engine
|
|
234
|
-
order_by_fields
|
|
233
|
+
engine=engine
|
|
234
|
+
# Note: S3QueueEngine does not support order_by_fields
|
|
235
235
|
)
|
|
236
236
|
)
|
|
237
|
-
|
|
237
|
+
|
|
238
238
|
assert table.config.engine.aws_access_key_id is None
|
|
239
239
|
assert table.config.engine.aws_secret_access_key is None
|
|
240
240
|
|
|
241
241
|
|
|
242
242
|
def test_migration_from_legacy_to_new():
|
|
243
243
|
"""Test migration path from legacy to new API."""
|
|
244
|
-
# Legacy approach
|
|
244
|
+
# Legacy approach (with MergeTree, which supports order_by_fields)
|
|
245
245
|
legacy_config = OlapConfig(
|
|
246
|
-
engine=ClickHouseEngines.
|
|
246
|
+
engine=ClickHouseEngines.MergeTree,
|
|
247
247
|
order_by_fields=["timestamp"]
|
|
248
248
|
)
|
|
249
|
-
|
|
250
|
-
# New approach - equivalent configuration
|
|
249
|
+
|
|
250
|
+
# New approach - equivalent configuration for MergeTree
|
|
251
251
|
new_config = OlapConfig(
|
|
252
|
-
engine=S3QueueEngine(
|
|
253
|
-
s3_path="s3://bucket/data/*.json",
|
|
254
|
-
format="JSONEachRow"
|
|
255
|
-
),
|
|
252
|
+
engine=MergeTreeEngine(),
|
|
256
253
|
order_by_fields=["timestamp"]
|
|
257
254
|
)
|
|
258
|
-
|
|
255
|
+
|
|
259
256
|
# Both should have the same order_by_fields
|
|
260
257
|
assert legacy_config.order_by_fields == new_config.order_by_fields
|
|
261
|
-
|
|
258
|
+
|
|
262
259
|
# Engine types should be different
|
|
263
260
|
assert isinstance(legacy_config.engine, ClickHouseEngines)
|
|
264
|
-
assert isinstance(new_config.engine, S3QueueEngine)
|
|
261
|
+
assert isinstance(new_config.engine, MergeTreeEngine)
|
|
262
|
+
|
|
263
|
+
# For S3Queue, the new API correctly prevents unsupported clauses
|
|
264
|
+
s3queue_config = OlapConfig(
|
|
265
|
+
engine=S3QueueEngine(
|
|
266
|
+
s3_path="s3://bucket/data/*.json",
|
|
267
|
+
format="JSONEachRow"
|
|
268
|
+
)
|
|
269
|
+
# Note: order_by_fields is not supported for S3QueueEngine
|
|
270
|
+
)
|
|
271
|
+
assert isinstance(s3queue_config.engine, S3QueueEngine)
|
|
265
272
|
|
|
266
273
|
|
|
267
274
|
def test_engine_config_validation():
|
|
@@ -272,7 +279,7 @@ def test_engine_config_validation():
|
|
|
272
279
|
s3_path="", # Empty path should fail
|
|
273
280
|
format="JSONEachRow"
|
|
274
281
|
)
|
|
275
|
-
|
|
282
|
+
|
|
276
283
|
with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
|
|
277
284
|
S3QueueEngine(
|
|
278
285
|
s3_path="s3://bucket/data/*.json",
|
|
@@ -280,21 +287,143 @@ def test_engine_config_validation():
|
|
|
280
287
|
)
|
|
281
288
|
|
|
282
289
|
|
|
290
|
+
def test_non_mergetree_engines_reject_unsupported_clauses():
|
|
291
|
+
"""Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
|
|
292
|
+
from moose_lib.blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
|
|
293
|
+
|
|
294
|
+
# Test S3Engine rejects ORDER BY
|
|
295
|
+
with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
|
|
296
|
+
OlapConfig(
|
|
297
|
+
engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
|
|
298
|
+
order_by_fields=["id"]
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
|
|
302
|
+
OlapConfig(
|
|
303
|
+
engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
|
|
304
|
+
order_by_expression="(id, name)"
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# Test S3Engine rejects SAMPLE BY
|
|
308
|
+
with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
|
|
309
|
+
OlapConfig(
|
|
310
|
+
engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
|
|
311
|
+
sample_by_expression="cityHash64(id)"
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# Test S3Engine DOES support PARTITION BY (should not raise)
|
|
315
|
+
config_s3_with_partition = OlapConfig(
|
|
316
|
+
engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
|
|
317
|
+
partition_by="toYYYYMM(timestamp)"
|
|
318
|
+
)
|
|
319
|
+
assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
|
|
320
|
+
|
|
321
|
+
# Test S3QueueEngine rejects ORDER BY
|
|
322
|
+
with pytest.raises(ValueError, match="S3QueueEngine does not support ORDER BY clauses"):
|
|
323
|
+
OlapConfig(
|
|
324
|
+
engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
|
|
325
|
+
order_by_fields=["id"]
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
# Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
|
|
329
|
+
with pytest.raises(ValueError, match="S3QueueEngine does not support PARTITION BY clause"):
|
|
330
|
+
OlapConfig(
|
|
331
|
+
engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
|
|
332
|
+
partition_by="toYYYYMM(timestamp)"
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# Test BufferEngine rejects ORDER BY
|
|
336
|
+
with pytest.raises(ValueError, match="BufferEngine does not support ORDER BY clauses"):
|
|
337
|
+
OlapConfig(
|
|
338
|
+
engine=BufferEngine(
|
|
339
|
+
target_database="default",
|
|
340
|
+
target_table="dest",
|
|
341
|
+
num_layers=16,
|
|
342
|
+
min_time=10,
|
|
343
|
+
max_time=100,
|
|
344
|
+
min_rows=10000,
|
|
345
|
+
max_rows=100000,
|
|
346
|
+
min_bytes=10000000,
|
|
347
|
+
max_bytes=100000000
|
|
348
|
+
),
|
|
349
|
+
order_by_fields=["id"]
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
# Test BufferEngine rejects PARTITION BY
|
|
353
|
+
with pytest.raises(ValueError, match="BufferEngine does not support PARTITION BY clause"):
|
|
354
|
+
OlapConfig(
|
|
355
|
+
engine=BufferEngine(
|
|
356
|
+
target_database="default",
|
|
357
|
+
target_table="dest",
|
|
358
|
+
num_layers=16,
|
|
359
|
+
min_time=10,
|
|
360
|
+
max_time=100,
|
|
361
|
+
min_rows=10000,
|
|
362
|
+
max_rows=100000,
|
|
363
|
+
min_bytes=10000000,
|
|
364
|
+
max_bytes=100000000
|
|
365
|
+
),
|
|
366
|
+
partition_by="date"
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
# Test DistributedEngine rejects PARTITION BY
|
|
370
|
+
with pytest.raises(ValueError, match="DistributedEngine does not support PARTITION BY clause"):
|
|
371
|
+
OlapConfig(
|
|
372
|
+
engine=DistributedEngine(
|
|
373
|
+
cluster="my_cluster",
|
|
374
|
+
target_database="default",
|
|
375
|
+
target_table="local_table"
|
|
376
|
+
),
|
|
377
|
+
partition_by="date"
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
# Verify that S3Engine works without unsupported clauses
|
|
381
|
+
config = OlapConfig(
|
|
382
|
+
engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
|
|
383
|
+
)
|
|
384
|
+
assert isinstance(config.engine, S3Engine)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def test_mergetree_engines_still_accept_clauses():
|
|
388
|
+
"""Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
|
|
389
|
+
from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
|
|
390
|
+
|
|
391
|
+
# MergeTree should accept all clauses
|
|
392
|
+
config1 = OlapConfig(
|
|
393
|
+
engine=MergeTreeEngine(),
|
|
394
|
+
order_by_fields=["id", "timestamp"],
|
|
395
|
+
partition_by="toYYYYMM(timestamp)",
|
|
396
|
+
sample_by_expression="cityHash64(id)"
|
|
397
|
+
)
|
|
398
|
+
assert config1.order_by_fields == ["id", "timestamp"]
|
|
399
|
+
assert config1.partition_by == "toYYYYMM(timestamp)"
|
|
400
|
+
assert config1.sample_by_expression == "cityHash64(id)"
|
|
401
|
+
|
|
402
|
+
# ReplacingMergeTree should also accept these clauses
|
|
403
|
+
config2 = OlapConfig(
|
|
404
|
+
engine=ReplacingMergeTreeEngine(ver="updated_at"),
|
|
405
|
+
order_by_expression="(id, name)",
|
|
406
|
+
partition_by="date"
|
|
407
|
+
)
|
|
408
|
+
assert config2.order_by_expression == "(id, name)"
|
|
409
|
+
assert config2.partition_by == "date"
|
|
410
|
+
|
|
411
|
+
|
|
283
412
|
def test_multiple_engine_types():
|
|
284
413
|
"""Test that different engine types can be used in the same application."""
|
|
285
414
|
tables = []
|
|
286
|
-
|
|
415
|
+
|
|
287
416
|
# Create tables with different engine types
|
|
288
417
|
tables.append(OlapTable[SampleEvent](
|
|
289
418
|
"MergeTreeTable",
|
|
290
419
|
OlapConfig(engine=MergeTreeEngine())
|
|
291
420
|
))
|
|
292
|
-
|
|
421
|
+
|
|
293
422
|
tables.append(OlapTable[SampleEvent](
|
|
294
423
|
"ReplacingTreeTable",
|
|
295
424
|
OlapConfig(engine=ReplacingMergeTreeEngine())
|
|
296
425
|
))
|
|
297
|
-
|
|
426
|
+
|
|
298
427
|
tables.append(OlapTable[SampleEvent](
|
|
299
428
|
"S3QueueTable",
|
|
300
429
|
OlapConfig(
|
|
@@ -304,7 +433,7 @@ def test_multiple_engine_types():
|
|
|
304
433
|
)
|
|
305
434
|
)
|
|
306
435
|
))
|
|
307
|
-
|
|
436
|
+
|
|
308
437
|
# Verify all tables were created with correct engine types
|
|
309
438
|
assert isinstance(tables[0].config.engine, MergeTreeEngine)
|
|
310
439
|
assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|