moose-lib 0.6.43__py3-none-any.whl → 0.6.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of moose-lib might be problematic. (The original page linked to further details here; the link target is not reproduced in this text.)

moose_lib/blocks.py CHANGED
@@ -1,6 +1,8 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, field
2
2
  from enum import Enum
3
- from typing import Dict, Optional
3
+ from typing import Dict, List, Optional, Any, Union
4
+ from abc import ABC
5
+ import warnings
4
6
 
5
7
 
6
8
  class ClickHouseEngines(Enum):
@@ -11,6 +13,128 @@ class ClickHouseEngines(Enum):
11
13
  CollapsingMergeTree = "CollapsingMergeTree"
12
14
  VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
13
15
  GraphiteMergeTree = "GraphiteMergeTree"
16
+ S3Queue = "S3Queue"
17
+
18
+ # ==========================
19
+ # New Engine Configuration Classes
20
+ # ==========================
21
+
22
+ @dataclass
23
+ class EngineConfig(ABC):
24
+ """Base class for engine configurations"""
25
+ pass
26
+
27
+ @dataclass
28
+ class MergeTreeEngine(EngineConfig):
29
+ """Configuration for MergeTree engine"""
30
+ pass
31
+
32
+ @dataclass
33
+ class ReplacingMergeTreeEngine(EngineConfig):
34
+ """Configuration for ReplacingMergeTree engine (with deduplication)"""
35
+ pass
36
+
37
+ @dataclass
38
+ class AggregatingMergeTreeEngine(EngineConfig):
39
+ """Configuration for AggregatingMergeTree engine"""
40
+ pass
41
+
42
+ @dataclass
43
+ class SummingMergeTreeEngine(EngineConfig):
44
+ """Configuration for SummingMergeTree engine"""
45
+ pass
46
+
47
+ @dataclass
48
+ class S3QueueEngine(EngineConfig):
49
+ """Configuration for S3Queue engine - only non-alterable constructor parameters.
50
+
51
+ S3Queue-specific settings like 'mode', 'keeper_path', etc. should be specified
52
+ in the settings field of OlapConfig, not here.
53
+ """
54
+
55
+ # Required fields
56
+ s3_path: str # S3 bucket path with wildcards (e.g., 's3://bucket/prefix/*.json')
57
+ format: str # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
58
+
59
+ # Optional AWS credentials
60
+ aws_access_key_id: Optional[str] = None
61
+ aws_secret_access_key: Optional[str] = None
62
+
63
+ # Optional configuration
64
+ compression: Optional[str] = None # e.g., 'gzip', 'zstd'
65
+ headers: Optional[Dict[str, str]] = None
66
+
67
+ def __post_init__(self):
68
+ """Validate required fields"""
69
+ if not self.s3_path:
70
+ raise ValueError("S3Queue engine requires 's3_path'")
71
+ if not self.format:
72
+ raise ValueError("S3Queue engine requires 'format'")
73
+
74
+ # ==========================
75
+ # New Table Configuration (Recommended API)
76
+ # ==========================
77
+
78
+ @dataclass
79
+ class TableConfig:
80
+ """Modern table configuration with engine-specific settings"""
81
+
82
+ # Engine configuration (required in new API)
83
+ engine: EngineConfig
84
+
85
+ # Common settings
86
+ name: str
87
+ columns: Dict[str, str]
88
+ order_by: Optional[str] = None
89
+
90
+ @classmethod
91
+ def with_s3_queue(cls,
92
+ name: str,
93
+ columns: Dict[str, str],
94
+ s3_path: str,
95
+ format: str,
96
+ order_by: Optional[str] = None,
97
+ **kwargs) -> 'TableConfig':
98
+ """Create a table with S3Queue engine"""
99
+ return cls(
100
+ name=name,
101
+ columns=columns,
102
+ engine=S3QueueEngine(s3_path=s3_path, format=format, **kwargs),
103
+ order_by=order_by
104
+ )
105
+
106
+ @classmethod
107
+ def with_merge_tree(cls,
108
+ name: str,
109
+ columns: Dict[str, str],
110
+ order_by: Optional[str] = None,
111
+ deduplicate: bool = False,
112
+ **kwargs) -> 'TableConfig':
113
+ """Create a table with MergeTree or ReplacingMergeTree engine"""
114
+ engine = ReplacingMergeTreeEngine() if deduplicate else MergeTreeEngine()
115
+ return cls(
116
+ name=name,
117
+ columns=columns,
118
+ engine=engine,
119
+ order_by=order_by
120
+ )
121
+
122
+ # ==========================
123
+ # Legacy API Support (Deprecated)
124
+ # ==========================
125
+
126
+ @dataclass
127
+ class S3QueueEngineConfig:
128
+ """Legacy S3Queue configuration (deprecated - use S3QueueEngine instead)"""
129
+ path: str # S3 path pattern (e.g., 's3://bucket/data/*.json')
130
+ format: str # Data format (e.g., 'JSONEachRow', 'CSV', etc.)
131
+ # Optional S3 access credentials - can be NOSIGN for public buckets
132
+ aws_access_key_id: Optional[str] = None
133
+ aws_secret_access_key: Optional[str] = None
134
+ # Optional compression
135
+ compression: Optional[str] = None
136
+ # Optional headers
137
+ headers: Optional[Dict[str, str]] = None
14
138
 
15
139
  @dataclass
16
140
  class TableCreateOptions:
@@ -18,6 +142,82 @@ class TableCreateOptions:
18
142
  columns: Dict[str, str]
19
143
  engine: Optional[ClickHouseEngines] = ClickHouseEngines.MergeTree
20
144
  order_by: Optional[str] = None
145
+ s3_queue_engine_config: Optional[S3QueueEngineConfig] = None # Required when engine is S3Queue
146
+
147
+ def __post_init__(self):
148
+ """Validate S3Queue configuration"""
149
+ if self.engine == ClickHouseEngines.S3Queue and self.s3_queue_engine_config is None:
150
+ raise ValueError(
151
+ "s3_queue_engine_config is required when using ClickHouseEngines.S3Queue engine. "
152
+ "Please provide s3_queue_engine_config with path, format, and optional settings."
153
+ )
154
+
155
+ # ==========================
156
+ # Backward Compatibility Layer
157
+ # ==========================
158
+
159
+ def is_new_config(config: Any) -> bool:
160
+ """Check if configuration uses new API"""
161
+ if isinstance(config, TableConfig):
162
+ return True
163
+ if hasattr(config, 'engine') and isinstance(getattr(config, 'engine'), EngineConfig):
164
+ return True
165
+ return False
166
+
167
+ def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
168
+ """Convert legacy configuration to new format"""
169
+
170
+ # Show deprecation warning
171
+ warnings.warn(
172
+ "Using deprecated TableCreateOptions. Please migrate to TableConfig:\n"
173
+ "- For S3Queue: Use TableConfig.with_s3_queue()\n"
174
+ "- For deduplication: Use engine=ReplacingMergeTreeEngine()\n"
175
+ "See documentation for examples.",
176
+ DeprecationWarning,
177
+ stacklevel=2
178
+ )
179
+
180
+ # Handle S3Queue with separate config
181
+ if legacy.engine == ClickHouseEngines.S3Queue and legacy.s3_queue_engine_config:
182
+ s3_config = legacy.s3_queue_engine_config
183
+ return TableConfig(
184
+ name=legacy.name,
185
+ columns=legacy.columns,
186
+ engine=S3QueueEngine(
187
+ s3_path=s3_config.path,
188
+ format=s3_config.format,
189
+ aws_access_key_id=s3_config.aws_access_key_id,
190
+ aws_secret_access_key=s3_config.aws_secret_access_key,
191
+ compression=s3_config.compression,
192
+ headers=s3_config.headers
193
+ ),
194
+ order_by=legacy.order_by
195
+ )
196
+
197
+ # Map legacy engine enum to new engine classes
198
+ engine_map = {
199
+ ClickHouseEngines.MergeTree: MergeTreeEngine(),
200
+ ClickHouseEngines.ReplacingMergeTree: ReplacingMergeTreeEngine(),
201
+ ClickHouseEngines.AggregatingMergeTree: AggregatingMergeTreeEngine(),
202
+ ClickHouseEngines.SummingMergeTree: SummingMergeTreeEngine(),
203
+ }
204
+
205
+ engine = engine_map.get(legacy.engine) if legacy.engine else MergeTreeEngine()
206
+ if engine is None:
207
+ engine = MergeTreeEngine()
208
+
209
+ return TableConfig(
210
+ name=legacy.name,
211
+ columns=legacy.columns,
212
+ engine=engine,
213
+ order_by=legacy.order_by
214
+ )
215
+
216
+ def normalize_config(config: Union[TableConfig, TableCreateOptions]) -> TableConfig:
217
+ """Normalize any configuration format to new API"""
218
+ if is_new_config(config):
219
+ return config # type: ignore
220
+ return migrate_legacy_config(config) # type: ignore
21
221
 
22
222
  @dataclass
23
223
  class AggregationCreateOptions:
@@ -93,7 +293,7 @@ def create_table(options: TableCreateOptions) -> str:
93
293
  """
94
294
  column_definitions = ",\n".join([f"{name} {type}" for name, type in options.columns.items()])
95
295
  order_by_clause = f"ORDER BY {options.order_by}" if options.order_by else ""
96
- engine = options.engine.value
296
+ engine = options.engine.value if options.engine else "MergeTree"
97
297
 
98
298
  return f"""
99
299
  CREATE TABLE IF NOT EXISTS {options.name}
moose_lib/dmv2/materialized_view.py CHANGED
@@ -96,7 +96,6 @@ class MaterializedView(SqlResource, BaseTypedResource, Generic[T]):
96
96
 
97
97
  setup = [
98
98
  f"CREATE MATERIALIZED VIEW IF NOT EXISTS {quote_identifier(options.materialized_view_name)} TO {quote_identifier(target_table.name)} AS {options.select_statement}",
99
- f"INSERT INTO {quote_identifier(target_table.name)} {options.select_statement}"
100
99
  ]
101
100
  teardown = [f"DROP VIEW IF EXISTS {quote_identifier(options.materialized_view_name)}"]
102
101
 
moose_lib/dmv2/olap_table.py CHANGED
@@ -5,6 +5,7 @@ This module provides classes for defining and configuring OLAP tables,
5
5
  particularly for ClickHouse.
6
6
  """
7
7
  import json
8
+ import warnings
8
9
  from clickhouse_connect import get_client
9
10
  from clickhouse_connect.driver.client import Client
10
11
  from clickhouse_connect.driver.exceptions import ClickHouseError
@@ -12,6 +13,8 @@ from dataclasses import dataclass
12
13
  from pydantic import BaseModel
13
14
  from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator
14
15
  from moose_lib import ClickHouseEngines
16
+ from ..blocks import EngineConfig # Add import for EngineConfig
17
+ from ..commons import Logger # Add import for Moose logging
15
18
  from ..config.runtime import RuntimeClickHouseConfig
16
19
  from ..utilities.sql import quote_identifier
17
20
  from .types import TypedMooseResource, T
@@ -93,21 +96,27 @@ class InsertResult(Generic[T]):
93
96
  failed_records: Optional[List[FailedRecord[T]]] = None
94
97
 
95
98
  class OlapConfig(BaseModel):
99
+ model_config = {"extra": "forbid"} # Reject unknown fields for a clean API
100
+
96
101
  """Configuration for OLAP tables (e.g., ClickHouse tables).
97
102
 
98
103
  Attributes:
99
104
  order_by_fields: List of column names to use for the ORDER BY clause.
100
105
  Crucial for `ReplacingMergeTree` and performance.
101
- engine: The ClickHouse table engine to use (e.g., MergeTree, ReplacingMergeTree).
106
+ engine: The ClickHouse table engine to use. Can be either a ClickHouseEngines enum value
107
+ (for backward compatibility) or an EngineConfig instance (recommended).
102
108
  version: Optional version string for tracking configuration changes.
103
109
  metadata: Optional metadata for the table.
104
110
  life_cycle: Determines how changes in code will propagate to the resources.
111
+ settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
112
+ These are alterable settings that can be changed without recreating the table.
105
113
  """
106
114
  order_by_fields: list[str] = []
107
- engine: Optional[ClickHouseEngines] = None
115
+ engine: Optional[Union[ClickHouseEngines, EngineConfig]] = None
108
116
  version: Optional[str] = None
109
117
  metadata: Optional[dict] = None
110
118
  life_cycle: Optional[LifeCycle] = None
119
+ settings: Optional[dict[str, str]] = None
111
120
 
112
121
  class OlapTable(TypedMooseResource, Generic[T]):
113
122
  """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
@@ -136,6 +145,38 @@ class OlapTable(TypedMooseResource, Generic[T]):
136
145
  self.config = config
137
146
  self.metadata = config.metadata
138
147
  _tables[name] = self
148
+
149
+ # Check if using legacy enum-based engine configuration
150
+ if config.engine and isinstance(config.engine, ClickHouseEngines):
151
+ logger = Logger(action="OlapTable")
152
+
153
+ # Special case for S3Queue - more detailed migration message
154
+ if config.engine == ClickHouseEngines.S3Queue:
155
+ logger.highlight(
156
+ f"Table '{name}' uses legacy S3Queue configuration. "
157
+ "Please migrate to the new API:\n"
158
+ " Old: engine=ClickHouseEngines.S3Queue\n"
159
+ " New: engine=S3QueueEngine(s3_path='...', format='...')\n"
160
+ "See documentation for complete S3Queue configuration options."
161
+ )
162
+ else:
163
+ # Generic message for other engines
164
+ engine_name = config.engine.value
165
+ logger.highlight(
166
+ f"Table '{name}' uses legacy engine configuration. "
167
+ f"Consider migrating to the new API:\n"
168
+ f" Old: engine=ClickHouseEngines.{engine_name}\n"
169
+ f" New: from moose_lib.blocks import {engine_name}Engine; engine={engine_name}Engine()\n"
170
+ "The new API provides better type safety and configuration options."
171
+ )
172
+
173
+ # Also emit a Python warning for development environments
174
+ warnings.warn(
175
+ f"Table '{name}' uses deprecated ClickHouseEngines enum. "
176
+ f"Please migrate to engine configuration classes (e.g., {config.engine.value}Engine).",
177
+ DeprecationWarning,
178
+ stacklevel=2
179
+ )
139
180
 
140
181
  def _generate_table_name(self) -> str:
141
182
  """Generate the versioned table name following Moose's naming convention.
moose_lib/internal.py CHANGED
@@ -7,10 +7,11 @@ to convert the user-defined resources (from `dmv2.py`) into a serializable
7
7
  JSON format expected by the Moose infrastructure management system.
8
8
  """
9
9
  from importlib import import_module
10
- from typing import Literal, Optional, List, Any
10
+ from typing import Literal, Optional, List, Any, Dict, Union, TYPE_CHECKING
11
11
  from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
12
12
  import json
13
13
  from .data_models import Column, _to_columns
14
+ from .blocks import EngineConfig, ClickHouseEngines
14
15
  from moose_lib.dmv2 import (
15
16
  get_tables,
16
17
  get_streams,
@@ -53,6 +54,46 @@ class Consumer(BaseModel):
53
54
  """
54
55
  version: Optional[str] = None
55
56
 
57
+ class BaseEngineConfigDict(BaseModel):
58
+ """Base engine configuration for all ClickHouse table engines."""
59
+ model_config = model_config
60
+ engine: str
61
+
62
+ class MergeTreeConfigDict(BaseEngineConfigDict):
63
+ """Configuration for MergeTree engine."""
64
+ engine: Literal["MergeTree"] = "MergeTree"
65
+
66
+ class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
67
+ """Configuration for ReplacingMergeTree engine."""
68
+ engine: Literal["ReplacingMergeTree"] = "ReplacingMergeTree"
69
+
70
+ class AggregatingMergeTreeConfigDict(BaseEngineConfigDict):
71
+ """Configuration for AggregatingMergeTree engine."""
72
+ engine: Literal["AggregatingMergeTree"] = "AggregatingMergeTree"
73
+
74
+ class SummingMergeTreeConfigDict(BaseEngineConfigDict):
75
+ """Configuration for SummingMergeTree engine."""
76
+ engine: Literal["SummingMergeTree"] = "SummingMergeTree"
77
+
78
+ class S3QueueConfigDict(BaseEngineConfigDict):
79
+ """Configuration for S3Queue engine with all specific fields."""
80
+ engine: Literal["S3Queue"] = "S3Queue"
81
+ s3_path: str
82
+ format: str
83
+ aws_access_key_id: Optional[str] = None
84
+ aws_secret_access_key: Optional[str] = None
85
+ compression: Optional[str] = None
86
+ headers: Optional[Dict[str, str]] = None
87
+
88
+ # Discriminated union of all engine configurations
89
+ EngineConfigDict = Union[
90
+ MergeTreeConfigDict,
91
+ ReplacingMergeTreeConfigDict,
92
+ AggregatingMergeTreeConfigDict,
93
+ SummingMergeTreeConfigDict,
94
+ S3QueueConfigDict
95
+ ]
96
+
56
97
  class TableConfig(BaseModel):
57
98
  """Internal representation of an OLAP table configuration for serialization.
58
99
 
@@ -60,20 +101,22 @@ class TableConfig(BaseModel):
60
101
  name: Name of the table.
61
102
  columns: List of columns with their types and attributes.
62
103
  order_by: List of columns used for the ORDER BY clause.
63
- engine: The name of the ClickHouse engine used.
104
+ engine_config: Engine configuration with type-safe, engine-specific parameters.
64
105
  version: Optional version string of the table configuration.
65
106
  metadata: Optional metadata for the table.
66
107
  life_cycle: Lifecycle management setting for the table.
108
+ table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
67
109
  """
68
110
  model_config = model_config
69
111
 
70
112
  name: str
71
113
  columns: List[Column]
72
114
  order_by: List[str]
73
- engine: Optional[str]
115
+ engine_config: Optional[EngineConfigDict] = Field(None, discriminator='engine')
74
116
  version: Optional[str] = None
75
117
  metadata: Optional[dict] = None
76
118
  life_cycle: Optional[str] = None
119
+ table_settings: Optional[Dict[str, str]] = None
77
120
 
78
121
  class TopicConfig(BaseModel):
79
122
  """Internal representation of a stream/topic configuration for serialization.
@@ -249,6 +292,86 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
249
292
  raise TypeError(f"Object {r} lacks a 'kind' attribute for dependency mapping.")
250
293
 
251
294
 
295
+ def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig], table: OlapTable) -> EngineConfigDict:
296
+ """Convert engine enum or EngineConfig instance to new engine config format.
297
+
298
+ Args:
299
+ engine: Either a ClickHouseEngines enum value or an EngineConfig instance
300
+ table: The OlapTable instance with configuration
301
+
302
+ Returns:
303
+ EngineConfigDict with engine-specific configuration
304
+ """
305
+ from moose_lib import ClickHouseEngines
306
+ from moose_lib.blocks import (
307
+ EngineConfig, S3QueueEngine, MergeTreeEngine,
308
+ ReplacingMergeTreeEngine, AggregatingMergeTreeEngine,
309
+ SummingMergeTreeEngine
310
+ )
311
+ from moose_lib.commons import Logger
312
+
313
+ # Check if engine is an EngineConfig instance (new API)
314
+ if isinstance(engine, EngineConfig):
315
+ if isinstance(engine, S3QueueEngine):
316
+ return S3QueueConfigDict(
317
+ s3_path=engine.s3_path,
318
+ format=engine.format,
319
+ aws_access_key_id=engine.aws_access_key_id,
320
+ aws_secret_access_key=engine.aws_secret_access_key,
321
+ compression=engine.compression,
322
+ headers=engine.headers
323
+ )
324
+ elif isinstance(engine, ReplacingMergeTreeEngine):
325
+ return ReplacingMergeTreeConfigDict()
326
+ elif isinstance(engine, AggregatingMergeTreeEngine):
327
+ return AggregatingMergeTreeConfigDict()
328
+ elif isinstance(engine, SummingMergeTreeEngine):
329
+ return SummingMergeTreeConfigDict()
330
+ elif isinstance(engine, MergeTreeEngine):
331
+ return MergeTreeConfigDict()
332
+ else:
333
+ # Fallback for any other EngineConfig subclass - use base class
334
+ return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
335
+
336
+ # Handle legacy enum-based engine configuration
337
+ if isinstance(engine, ClickHouseEngines):
338
+ engine_name = engine.value
339
+ else:
340
+ engine_name = str(engine)
341
+
342
+ # For S3Queue with legacy configuration, check for s3_queue_engine_config
343
+ if engine_name == "S3Queue" and hasattr(table.config, 's3_queue_engine_config'):
344
+ s3_config = table.config.s3_queue_engine_config
345
+ if s3_config:
346
+ logger = Logger(action="S3QueueConfig")
347
+ logger.highlight(
348
+ "Using deprecated s3_queue_engine_config. Please migrate to:\n"
349
+ " engine=S3QueueEngine(s3_path='...', format='...', ...)"
350
+ )
351
+ return S3QueueConfigDict(
352
+ s3_path=s3_config.path,
353
+ format=s3_config.format,
354
+ aws_access_key_id=s3_config.aws_access_key_id,
355
+ aws_secret_access_key=s3_config.aws_secret_access_key,
356
+ compression=s3_config.compression,
357
+ headers=s3_config.headers
358
+ )
359
+
360
+ # Map engine names to specific config classes
361
+ engine_map = {
362
+ "MergeTree": MergeTreeConfigDict,
363
+ "ReplacingMergeTree": ReplacingMergeTreeConfigDict,
364
+ "AggregatingMergeTree": AggregatingMergeTreeConfigDict,
365
+ "SummingMergeTree": SummingMergeTreeConfigDict,
366
+ }
367
+
368
+ config_class = engine_map.get(engine_name)
369
+ if config_class:
370
+ return config_class()
371
+
372
+ # Fallback for unknown engines
373
+ return BaseEngineConfigDict(engine=engine_name)
374
+
252
375
  def to_infra_map() -> dict:
253
376
  """Converts the registered `dmv2` resources into the serializable `InfrastructureMap` format.
254
377
 
@@ -268,15 +391,29 @@ def to_infra_map() -> dict:
268
391
  workflows = {}
269
392
 
270
393
  for name, table in get_tables().items():
271
- engine = table.config.engine
394
+ # Convert engine configuration to new format
395
+ engine_config = None
396
+ if table.config.engine:
397
+ engine_config = _convert_engine_to_config_dict(table.config.engine, table)
398
+
399
+ # Get table settings, applying defaults for S3Queue
400
+ table_settings = table.config.settings.copy() if table.config.settings else {}
401
+
402
+ # Apply default settings for S3Queue if not already specified
403
+ if engine_config and engine_config.engine == "S3Queue":
404
+ # Set default mode to 'unordered' if not specified
405
+ if "mode" not in table_settings:
406
+ table_settings["mode"] = "unordered"
407
+
272
408
  tables[name] = TableConfig(
273
409
  name=name,
274
410
  columns=_to_columns(table._t),
275
411
  order_by=table.config.order_by_fields,
276
- engine=None if engine is None else engine.value,
412
+ engine_config=engine_config,
277
413
  version=table.config.version,
278
414
  metadata=getattr(table, "metadata", None),
279
415
  life_cycle=table.config.life_cycle.value if table.config.life_cycle else None,
416
+ table_settings=table_settings if table_settings else None, # Map 'settings' to 'table_settings' for internal use
280
417
  )
281
418
 
282
419
  for name, stream in get_streams().items():
@@ -321,10 +458,10 @@ def to_infra_map() -> dict:
321
458
  name=api.config.destination.name
322
459
  ),
323
460
  metadata=getattr(api, "metadata", None),
324
- dead_letter_queue=api.config.dead_letter_queue.name,
325
461
  json_schema=api._t.model_json_schema(
326
462
  ref_template='#/components/schemas/{model}'
327
463
  ),
464
+ dead_letter_queue=api.config.dead_letter_queue.name if api.config.dead_letter_queue else None
328
465
  )
329
466
 
330
467
  for name, api in get_apis().items():
moose_lib-0.6.44.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.6.43
3
+ Version: 0.6.44
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
moose_lib-0.6.44.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
1
1
  moose_lib/__init__.py,sha256=LiUdVqmtASPqbMF53dXVZzCdU1jtFVx62_tiSbW65a0,162
2
- moose_lib/blocks.py,sha256=_wdvC2NC_Y3MMEnB71WTgWbeQ--zPNHk19xjToJW0C0,3185
2
+ moose_lib/blocks.py,sha256=5QNgaTPC4hfWrIxY5lG4FeNoFP2-JJuFmWjbAVVxSlk,10091
3
3
  moose_lib/commons.py,sha256=FUpRv8D3-LeGcjhcqtDyiimz5izwpCq53h50ydxC_uA,3711
4
4
  moose_lib/data_models.py,sha256=PgoFXTzf4C66FlKUowMT4VOrwSRR9_bk36P43ub4LzU,10274
5
5
  moose_lib/dmv2-serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
6
- moose_lib/internal.py,sha256=Mi6Zng5-bsIKbtV9qhoxUV6J9U-mZw0Q0WggiMJ7cXQ,14600
6
+ moose_lib/internal.py,sha256=8LZXIOpaDUviZul3qjtBIlLVCyz7uAmZadrqJBndJzo,20505
7
7
  moose_lib/main.py,sha256=XcVX_sTnt5QbrPXKNLCKZGCvpFpE8oiqSG2S1So9ztI,16713
8
8
  moose_lib/query_param.py,sha256=kxcR09BMIsEg4o2qetjKrVu1YFRaLfMEzwzyGsKUpvA,6474
9
9
  moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -17,8 +17,8 @@ moose_lib/dmv2/consumption.py,sha256=f43XPgJn1JLPPEpYbJ1FQ8Efi92YJH7DWa1wb5TY38g
17
17
  moose_lib/dmv2/ingest_api.py,sha256=Snek9NGwaJl_BuImSWGtQq91m9D3AJ4qBoGiKZ-9yTQ,2323
18
18
  moose_lib/dmv2/ingest_pipeline.py,sha256=2tDM4gCjQMdDRDjvymI5Z1DlkXULx9nY4nG9M3YXpjk,7033
19
19
  moose_lib/dmv2/life_cycle.py,sha256=wl0k6yzwU1MJ_fO_UkN29buoY5G6ChYZvfwigP9fVfM,1254
20
- moose_lib/dmv2/materialized_view.py,sha256=sNwYwNlNNoaYdmlNHBNuJXp7uc4CThxtYkYPNUaeQpw,4968
21
- moose_lib/dmv2/olap_table.py,sha256=WT2ZQ-G4xwLOfpKSG1Duk-DHlFFTtQQzGiMG8hc-uWg,31525
20
+ moose_lib/dmv2/materialized_view.py,sha256=3JbNLC26p7xOQK0DpgkreWKx7k5V3x3bz9a7kjDqKuU,4876
21
+ moose_lib/dmv2/olap_table.py,sha256=h4aF9hhFy8g0CHiQ_GwtnYqzagi1e4mGei406l8Z8tg,33808
22
22
  moose_lib/dmv2/registry.py,sha256=janjYszIXDnMfuwkNhoiBmMLar3umQ3Npw9Ms-u6p-8,2373
23
23
  moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
24
24
  moose_lib/dmv2/stream.py,sha256=jiUWBsjFalLLP63mikOxyHRdieiDAlzf9lXfLye-Wjc,10761
@@ -33,7 +33,8 @@ tests/__init__.py,sha256=0Gh4yzPkkC3TzBGKhenpMIxJcRhyrrCfxLSfpTZnPMQ,53
33
33
  tests/conftest.py,sha256=ZVJNbnr4DwbcqkTmePW6U01zAzE6QD0kNAEZjPG1f4s,169
34
34
  tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
35
35
  tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
36
- moose_lib-0.6.43.dist-info/METADATA,sha256=wadzNkPUXbeVD4YhUJYjR-K3qYge6r6AgScTd4N8EEw,730
37
- moose_lib-0.6.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
- moose_lib-0.6.43.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
39
- moose_lib-0.6.43.dist-info/RECORD,,
36
+ tests/test_s3queue_config.py,sha256=F05cnD61S2wBKPabcpEJxf55-DJGF4nLqwBb6aFbprc,9741
37
+ moose_lib-0.6.44.dist-info/METADATA,sha256=WED0zZnLw2jk6NR90k1PfiR-6Auqmuf3rrHGdOMJpCk,730
38
+ moose_lib-0.6.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ moose_lib-0.6.44.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
40
+ moose_lib-0.6.44.dist-info/RECORD,,
tests/test_s3queue_config.py ADDED
@@ -0,0 +1,311 @@
1
+ """Tests for S3Queue engine configuration with the new type hints."""
2
+
3
+ import pytest
4
+ from pydantic import BaseModel
5
+ from datetime import datetime
6
+ import warnings
7
+
8
+ from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
9
+ from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
10
+ from moose_lib.internal import (
11
+ _convert_engine_to_config_dict,
12
+ EngineConfigDict,
13
+ S3QueueConfigDict,
14
+ MergeTreeConfigDict,
15
+ ReplacingMergeTreeConfigDict
16
+ )
17
+
18
+
19
+ class SampleEvent(BaseModel):
20
+ """Sample model for S3Queue table tests."""
21
+ id: str
22
+ timestamp: datetime
23
+ message: str
24
+
25
+
26
+ def test_olap_config_accepts_enum():
27
+ """Test that OlapConfig accepts ClickHouseEngines enum values."""
28
+ config = OlapConfig(
29
+ engine=ClickHouseEngines.MergeTree,
30
+ order_by_fields=["id"]
31
+ )
32
+ assert config.engine == ClickHouseEngines.MergeTree
33
+
34
+
35
+ def test_olap_config_accepts_engine_config():
36
+ """Test that OlapConfig accepts EngineConfig instances."""
37
+ s3_engine = S3QueueEngine(
38
+ s3_path="s3://bucket/data/*.json",
39
+ format="JSONEachRow",
40
+ aws_access_key_id="AKIA123",
41
+ aws_secret_access_key="secret123"
42
+ )
43
+ config = OlapConfig(
44
+ engine=s3_engine,
45
+ order_by_fields=["timestamp"]
46
+ )
47
+ assert isinstance(config.engine, S3QueueEngine)
48
+ assert config.engine.s3_path == "s3://bucket/data/*.json"
49
+
50
+
51
+ def test_olap_table_with_s3queue_engine():
52
+ """Test creating OlapTable with S3QueueEngine."""
53
+ table = OlapTable[SampleEvent](
54
+ "TestS3Table",
55
+ OlapConfig(
56
+ engine=S3QueueEngine(
57
+ s3_path="s3://test-bucket/logs/*.json",
58
+ format="JSONEachRow",
59
+ compression="gzip"
60
+ ),
61
+ order_by_fields=["timestamp", "id"],
62
+ settings={
63
+ "s3queue_mode": "unordered",
64
+ "s3queue_keeper_path": "/clickhouse/s3queue/test"
65
+ }
66
+ )
67
+ )
68
+
69
+ assert table.name == "TestS3Table"
70
+ assert isinstance(table.config.engine, S3QueueEngine)
71
+ assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
72
+ assert table.config.settings["s3queue_mode"] == "unordered"
73
+
74
+
75
+ def test_olap_table_with_mergetree_engines():
76
+ """Test creating OlapTable with various MergeTree engine configs."""
77
+ # Test with MergeTreeEngine
78
+ table1 = OlapTable[SampleEvent](
79
+ "MergeTreeTable",
80
+ OlapConfig(
81
+ engine=MergeTreeEngine(),
82
+ order_by_fields=["id"]
83
+ )
84
+ )
85
+ assert isinstance(table1.config.engine, MergeTreeEngine)
86
+
87
+ # Test with ReplacingMergeTreeEngine
88
+ table2 = OlapTable[SampleEvent](
89
+ "ReplacingTable",
90
+ OlapConfig(
91
+ engine=ReplacingMergeTreeEngine(),
92
+ order_by_fields=["id"]
93
+ )
94
+ )
95
+ assert isinstance(table2.config.engine, ReplacingMergeTreeEngine)
96
+
97
+
98
+ def test_engine_conversion_to_dict():
99
+ """Test conversion of engine configs to EngineConfigDict."""
100
+ # Create a mock table with S3QueueEngine
101
+ table = OlapTable[SampleEvent](
102
+ "TestTable",
103
+ OlapConfig(
104
+ engine=S3QueueEngine(
105
+ s3_path="s3://bucket/data/*.parquet",
106
+ format="Parquet",
107
+ aws_access_key_id="AKIA456",
108
+ aws_secret_access_key="secret456",
109
+ compression="zstd",
110
+ headers={"X-Custom": "value"}
111
+ )
112
+ )
113
+ )
114
+
115
+ # Convert engine to dict
116
+ engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
117
+
118
+ assert engine_dict.engine == "S3Queue"
119
+ assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
120
+ assert engine_dict.format == "Parquet"
121
+ assert engine_dict.aws_access_key_id == "AKIA456"
122
+ assert engine_dict.aws_secret_access_key == "secret456"
123
+ assert engine_dict.compression == "zstd"
124
+ assert engine_dict.headers == {"X-Custom": "value"}
125
+
126
+
127
+ def test_engine_conversion_with_enum():
128
+ """Test conversion of enum engines to EngineConfigDict."""
129
+ # Create a mock table with enum engine
130
+ table = OlapTable[SampleEvent](
131
+ "TestTable",
132
+ OlapConfig(
133
+ engine=ClickHouseEngines.ReplacingMergeTree
134
+ )
135
+ )
136
+
137
+ # Convert engine to dict
138
+ engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
139
+
140
+ assert engine_dict.engine == "ReplacingMergeTree"
141
+
142
+
143
+ def test_backward_compatibility():
144
+ """Test that both old and new APIs work together."""
145
+ # Old API with enum
146
+ old_table = OlapTable[SampleEvent](
147
+ "OldTable",
148
+ OlapConfig(
149
+ engine=ClickHouseEngines.MergeTree,
150
+ order_by_fields=["id"]
151
+ )
152
+ )
153
+
154
+ # New API with EngineConfig
155
+ new_table = OlapTable[SampleEvent](
156
+ "NewTable",
157
+ OlapConfig(
158
+ engine=MergeTreeEngine(),
159
+ order_by_fields=["id"]
160
+ )
161
+ )
162
+
163
+ # Both should work
164
+ assert old_table.config.engine == ClickHouseEngines.MergeTree
165
+ assert isinstance(new_table.config.engine, MergeTreeEngine)
166
+
167
+
168
+ def test_deprecation_warning_for_enum():
169
+ """Test that using enum engine triggers deprecation warning."""
170
+ with warnings.catch_warnings(record=True) as w:
171
+ warnings.simplefilter("always")
172
+
173
+ table = OlapTable[SampleEvent](
174
+ "LegacyTable",
175
+ OlapConfig(
176
+ engine=ClickHouseEngines.S3Queue,
177
+ order_by_fields=["id"]
178
+ )
179
+ )
180
+
181
+ # Check that a deprecation warning was issued
182
+ assert len(w) == 1
183
+ assert issubclass(w[0].category, DeprecationWarning)
184
+ assert "deprecated ClickHouseEngines enum" in str(w[0].message)
185
+ assert "S3QueueEngine" in str(w[0].message)
186
+
187
+
188
+ def test_s3queue_with_all_options():
189
+ """Test S3QueueEngine with all configuration options."""
190
+ engine = S3QueueEngine(
191
+ s3_path="s3://my-bucket/path/to/data/*.json",
192
+ format="JSONEachRow",
193
+ aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
194
+ aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
195
+ compression="gzip",
196
+ headers={
197
+ "X-Custom-Header": "value1",
198
+ "Authorization": "Bearer token"
199
+ }
200
+ )
201
+
202
+ table = OlapTable[SampleEvent](
203
+ "FullConfigTable",
204
+ OlapConfig(
205
+ engine=engine,
206
+ order_by_fields=["timestamp"],
207
+ settings={
208
+ "s3queue_mode": "ordered",
209
+ "s3queue_keeper_path": "/clickhouse/s3queue/full",
210
+ "s3queue_loading_retries": "5",
211
+ "s3queue_processing_threads_num": "8"
212
+ }
213
+ )
214
+ )
215
+
216
+ assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
217
+ assert table.config.engine.format == "JSONEachRow"
218
+ assert table.config.engine.compression == "gzip"
219
+ assert table.config.engine.headers["X-Custom-Header"] == "value1"
220
+
221
+
222
+ def test_s3queue_public_bucket():
223
+ """Test S3QueueEngine for public bucket (no credentials)."""
224
+ engine = S3QueueEngine(
225
+ s3_path="s3://public-bucket/open-data/*.parquet",
226
+ format="Parquet"
227
+ # No AWS credentials needed for public buckets
228
+ )
229
+
230
+ table = OlapTable[SampleEvent](
231
+ "PublicBucketTable",
232
+ OlapConfig(
233
+ engine=engine,
234
+ order_by_fields=["id"]
235
+ )
236
+ )
237
+
238
+ assert table.config.engine.aws_access_key_id is None
239
+ assert table.config.engine.aws_secret_access_key is None
240
+
241
+
242
+ def test_migration_from_legacy_to_new():
243
+ """Test migration path from legacy to new API."""
244
+ # Legacy approach
245
+ legacy_config = OlapConfig(
246
+ engine=ClickHouseEngines.S3Queue,
247
+ order_by_fields=["timestamp"]
248
+ )
249
+
250
+ # New approach - equivalent configuration
251
+ new_config = OlapConfig(
252
+ engine=S3QueueEngine(
253
+ s3_path="s3://bucket/data/*.json",
254
+ format="JSONEachRow"
255
+ ),
256
+ order_by_fields=["timestamp"]
257
+ )
258
+
259
+ # Both should have the same order_by_fields
260
+ assert legacy_config.order_by_fields == new_config.order_by_fields
261
+
262
+ # Engine types should be different
263
+ assert isinstance(legacy_config.engine, ClickHouseEngines)
264
+ assert isinstance(new_config.engine, S3QueueEngine)
265
+
266
+
267
+ def test_engine_config_validation():
268
+ """Test that S3QueueEngine validates required fields."""
269
+ # Test missing required fields
270
+ with pytest.raises(ValueError, match="S3Queue engine requires 's3_path'"):
271
+ S3QueueEngine(
272
+ s3_path="", # Empty path should fail
273
+ format="JSONEachRow"
274
+ )
275
+
276
+ with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
277
+ S3QueueEngine(
278
+ s3_path="s3://bucket/data/*.json",
279
+ format="" # Empty format should fail
280
+ )
281
+
282
+
283
+ def test_multiple_engine_types():
284
+ """Test that different engine types can be used in the same application."""
285
+ tables = []
286
+
287
+ # Create tables with different engine types
288
+ tables.append(OlapTable[SampleEvent](
289
+ "MergeTreeTable",
290
+ OlapConfig(engine=MergeTreeEngine())
291
+ ))
292
+
293
+ tables.append(OlapTable[SampleEvent](
294
+ "ReplacingTreeTable",
295
+ OlapConfig(engine=ReplacingMergeTreeEngine())
296
+ ))
297
+
298
+ tables.append(OlapTable[SampleEvent](
299
+ "S3QueueTable",
300
+ OlapConfig(
301
+ engine=S3QueueEngine(
302
+ s3_path="s3://bucket/*.json",
303
+ format="JSONEachRow"
304
+ )
305
+ )
306
+ ))
307
+
308
+ # Verify all tables were created with correct engine types
309
+ assert isinstance(tables[0].config.engine, MergeTreeEngine)
310
+ assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
311
+ assert isinstance(tables[2].config.engine, S3QueueEngine)