moose-lib 0.6.43__py3-none-any.whl → 0.6.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of moose-lib might be problematic. Click here for more details.
- moose_lib/blocks.py +203 -3
- moose_lib/dmv2/materialized_view.py +0 -1
- moose_lib/dmv2/olap_table.py +43 -2
- moose_lib/internal.py +143 -6
- {moose_lib-0.6.43.dist-info → moose_lib-0.6.45.dist-info}/METADATA +1 -1
- {moose_lib-0.6.43.dist-info → moose_lib-0.6.45.dist-info}/RECORD +9 -8
- tests/test_s3queue_config.py +311 -0
- {moose_lib-0.6.43.dist-info → moose_lib-0.6.45.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.43.dist-info → moose_lib-0.6.45.dist-info}/top_level.txt +0 -0
moose_lib/blocks.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import Dict, Optional
|
|
3
|
+
from typing import Dict, List, Optional, Any, Union
|
|
4
|
+
from abc import ABC
|
|
5
|
+
import warnings
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class ClickHouseEngines(Enum):
|
|
@@ -11,6 +13,128 @@ class ClickHouseEngines(Enum):
|
|
|
11
13
|
CollapsingMergeTree = "CollapsingMergeTree"
|
|
12
14
|
VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
|
|
13
15
|
GraphiteMergeTree = "GraphiteMergeTree"
|
|
16
|
+
S3Queue = "S3Queue"
|
|
17
|
+
|
|
18
|
+
# ==========================
|
|
19
|
+
# New Engine Configuration Classes
|
|
20
|
+
# ==========================
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class EngineConfig(ABC):
|
|
24
|
+
"""Base class for engine configurations"""
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class MergeTreeEngine(EngineConfig):
|
|
29
|
+
"""Configuration for MergeTree engine"""
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
@dataclass
|
|
33
|
+
class ReplacingMergeTreeEngine(EngineConfig):
|
|
34
|
+
"""Configuration for ReplacingMergeTree engine (with deduplication)"""
|
|
35
|
+
pass
|
|
36
|
+
|
|
37
|
+
@dataclass
|
|
38
|
+
class AggregatingMergeTreeEngine(EngineConfig):
|
|
39
|
+
"""Configuration for AggregatingMergeTree engine"""
|
|
40
|
+
pass
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class SummingMergeTreeEngine(EngineConfig):
|
|
44
|
+
"""Configuration for SummingMergeTree engine"""
|
|
45
|
+
pass
|
|
46
|
+
|
|
47
|
+
@dataclass
|
|
48
|
+
class S3QueueEngine(EngineConfig):
|
|
49
|
+
"""Configuration for S3Queue engine - only non-alterable constructor parameters.
|
|
50
|
+
|
|
51
|
+
S3Queue-specific settings like 'mode', 'keeper_path', etc. should be specified
|
|
52
|
+
in the settings field of OlapConfig, not here.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
# Required fields
|
|
56
|
+
s3_path: str # S3 bucket path with wildcards (e.g., 's3://bucket/prefix/*.json')
|
|
57
|
+
format: str # Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
|
|
58
|
+
|
|
59
|
+
# Optional AWS credentials
|
|
60
|
+
aws_access_key_id: Optional[str] = None
|
|
61
|
+
aws_secret_access_key: Optional[str] = None
|
|
62
|
+
|
|
63
|
+
# Optional configuration
|
|
64
|
+
compression: Optional[str] = None # e.g., 'gzip', 'zstd'
|
|
65
|
+
headers: Optional[Dict[str, str]] = None
|
|
66
|
+
|
|
67
|
+
def __post_init__(self):
|
|
68
|
+
"""Validate required fields"""
|
|
69
|
+
if not self.s3_path:
|
|
70
|
+
raise ValueError("S3Queue engine requires 's3_path'")
|
|
71
|
+
if not self.format:
|
|
72
|
+
raise ValueError("S3Queue engine requires 'format'")
|
|
73
|
+
|
|
74
|
+
# ==========================
|
|
75
|
+
# New Table Configuration (Recommended API)
|
|
76
|
+
# ==========================
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class TableConfig:
|
|
80
|
+
"""Modern table configuration with engine-specific settings"""
|
|
81
|
+
|
|
82
|
+
# Engine configuration (required in new API)
|
|
83
|
+
engine: EngineConfig
|
|
84
|
+
|
|
85
|
+
# Common settings
|
|
86
|
+
name: str
|
|
87
|
+
columns: Dict[str, str]
|
|
88
|
+
order_by: Optional[str] = None
|
|
89
|
+
|
|
90
|
+
@classmethod
|
|
91
|
+
def with_s3_queue(cls,
|
|
92
|
+
name: str,
|
|
93
|
+
columns: Dict[str, str],
|
|
94
|
+
s3_path: str,
|
|
95
|
+
format: str,
|
|
96
|
+
order_by: Optional[str] = None,
|
|
97
|
+
**kwargs) -> 'TableConfig':
|
|
98
|
+
"""Create a table with S3Queue engine"""
|
|
99
|
+
return cls(
|
|
100
|
+
name=name,
|
|
101
|
+
columns=columns,
|
|
102
|
+
engine=S3QueueEngine(s3_path=s3_path, format=format, **kwargs),
|
|
103
|
+
order_by=order_by
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
@classmethod
|
|
107
|
+
def with_merge_tree(cls,
|
|
108
|
+
name: str,
|
|
109
|
+
columns: Dict[str, str],
|
|
110
|
+
order_by: Optional[str] = None,
|
|
111
|
+
deduplicate: bool = False,
|
|
112
|
+
**kwargs) -> 'TableConfig':
|
|
113
|
+
"""Create a table with MergeTree or ReplacingMergeTree engine"""
|
|
114
|
+
engine = ReplacingMergeTreeEngine() if deduplicate else MergeTreeEngine()
|
|
115
|
+
return cls(
|
|
116
|
+
name=name,
|
|
117
|
+
columns=columns,
|
|
118
|
+
engine=engine,
|
|
119
|
+
order_by=order_by
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# ==========================
|
|
123
|
+
# Legacy API Support (Deprecated)
|
|
124
|
+
# ==========================
|
|
125
|
+
|
|
126
|
+
@dataclass
|
|
127
|
+
class S3QueueEngineConfig:
|
|
128
|
+
"""Legacy S3Queue configuration (deprecated - use S3QueueEngine instead)"""
|
|
129
|
+
path: str # S3 path pattern (e.g., 's3://bucket/data/*.json')
|
|
130
|
+
format: str # Data format (e.g., 'JSONEachRow', 'CSV', etc.)
|
|
131
|
+
# Optional S3 access credentials - can be NOSIGN for public buckets
|
|
132
|
+
aws_access_key_id: Optional[str] = None
|
|
133
|
+
aws_secret_access_key: Optional[str] = None
|
|
134
|
+
# Optional compression
|
|
135
|
+
compression: Optional[str] = None
|
|
136
|
+
# Optional headers
|
|
137
|
+
headers: Optional[Dict[str, str]] = None
|
|
14
138
|
|
|
15
139
|
@dataclass
|
|
16
140
|
class TableCreateOptions:
|
|
@@ -18,6 +142,82 @@ class TableCreateOptions:
|
|
|
18
142
|
columns: Dict[str, str]
|
|
19
143
|
engine: Optional[ClickHouseEngines] = ClickHouseEngines.MergeTree
|
|
20
144
|
order_by: Optional[str] = None
|
|
145
|
+
s3_queue_engine_config: Optional[S3QueueEngineConfig] = None # Required when engine is S3Queue
|
|
146
|
+
|
|
147
|
+
def __post_init__(self):
|
|
148
|
+
"""Validate S3Queue configuration"""
|
|
149
|
+
if self.engine == ClickHouseEngines.S3Queue and self.s3_queue_engine_config is None:
|
|
150
|
+
raise ValueError(
|
|
151
|
+
"s3_queue_engine_config is required when using ClickHouseEngines.S3Queue engine. "
|
|
152
|
+
"Please provide s3_queue_engine_config with path, format, and optional settings."
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# ==========================
|
|
156
|
+
# Backward Compatibility Layer
|
|
157
|
+
# ==========================
|
|
158
|
+
|
|
159
|
+
def is_new_config(config: Any) -> bool:
|
|
160
|
+
"""Check if configuration uses new API"""
|
|
161
|
+
if isinstance(config, TableConfig):
|
|
162
|
+
return True
|
|
163
|
+
if hasattr(config, 'engine') and isinstance(getattr(config, 'engine'), EngineConfig):
|
|
164
|
+
return True
|
|
165
|
+
return False
|
|
166
|
+
|
|
167
|
+
def migrate_legacy_config(legacy: TableCreateOptions) -> TableConfig:
|
|
168
|
+
"""Convert legacy configuration to new format"""
|
|
169
|
+
|
|
170
|
+
# Show deprecation warning
|
|
171
|
+
warnings.warn(
|
|
172
|
+
"Using deprecated TableCreateOptions. Please migrate to TableConfig:\n"
|
|
173
|
+
"- For S3Queue: Use TableConfig.with_s3_queue()\n"
|
|
174
|
+
"- For deduplication: Use engine=ReplacingMergeTreeEngine()\n"
|
|
175
|
+
"See documentation for examples.",
|
|
176
|
+
DeprecationWarning,
|
|
177
|
+
stacklevel=2
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
# Handle S3Queue with separate config
|
|
181
|
+
if legacy.engine == ClickHouseEngines.S3Queue and legacy.s3_queue_engine_config:
|
|
182
|
+
s3_config = legacy.s3_queue_engine_config
|
|
183
|
+
return TableConfig(
|
|
184
|
+
name=legacy.name,
|
|
185
|
+
columns=legacy.columns,
|
|
186
|
+
engine=S3QueueEngine(
|
|
187
|
+
s3_path=s3_config.path,
|
|
188
|
+
format=s3_config.format,
|
|
189
|
+
aws_access_key_id=s3_config.aws_access_key_id,
|
|
190
|
+
aws_secret_access_key=s3_config.aws_secret_access_key,
|
|
191
|
+
compression=s3_config.compression,
|
|
192
|
+
headers=s3_config.headers
|
|
193
|
+
),
|
|
194
|
+
order_by=legacy.order_by
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
# Map legacy engine enum to new engine classes
|
|
198
|
+
engine_map = {
|
|
199
|
+
ClickHouseEngines.MergeTree: MergeTreeEngine(),
|
|
200
|
+
ClickHouseEngines.ReplacingMergeTree: ReplacingMergeTreeEngine(),
|
|
201
|
+
ClickHouseEngines.AggregatingMergeTree: AggregatingMergeTreeEngine(),
|
|
202
|
+
ClickHouseEngines.SummingMergeTree: SummingMergeTreeEngine(),
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
engine = engine_map.get(legacy.engine) if legacy.engine else MergeTreeEngine()
|
|
206
|
+
if engine is None:
|
|
207
|
+
engine = MergeTreeEngine()
|
|
208
|
+
|
|
209
|
+
return TableConfig(
|
|
210
|
+
name=legacy.name,
|
|
211
|
+
columns=legacy.columns,
|
|
212
|
+
engine=engine,
|
|
213
|
+
order_by=legacy.order_by
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def normalize_config(config: Union[TableConfig, TableCreateOptions]) -> TableConfig:
|
|
217
|
+
"""Normalize any configuration format to new API"""
|
|
218
|
+
if is_new_config(config):
|
|
219
|
+
return config # type: ignore
|
|
220
|
+
return migrate_legacy_config(config) # type: ignore
|
|
21
221
|
|
|
22
222
|
@dataclass
|
|
23
223
|
class AggregationCreateOptions:
|
|
@@ -93,7 +293,7 @@ def create_table(options: TableCreateOptions) -> str:
|
|
|
93
293
|
"""
|
|
94
294
|
column_definitions = ",\n".join([f"{name} {type}" for name, type in options.columns.items()])
|
|
95
295
|
order_by_clause = f"ORDER BY {options.order_by}" if options.order_by else ""
|
|
96
|
-
engine = options.engine.value
|
|
296
|
+
engine = options.engine.value if options.engine else "MergeTree"
|
|
97
297
|
|
|
98
298
|
return f"""
|
|
99
299
|
CREATE TABLE IF NOT EXISTS {options.name}
|
|
@@ -96,7 +96,6 @@ class MaterializedView(SqlResource, BaseTypedResource, Generic[T]):
|
|
|
96
96
|
|
|
97
97
|
setup = [
|
|
98
98
|
f"CREATE MATERIALIZED VIEW IF NOT EXISTS {quote_identifier(options.materialized_view_name)} TO {quote_identifier(target_table.name)} AS {options.select_statement}",
|
|
99
|
-
f"INSERT INTO {quote_identifier(target_table.name)} {options.select_statement}"
|
|
100
99
|
]
|
|
101
100
|
teardown = [f"DROP VIEW IF EXISTS {quote_identifier(options.materialized_view_name)}"]
|
|
102
101
|
|
moose_lib/dmv2/olap_table.py
CHANGED
|
@@ -5,6 +5,7 @@ This module provides classes for defining and configuring OLAP tables,
|
|
|
5
5
|
particularly for ClickHouse.
|
|
6
6
|
"""
|
|
7
7
|
import json
|
|
8
|
+
import warnings
|
|
8
9
|
from clickhouse_connect import get_client
|
|
9
10
|
from clickhouse_connect.driver.client import Client
|
|
10
11
|
from clickhouse_connect.driver.exceptions import ClickHouseError
|
|
@@ -12,6 +13,8 @@ from dataclasses import dataclass
|
|
|
12
13
|
from pydantic import BaseModel
|
|
13
14
|
from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator
|
|
14
15
|
from moose_lib import ClickHouseEngines
|
|
16
|
+
from ..blocks import EngineConfig # Add import for EngineConfig
|
|
17
|
+
from ..commons import Logger # Add import for Moose logging
|
|
15
18
|
from ..config.runtime import RuntimeClickHouseConfig
|
|
16
19
|
from ..utilities.sql import quote_identifier
|
|
17
20
|
from .types import TypedMooseResource, T
|
|
@@ -93,21 +96,27 @@ class InsertResult(Generic[T]):
|
|
|
93
96
|
failed_records: Optional[List[FailedRecord[T]]] = None
|
|
94
97
|
|
|
95
98
|
class OlapConfig(BaseModel):
|
|
99
|
+
model_config = {"extra": "forbid"} # Reject unknown fields for a clean API
|
|
100
|
+
|
|
96
101
|
"""Configuration for OLAP tables (e.g., ClickHouse tables).
|
|
97
102
|
|
|
98
103
|
Attributes:
|
|
99
104
|
order_by_fields: List of column names to use for the ORDER BY clause.
|
|
100
105
|
Crucial for `ReplacingMergeTree` and performance.
|
|
101
|
-
engine: The ClickHouse table engine to use
|
|
106
|
+
engine: The ClickHouse table engine to use. Can be either a ClickHouseEngines enum value
|
|
107
|
+
(for backward compatibility) or an EngineConfig instance (recommended).
|
|
102
108
|
version: Optional version string for tracking configuration changes.
|
|
103
109
|
metadata: Optional metadata for the table.
|
|
104
110
|
life_cycle: Determines how changes in code will propagate to the resources.
|
|
111
|
+
settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
|
|
112
|
+
These are alterable settings that can be changed without recreating the table.
|
|
105
113
|
"""
|
|
106
114
|
order_by_fields: list[str] = []
|
|
107
|
-
engine: Optional[ClickHouseEngines] = None
|
|
115
|
+
engine: Optional[Union[ClickHouseEngines, EngineConfig]] = None
|
|
108
116
|
version: Optional[str] = None
|
|
109
117
|
metadata: Optional[dict] = None
|
|
110
118
|
life_cycle: Optional[LifeCycle] = None
|
|
119
|
+
settings: Optional[dict[str, str]] = None
|
|
111
120
|
|
|
112
121
|
class OlapTable(TypedMooseResource, Generic[T]):
|
|
113
122
|
"""Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
|
|
@@ -136,6 +145,38 @@ class OlapTable(TypedMooseResource, Generic[T]):
|
|
|
136
145
|
self.config = config
|
|
137
146
|
self.metadata = config.metadata
|
|
138
147
|
_tables[name] = self
|
|
148
|
+
|
|
149
|
+
# Check if using legacy enum-based engine configuration
|
|
150
|
+
if config.engine and isinstance(config.engine, ClickHouseEngines):
|
|
151
|
+
logger = Logger(action="OlapTable")
|
|
152
|
+
|
|
153
|
+
# Special case for S3Queue - more detailed migration message
|
|
154
|
+
if config.engine == ClickHouseEngines.S3Queue:
|
|
155
|
+
logger.highlight(
|
|
156
|
+
f"Table '{name}' uses legacy S3Queue configuration. "
|
|
157
|
+
"Please migrate to the new API:\n"
|
|
158
|
+
" Old: engine=ClickHouseEngines.S3Queue\n"
|
|
159
|
+
" New: engine=S3QueueEngine(s3_path='...', format='...')\n"
|
|
160
|
+
"See documentation for complete S3Queue configuration options."
|
|
161
|
+
)
|
|
162
|
+
else:
|
|
163
|
+
# Generic message for other engines
|
|
164
|
+
engine_name = config.engine.value
|
|
165
|
+
logger.highlight(
|
|
166
|
+
f"Table '{name}' uses legacy engine configuration. "
|
|
167
|
+
f"Consider migrating to the new API:\n"
|
|
168
|
+
f" Old: engine=ClickHouseEngines.{engine_name}\n"
|
|
169
|
+
f" New: from moose_lib.blocks import {engine_name}Engine; engine={engine_name}Engine()\n"
|
|
170
|
+
"The new API provides better type safety and configuration options."
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Also emit a Python warning for development environments
|
|
174
|
+
warnings.warn(
|
|
175
|
+
f"Table '{name}' uses deprecated ClickHouseEngines enum. "
|
|
176
|
+
f"Please migrate to engine configuration classes (e.g., {config.engine.value}Engine).",
|
|
177
|
+
DeprecationWarning,
|
|
178
|
+
stacklevel=2
|
|
179
|
+
)
|
|
139
180
|
|
|
140
181
|
def _generate_table_name(self) -> str:
|
|
141
182
|
"""Generate the versioned table name following Moose's naming convention.
|
moose_lib/internal.py
CHANGED
|
@@ -7,10 +7,11 @@ to convert the user-defined resources (from `dmv2.py`) into a serializable
|
|
|
7
7
|
JSON format expected by the Moose infrastructure management system.
|
|
8
8
|
"""
|
|
9
9
|
from importlib import import_module
|
|
10
|
-
from typing import Literal, Optional, List, Any
|
|
10
|
+
from typing import Literal, Optional, List, Any, Dict, Union, TYPE_CHECKING
|
|
11
11
|
from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
|
|
12
12
|
import json
|
|
13
13
|
from .data_models import Column, _to_columns
|
|
14
|
+
from .blocks import EngineConfig, ClickHouseEngines
|
|
14
15
|
from moose_lib.dmv2 import (
|
|
15
16
|
get_tables,
|
|
16
17
|
get_streams,
|
|
@@ -53,6 +54,46 @@ class Consumer(BaseModel):
|
|
|
53
54
|
"""
|
|
54
55
|
version: Optional[str] = None
|
|
55
56
|
|
|
57
|
+
class BaseEngineConfigDict(BaseModel):
|
|
58
|
+
"""Base engine configuration for all ClickHouse table engines."""
|
|
59
|
+
model_config = model_config
|
|
60
|
+
engine: str
|
|
61
|
+
|
|
62
|
+
class MergeTreeConfigDict(BaseEngineConfigDict):
|
|
63
|
+
"""Configuration for MergeTree engine."""
|
|
64
|
+
engine: Literal["MergeTree"] = "MergeTree"
|
|
65
|
+
|
|
66
|
+
class ReplacingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
67
|
+
"""Configuration for ReplacingMergeTree engine."""
|
|
68
|
+
engine: Literal["ReplacingMergeTree"] = "ReplacingMergeTree"
|
|
69
|
+
|
|
70
|
+
class AggregatingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
71
|
+
"""Configuration for AggregatingMergeTree engine."""
|
|
72
|
+
engine: Literal["AggregatingMergeTree"] = "AggregatingMergeTree"
|
|
73
|
+
|
|
74
|
+
class SummingMergeTreeConfigDict(BaseEngineConfigDict):
|
|
75
|
+
"""Configuration for SummingMergeTree engine."""
|
|
76
|
+
engine: Literal["SummingMergeTree"] = "SummingMergeTree"
|
|
77
|
+
|
|
78
|
+
class S3QueueConfigDict(BaseEngineConfigDict):
|
|
79
|
+
"""Configuration for S3Queue engine with all specific fields."""
|
|
80
|
+
engine: Literal["S3Queue"] = "S3Queue"
|
|
81
|
+
s3_path: str
|
|
82
|
+
format: str
|
|
83
|
+
aws_access_key_id: Optional[str] = None
|
|
84
|
+
aws_secret_access_key: Optional[str] = None
|
|
85
|
+
compression: Optional[str] = None
|
|
86
|
+
headers: Optional[Dict[str, str]] = None
|
|
87
|
+
|
|
88
|
+
# Discriminated union of all engine configurations
|
|
89
|
+
EngineConfigDict = Union[
|
|
90
|
+
MergeTreeConfigDict,
|
|
91
|
+
ReplacingMergeTreeConfigDict,
|
|
92
|
+
AggregatingMergeTreeConfigDict,
|
|
93
|
+
SummingMergeTreeConfigDict,
|
|
94
|
+
S3QueueConfigDict
|
|
95
|
+
]
|
|
96
|
+
|
|
56
97
|
class TableConfig(BaseModel):
|
|
57
98
|
"""Internal representation of an OLAP table configuration for serialization.
|
|
58
99
|
|
|
@@ -60,20 +101,22 @@ class TableConfig(BaseModel):
|
|
|
60
101
|
name: Name of the table.
|
|
61
102
|
columns: List of columns with their types and attributes.
|
|
62
103
|
order_by: List of columns used for the ORDER BY clause.
|
|
63
|
-
|
|
104
|
+
engine_config: Engine configuration with type-safe, engine-specific parameters.
|
|
64
105
|
version: Optional version string of the table configuration.
|
|
65
106
|
metadata: Optional metadata for the table.
|
|
66
107
|
life_cycle: Lifecycle management setting for the table.
|
|
108
|
+
table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
|
|
67
109
|
"""
|
|
68
110
|
model_config = model_config
|
|
69
111
|
|
|
70
112
|
name: str
|
|
71
113
|
columns: List[Column]
|
|
72
114
|
order_by: List[str]
|
|
73
|
-
|
|
115
|
+
engine_config: Optional[EngineConfigDict] = Field(None, discriminator='engine')
|
|
74
116
|
version: Optional[str] = None
|
|
75
117
|
metadata: Optional[dict] = None
|
|
76
118
|
life_cycle: Optional[str] = None
|
|
119
|
+
table_settings: Optional[Dict[str, str]] = None
|
|
77
120
|
|
|
78
121
|
class TopicConfig(BaseModel):
|
|
79
122
|
"""Internal representation of a stream/topic configuration for serialization.
|
|
@@ -249,6 +292,86 @@ def _map_sql_resource_ref(r: Any) -> InfrastructureSignatureJson:
|
|
|
249
292
|
raise TypeError(f"Object {r} lacks a 'kind' attribute for dependency mapping.")
|
|
250
293
|
|
|
251
294
|
|
|
295
|
+
def _convert_engine_to_config_dict(engine: Union[ClickHouseEngines, EngineConfig], table: OlapTable) -> EngineConfigDict:
|
|
296
|
+
"""Convert engine enum or EngineConfig instance to new engine config format.
|
|
297
|
+
|
|
298
|
+
Args:
|
|
299
|
+
engine: Either a ClickHouseEngines enum value or an EngineConfig instance
|
|
300
|
+
table: The OlapTable instance with configuration
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
EngineConfigDict with engine-specific configuration
|
|
304
|
+
"""
|
|
305
|
+
from moose_lib import ClickHouseEngines
|
|
306
|
+
from moose_lib.blocks import (
|
|
307
|
+
EngineConfig, S3QueueEngine, MergeTreeEngine,
|
|
308
|
+
ReplacingMergeTreeEngine, AggregatingMergeTreeEngine,
|
|
309
|
+
SummingMergeTreeEngine
|
|
310
|
+
)
|
|
311
|
+
from moose_lib.commons import Logger
|
|
312
|
+
|
|
313
|
+
# Check if engine is an EngineConfig instance (new API)
|
|
314
|
+
if isinstance(engine, EngineConfig):
|
|
315
|
+
if isinstance(engine, S3QueueEngine):
|
|
316
|
+
return S3QueueConfigDict(
|
|
317
|
+
s3_path=engine.s3_path,
|
|
318
|
+
format=engine.format,
|
|
319
|
+
aws_access_key_id=engine.aws_access_key_id,
|
|
320
|
+
aws_secret_access_key=engine.aws_secret_access_key,
|
|
321
|
+
compression=engine.compression,
|
|
322
|
+
headers=engine.headers
|
|
323
|
+
)
|
|
324
|
+
elif isinstance(engine, ReplacingMergeTreeEngine):
|
|
325
|
+
return ReplacingMergeTreeConfigDict()
|
|
326
|
+
elif isinstance(engine, AggregatingMergeTreeEngine):
|
|
327
|
+
return AggregatingMergeTreeConfigDict()
|
|
328
|
+
elif isinstance(engine, SummingMergeTreeEngine):
|
|
329
|
+
return SummingMergeTreeConfigDict()
|
|
330
|
+
elif isinstance(engine, MergeTreeEngine):
|
|
331
|
+
return MergeTreeConfigDict()
|
|
332
|
+
else:
|
|
333
|
+
# Fallback for any other EngineConfig subclass - use base class
|
|
334
|
+
return BaseEngineConfigDict(engine=engine.__class__.__name__.replace("Engine", ""))
|
|
335
|
+
|
|
336
|
+
# Handle legacy enum-based engine configuration
|
|
337
|
+
if isinstance(engine, ClickHouseEngines):
|
|
338
|
+
engine_name = engine.value
|
|
339
|
+
else:
|
|
340
|
+
engine_name = str(engine)
|
|
341
|
+
|
|
342
|
+
# For S3Queue with legacy configuration, check for s3_queue_engine_config
|
|
343
|
+
if engine_name == "S3Queue" and hasattr(table.config, 's3_queue_engine_config'):
|
|
344
|
+
s3_config = table.config.s3_queue_engine_config
|
|
345
|
+
if s3_config:
|
|
346
|
+
logger = Logger(action="S3QueueConfig")
|
|
347
|
+
logger.highlight(
|
|
348
|
+
"Using deprecated s3_queue_engine_config. Please migrate to:\n"
|
|
349
|
+
" engine=S3QueueEngine(s3_path='...', format='...', ...)"
|
|
350
|
+
)
|
|
351
|
+
return S3QueueConfigDict(
|
|
352
|
+
s3_path=s3_config.path,
|
|
353
|
+
format=s3_config.format,
|
|
354
|
+
aws_access_key_id=s3_config.aws_access_key_id,
|
|
355
|
+
aws_secret_access_key=s3_config.aws_secret_access_key,
|
|
356
|
+
compression=s3_config.compression,
|
|
357
|
+
headers=s3_config.headers
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
# Map engine names to specific config classes
|
|
361
|
+
engine_map = {
|
|
362
|
+
"MergeTree": MergeTreeConfigDict,
|
|
363
|
+
"ReplacingMergeTree": ReplacingMergeTreeConfigDict,
|
|
364
|
+
"AggregatingMergeTree": AggregatingMergeTreeConfigDict,
|
|
365
|
+
"SummingMergeTree": SummingMergeTreeConfigDict,
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
config_class = engine_map.get(engine_name)
|
|
369
|
+
if config_class:
|
|
370
|
+
return config_class()
|
|
371
|
+
|
|
372
|
+
# Fallback for unknown engines
|
|
373
|
+
return BaseEngineConfigDict(engine=engine_name)
|
|
374
|
+
|
|
252
375
|
def to_infra_map() -> dict:
|
|
253
376
|
"""Converts the registered `dmv2` resources into the serializable `InfrastructureMap` format.
|
|
254
377
|
|
|
@@ -268,15 +391,29 @@ def to_infra_map() -> dict:
|
|
|
268
391
|
workflows = {}
|
|
269
392
|
|
|
270
393
|
for name, table in get_tables().items():
|
|
271
|
-
engine
|
|
394
|
+
# Convert engine configuration to new format
|
|
395
|
+
engine_config = None
|
|
396
|
+
if table.config.engine:
|
|
397
|
+
engine_config = _convert_engine_to_config_dict(table.config.engine, table)
|
|
398
|
+
|
|
399
|
+
# Get table settings, applying defaults for S3Queue
|
|
400
|
+
table_settings = table.config.settings.copy() if table.config.settings else {}
|
|
401
|
+
|
|
402
|
+
# Apply default settings for S3Queue if not already specified
|
|
403
|
+
if engine_config and engine_config.engine == "S3Queue":
|
|
404
|
+
# Set default mode to 'unordered' if not specified
|
|
405
|
+
if "mode" not in table_settings:
|
|
406
|
+
table_settings["mode"] = "unordered"
|
|
407
|
+
|
|
272
408
|
tables[name] = TableConfig(
|
|
273
409
|
name=name,
|
|
274
410
|
columns=_to_columns(table._t),
|
|
275
411
|
order_by=table.config.order_by_fields,
|
|
276
|
-
|
|
412
|
+
engine_config=engine_config,
|
|
277
413
|
version=table.config.version,
|
|
278
414
|
metadata=getattr(table, "metadata", None),
|
|
279
415
|
life_cycle=table.config.life_cycle.value if table.config.life_cycle else None,
|
|
416
|
+
table_settings=table_settings if table_settings else None, # Map 'settings' to 'table_settings' for internal use
|
|
280
417
|
)
|
|
281
418
|
|
|
282
419
|
for name, stream in get_streams().items():
|
|
@@ -321,10 +458,10 @@ def to_infra_map() -> dict:
|
|
|
321
458
|
name=api.config.destination.name
|
|
322
459
|
),
|
|
323
460
|
metadata=getattr(api, "metadata", None),
|
|
324
|
-
dead_letter_queue=api.config.dead_letter_queue.name,
|
|
325
461
|
json_schema=api._t.model_json_schema(
|
|
326
462
|
ref_template='#/components/schemas/{model}'
|
|
327
463
|
),
|
|
464
|
+
dead_letter_queue=api.config.dead_letter_queue.name if api.config.dead_letter_queue else None
|
|
328
465
|
)
|
|
329
466
|
|
|
330
467
|
for name, api in get_apis().items():
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
moose_lib/__init__.py,sha256=LiUdVqmtASPqbMF53dXVZzCdU1jtFVx62_tiSbW65a0,162
|
|
2
|
-
moose_lib/blocks.py,sha256=
|
|
2
|
+
moose_lib/blocks.py,sha256=5QNgaTPC4hfWrIxY5lG4FeNoFP2-JJuFmWjbAVVxSlk,10091
|
|
3
3
|
moose_lib/commons.py,sha256=FUpRv8D3-LeGcjhcqtDyiimz5izwpCq53h50ydxC_uA,3711
|
|
4
4
|
moose_lib/data_models.py,sha256=PgoFXTzf4C66FlKUowMT4VOrwSRR9_bk36P43ub4LzU,10274
|
|
5
5
|
moose_lib/dmv2-serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
|
|
6
|
-
moose_lib/internal.py,sha256=
|
|
6
|
+
moose_lib/internal.py,sha256=8LZXIOpaDUviZul3qjtBIlLVCyz7uAmZadrqJBndJzo,20505
|
|
7
7
|
moose_lib/main.py,sha256=XcVX_sTnt5QbrPXKNLCKZGCvpFpE8oiqSG2S1So9ztI,16713
|
|
8
8
|
moose_lib/query_param.py,sha256=kxcR09BMIsEg4o2qetjKrVu1YFRaLfMEzwzyGsKUpvA,6474
|
|
9
9
|
moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -17,8 +17,8 @@ moose_lib/dmv2/consumption.py,sha256=f43XPgJn1JLPPEpYbJ1FQ8Efi92YJH7DWa1wb5TY38g
|
|
|
17
17
|
moose_lib/dmv2/ingest_api.py,sha256=Snek9NGwaJl_BuImSWGtQq91m9D3AJ4qBoGiKZ-9yTQ,2323
|
|
18
18
|
moose_lib/dmv2/ingest_pipeline.py,sha256=2tDM4gCjQMdDRDjvymI5Z1DlkXULx9nY4nG9M3YXpjk,7033
|
|
19
19
|
moose_lib/dmv2/life_cycle.py,sha256=wl0k6yzwU1MJ_fO_UkN29buoY5G6ChYZvfwigP9fVfM,1254
|
|
20
|
-
moose_lib/dmv2/materialized_view.py,sha256=
|
|
21
|
-
moose_lib/dmv2/olap_table.py,sha256=
|
|
20
|
+
moose_lib/dmv2/materialized_view.py,sha256=3JbNLC26p7xOQK0DpgkreWKx7k5V3x3bz9a7kjDqKuU,4876
|
|
21
|
+
moose_lib/dmv2/olap_table.py,sha256=h4aF9hhFy8g0CHiQ_GwtnYqzagi1e4mGei406l8Z8tg,33808
|
|
22
22
|
moose_lib/dmv2/registry.py,sha256=janjYszIXDnMfuwkNhoiBmMLar3umQ3Npw9Ms-u6p-8,2373
|
|
23
23
|
moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
|
|
24
24
|
moose_lib/dmv2/stream.py,sha256=jiUWBsjFalLLP63mikOxyHRdieiDAlzf9lXfLye-Wjc,10761
|
|
@@ -33,7 +33,8 @@ tests/__init__.py,sha256=0Gh4yzPkkC3TzBGKhenpMIxJcRhyrrCfxLSfpTZnPMQ,53
|
|
|
33
33
|
tests/conftest.py,sha256=ZVJNbnr4DwbcqkTmePW6U01zAzE6QD0kNAEZjPG1f4s,169
|
|
34
34
|
tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
|
|
35
35
|
tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
|
|
36
|
-
|
|
37
|
-
moose_lib-0.6.
|
|
38
|
-
moose_lib-0.6.
|
|
39
|
-
moose_lib-0.6.
|
|
36
|
+
tests/test_s3queue_config.py,sha256=F05cnD61S2wBKPabcpEJxf55-DJGF4nLqwBb6aFbprc,9741
|
|
37
|
+
moose_lib-0.6.45.dist-info/METADATA,sha256=PiGe0i9B9cmtsZ8Ry8HPgWZFhcAHhfduJSeCCizTS4k,730
|
|
38
|
+
moose_lib-0.6.45.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
39
|
+
moose_lib-0.6.45.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
|
|
40
|
+
moose_lib-0.6.45.dist-info/RECORD,,
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""Tests for S3Queue engine configuration with the new type hints."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
|
|
9
|
+
from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
|
|
10
|
+
from moose_lib.internal import (
|
|
11
|
+
_convert_engine_to_config_dict,
|
|
12
|
+
EngineConfigDict,
|
|
13
|
+
S3QueueConfigDict,
|
|
14
|
+
MergeTreeConfigDict,
|
|
15
|
+
ReplacingMergeTreeConfigDict
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SampleEvent(BaseModel):
|
|
20
|
+
"""Sample model for S3Queue table tests."""
|
|
21
|
+
id: str
|
|
22
|
+
timestamp: datetime
|
|
23
|
+
message: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_olap_config_accepts_enum():
|
|
27
|
+
"""Test that OlapConfig accepts ClickHouseEngines enum values."""
|
|
28
|
+
config = OlapConfig(
|
|
29
|
+
engine=ClickHouseEngines.MergeTree,
|
|
30
|
+
order_by_fields=["id"]
|
|
31
|
+
)
|
|
32
|
+
assert config.engine == ClickHouseEngines.MergeTree
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_olap_config_accepts_engine_config():
|
|
36
|
+
"""Test that OlapConfig accepts EngineConfig instances."""
|
|
37
|
+
s3_engine = S3QueueEngine(
|
|
38
|
+
s3_path="s3://bucket/data/*.json",
|
|
39
|
+
format="JSONEachRow",
|
|
40
|
+
aws_access_key_id="AKIA123",
|
|
41
|
+
aws_secret_access_key="secret123"
|
|
42
|
+
)
|
|
43
|
+
config = OlapConfig(
|
|
44
|
+
engine=s3_engine,
|
|
45
|
+
order_by_fields=["timestamp"]
|
|
46
|
+
)
|
|
47
|
+
assert isinstance(config.engine, S3QueueEngine)
|
|
48
|
+
assert config.engine.s3_path == "s3://bucket/data/*.json"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_olap_table_with_s3queue_engine():
|
|
52
|
+
"""Test creating OlapTable with S3QueueEngine."""
|
|
53
|
+
table = OlapTable[SampleEvent](
|
|
54
|
+
"TestS3Table",
|
|
55
|
+
OlapConfig(
|
|
56
|
+
engine=S3QueueEngine(
|
|
57
|
+
s3_path="s3://test-bucket/logs/*.json",
|
|
58
|
+
format="JSONEachRow",
|
|
59
|
+
compression="gzip"
|
|
60
|
+
),
|
|
61
|
+
order_by_fields=["timestamp", "id"],
|
|
62
|
+
settings={
|
|
63
|
+
"s3queue_mode": "unordered",
|
|
64
|
+
"s3queue_keeper_path": "/clickhouse/s3queue/test"
|
|
65
|
+
}
|
|
66
|
+
)
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
assert table.name == "TestS3Table"
|
|
70
|
+
assert isinstance(table.config.engine, S3QueueEngine)
|
|
71
|
+
assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
|
|
72
|
+
assert table.config.settings["s3queue_mode"] == "unordered"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_olap_table_with_mergetree_engines():
|
|
76
|
+
"""Test creating OlapTable with various MergeTree engine configs."""
|
|
77
|
+
# Test with MergeTreeEngine
|
|
78
|
+
table1 = OlapTable[SampleEvent](
|
|
79
|
+
"MergeTreeTable",
|
|
80
|
+
OlapConfig(
|
|
81
|
+
engine=MergeTreeEngine(),
|
|
82
|
+
order_by_fields=["id"]
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
assert isinstance(table1.config.engine, MergeTreeEngine)
|
|
86
|
+
|
|
87
|
+
# Test with ReplacingMergeTreeEngine
|
|
88
|
+
table2 = OlapTable[SampleEvent](
|
|
89
|
+
"ReplacingTable",
|
|
90
|
+
OlapConfig(
|
|
91
|
+
engine=ReplacingMergeTreeEngine(),
|
|
92
|
+
order_by_fields=["id"]
|
|
93
|
+
)
|
|
94
|
+
)
|
|
95
|
+
assert isinstance(table2.config.engine, ReplacingMergeTreeEngine)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_engine_conversion_to_dict():
    """_convert_engine_to_config_dict carries every S3QueueEngine field over."""
    source_engine = S3QueueEngine(
        s3_path="s3://bucket/data/*.parquet",
        format="Parquet",
        aws_access_key_id="AKIA456",
        aws_secret_access_key="secret456",
        compression="zstd",
        headers={"X-Custom": "value"},
    )
    table = OlapTable[SampleEvent]("TestTable", OlapConfig(engine=source_engine))

    converted = _convert_engine_to_config_dict(table.config.engine, table)

    # Attribute name on the converted object -> value it must hold.
    expected = {
        "engine": "S3Queue",
        "s3_path": "s3://bucket/data/*.parquet",
        "format": "Parquet",
        "aws_access_key_id": "AKIA456",
        "aws_secret_access_key": "secret456",
        "compression": "zstd",
        "headers": {"X-Custom": "value"},
    }
    for attr_name, wanted in expected.items():
        assert getattr(converted, attr_name) == wanted
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_engine_conversion_with_enum():
    """A ClickHouseEngines enum member converts to a dict naming that engine."""
    enum_config = OlapConfig(engine=ClickHouseEngines.ReplacingMergeTree)
    table = OlapTable[SampleEvent]("TestTable", enum_config)

    converted = _convert_engine_to_config_dict(table.config.engine, table)

    assert converted.engine == "ReplacingMergeTree"
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_backward_compatibility():
    """The enum-based and class-based engine APIs can be used side by side."""
    # Old style: engine chosen through the ClickHouseEngines enum.
    enum_config = OlapConfig(
        engine=ClickHouseEngines.MergeTree,
        order_by_fields=["id"],
    )
    old_table = OlapTable[SampleEvent]("OldTable", enum_config)

    # New style: engine chosen through an EngineConfig instance.
    class_config = OlapConfig(
        engine=MergeTreeEngine(),
        order_by_fields=["id"],
    )
    new_table = OlapTable[SampleEvent]("NewTable", class_config)

    # Each table must report the engine in the form it was given.
    assert old_table.config.engine == ClickHouseEngines.MergeTree
    assert isinstance(new_table.config.engine, MergeTreeEngine)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_deprecation_warning_for_enum():
    """Using a ClickHouseEngines enum member emits one deprecation warning.

    Only DeprecationWarnings whose message mentions the deprecated enum are
    counted. Unrelated warnings raised while the table is being built (for
    example by third-party dependencies) are ignored so they cannot make
    this test fail spuriously.
    """
    with warnings.catch_warnings(record=True) as caught:
        # "always" ensures the warning is recorded even if an identical one
        # was already raised earlier in the test session.
        warnings.simplefilter("always")

        OlapTable[SampleEvent](
            "LegacyTable",
            OlapConfig(
                engine=ClickHouseEngines.S3Queue,
                order_by_fields=["id"],
            ),
        )

    enum_messages = [
        str(entry.message)
        for entry in caught
        if issubclass(entry.category, DeprecationWarning)
        and "deprecated ClickHouseEngines enum" in str(entry.message)
    ]

    # Exactly one enum deprecation warning, and it must point users at the
    # replacement engine class.
    assert len(enum_messages) == 1
    assert "S3QueueEngine" in enum_messages[0]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_s3queue_with_all_options():
    """An S3QueueEngine with every option set is readable back from the table."""
    custom_headers = {
        "X-Custom-Header": "value1",
        "Authorization": "Bearer token",
    }
    engine = S3QueueEngine(
        s3_path="s3://my-bucket/path/to/data/*.json",
        format="JSONEachRow",
        aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
        aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        compression="gzip",
        headers=custom_headers,
    )
    queue_settings = {
        "s3queue_mode": "ordered",
        "s3queue_keeper_path": "/clickhouse/s3queue/full",
        "s3queue_loading_retries": "5",
        "s3queue_processing_threads_num": "8",
    }
    full_config = OlapConfig(
        engine=engine,
        order_by_fields=["timestamp"],
        settings=queue_settings,
    )

    table = OlapTable[SampleEvent]("FullConfigTable", full_config)

    stored_engine = table.config.engine
    assert stored_engine.s3_path == "s3://my-bucket/path/to/data/*.json"
    assert stored_engine.format == "JSONEachRow"
    assert stored_engine.compression == "gzip"
    assert stored_engine.headers["X-Custom-Header"] == "value1"
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def test_s3queue_public_bucket():
    """An S3QueueEngine created without credentials reports them as None."""
    # Public buckets need no AWS credentials, so none are passed here.
    anonymous_engine = S3QueueEngine(
        s3_path="s3://public-bucket/open-data/*.parquet",
        format="Parquet",
    )
    bucket_config = OlapConfig(
        engine=anonymous_engine,
        order_by_fields=["id"],
    )

    table = OlapTable[SampleEvent]("PublicBucketTable", bucket_config)

    stored_engine = table.config.engine
    assert stored_engine.aws_access_key_id is None
    assert stored_engine.aws_secret_access_key is None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_migration_from_legacy_to_new():
    """Compare a legacy enum-based config with its class-based counterpart."""
    # Legacy form: engine named via the enum.
    legacy_config = OlapConfig(
        engine=ClickHouseEngines.S3Queue,
        order_by_fields=["timestamp"],
    )

    # New form: engine described by an S3QueueEngine instance.
    replacement_engine = S3QueueEngine(
        s3_path="s3://bucket/data/*.json",
        format="JSONEachRow",
    )
    new_config = OlapConfig(
        engine=replacement_engine,
        order_by_fields=["timestamp"],
    )

    # The ordering is identical in both configs...
    assert legacy_config.order_by_fields == new_config.order_by_fields

    # ...while the engine is represented by different types.
    assert isinstance(legacy_config.engine, ClickHouseEngines)
    assert isinstance(new_config.engine, S3QueueEngine)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def test_engine_config_validation():
    """S3QueueEngine raises ValueError when s3_path or format is empty."""
    # (constructor kwargs, pattern the ValueError message must match)
    invalid_cases = (
        (
            {"s3_path": "", "format": "JSONEachRow"},
            "S3Queue engine requires 's3_path'",
        ),
        (
            {"s3_path": "s3://bucket/data/*.json", "format": ""},
            "S3Queue engine requires 'format'",
        ),
    )
    for engine_kwargs, error_pattern in invalid_cases:
        with pytest.raises(ValueError, match=error_pattern):
            S3QueueEngine(**engine_kwargs)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def test_multiple_engine_types():
    """Tables with different engine config types can be created together."""
    merge_tree_table = OlapTable[SampleEvent](
        "MergeTreeTable",
        OlapConfig(engine=MergeTreeEngine()),
    )
    replacing_table = OlapTable[SampleEvent](
        "ReplacingTreeTable",
        OlapConfig(engine=ReplacingMergeTreeEngine()),
    )
    s3_queue_table = OlapTable[SampleEvent](
        "S3QueueTable",
        OlapConfig(
            engine=S3QueueEngine(
                s3_path="s3://bucket/*.json",
                format="JSONEachRow",
            ),
        ),
    )
    tables = [merge_tree_table, replacing_table, s3_queue_table]

    # Each table must still carry the engine type it was created with.
    expected_types = (MergeTreeEngine, ReplacingMergeTreeEngine, S3QueueEngine)
    for table, engine_type in zip(tables, expected_types):
        assert isinstance(table.config.engine, engine_type)
|
|
File without changes
|
|
File without changes
|