moose-lib 0.6.157__py3-none-any.whl → 0.6.162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +2 -0
- moose_lib/blocks.py +107 -0
- moose_lib/dmv2/olap_table.py +34 -0
- moose_lib/internal.py +86 -3
- moose_lib/secrets.py +100 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/METADATA +1 -1
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/RECORD +11 -9
- tests/test_s3queue_config.py +167 -38
- tests/test_secrets.py +221 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/top_level.txt +0 -0
moose_lib/__init__.py
CHANGED
moose_lib/blocks.py
CHANGED
@@ -14,6 +14,9 @@ class ClickHouseEngines(Enum):
     VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
     GraphiteMergeTree = "GraphiteMergeTree"
     S3Queue = "S3Queue"
+    S3 = "S3"
+    Buffer = "Buffer"
+    Distributed = "Distributed"
     ReplicatedMergeTree = "ReplicatedMergeTree"
     ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
     ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"

@@ -177,6 +180,110 @@ class S3QueueEngine(EngineConfig):
         if not self.format:
             raise ValueError("S3Queue engine requires 'format'")
 
+@dataclass
+class S3Engine(EngineConfig):
+    """Configuration for S3 engine - direct read/write from S3 storage.
+
+    Args:
+        path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
+        format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets)
+        aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
+        compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
+        partition_strategy: Optional partition strategy
+        partition_columns_in_data_file: Optional partition columns in data file
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("S3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("S3 engine requires 'format'")
+
+@dataclass
+class BufferEngine(EngineConfig):
+    """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
+
+    Args:
+        target_database: Target database name for the destination table
+        target_table: Target table name where data will be flushed
+        num_layers: Number of buffer layers (typically 16)
+        min_time: Minimum time in seconds before flushing
+        max_time: Maximum time in seconds before flushing
+        min_rows: Minimum number of rows before flushing
+        max_rows: Maximum number of rows before flushing
+        min_bytes: Minimum bytes before flushing
+        max_bytes: Maximum bytes before flushing
+        flush_time: Optional flush time in seconds
+        flush_rows: Optional flush number of rows
+        flush_bytes: Optional flush number of bytes
+    """
+
+    # Required fields
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+
+    # Optional fields
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.target_database:
+            raise ValueError("Buffer engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Buffer engine requires 'target_table'")
+
+@dataclass
+class DistributedEngine(EngineConfig):
+    """Configuration for Distributed engine - distributed table across a cluster.
+
+    Args:
+        cluster: Cluster name from the ClickHouse configuration
+        target_database: Database name on the cluster
+        target_table: Table name on the cluster
+        sharding_key: Optional sharding key expression for data distribution
+        policy_name: Optional policy name for data distribution
+    """
+
+    # Required fields
+    cluster: str
+    target_database: str
+    target_table: str
+
+    # Optional fields
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.cluster:
+            raise ValueError("Distributed engine requires 'cluster'")
+        if not self.target_database:
+            raise ValueError("Distributed engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Distributed engine requires 'target_table'")
+
 # ==========================
 # New Table Configuration (Recommended API)
 # ==========================
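For orientation, a minimal sketch of constructing the engine configs this hunk introduces (constructor fields are taken from the dataclasses above; the bucket, table, and cluster names are placeholders):

    from moose_lib.blocks import S3Engine, BufferEngine, DistributedEngine

    # Direct S3 read/write; credentials may be omitted for public buckets.
    s3 = S3Engine(path="s3://bucket/path/file.json", format="JSONEachRow")

    # In-memory buffer that flushes to default.dest_table once a time/row/byte threshold is hit.
    buf = BufferEngine(
        target_database="default", target_table="dest_table", num_layers=16,
        min_time=10, max_time=100, min_rows=10_000, max_rows=100_000,
        min_bytes=10_000_000, max_bytes=100_000_000,
    )

    # Distributed table that fans out to default.local_table on cluster "my_cluster".
    dist = DistributedEngine(cluster="my_cluster", target_database="default", target_table="local_table")

    # __post_init__ enforces the required fields, e.g. an empty path raises
    # ValueError("S3 engine requires 'path'").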
moose_lib/dmv2/olap_table.py
CHANGED
@@ -143,6 +143,7 @@ class OlapConfig(BaseModel):
         granularity: int = 1
 
     indexes: list[TableIndex] = []
+    database: Optional[str] = None  # Optional database name for multi-database support
 
     def model_post_init(self, __context):
         has_fields = bool(self.order_by_fields)

@@ -150,6 +151,39 @@ class OlapConfig(BaseModel):
         if has_fields and has_expr:
             raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
 
+        # Validate that non-MergeTree engines don't have unsupported clauses
+        if self.engine:
+            from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
+
+            # All non-MergeTree engines don't support ORDER BY
+            non_mergetree_engines = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine)
+            if isinstance(self.engine, non_mergetree_engines):
+                engine_name = type(self.engine).__name__
+
+                if has_fields or has_expr:
+                    raise ValueError(
+                        f"{engine_name} does not support ORDER BY clauses. "
+                        f"Remove order_by_fields or order_by_expression from your configuration."
+                    )
+
+                if self.sample_by_expression:
+                    raise ValueError(
+                        f"{engine_name} does not support SAMPLE BY clause. "
+                        f"Remove sample_by_expression from your configuration."
+                    )
+
+            # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY
+            # S3Engine DOES support PARTITION BY
+            engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine)
+            if isinstance(self.engine, engines_without_partition_by):
+                engine_name = type(self.engine).__name__
+
+                if self.partition_by:
+                    raise ValueError(
+                        f"{engine_name} does not support PARTITION BY clause. "
+                        f"Remove partition_by from your configuration."
+                    )
+
 
 class OlapTable(TypedMooseResource, Generic[T]):
     """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
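A short sketch of how this validation behaves from the caller's side, mirroring the checks above (field values and the "analytics" database name are illustrative):

    from moose_lib import OlapConfig
    from moose_lib.blocks import S3Engine, S3QueueEngine

    # S3Engine allows PARTITION BY, and the new database field selects a target database.
    ok = OlapConfig(
        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
        partition_by="toYYYYMM(timestamp)",
        database="analytics",
    )

    # ORDER BY on a non-MergeTree engine is rejected during model_post_init.
    try:
        OlapConfig(
            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
            order_by_fields=["id"],
        )
    except ValueError as err:
        print(err)  # "S3QueueEngine does not support ORDER BY clauses. ..."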
moose_lib/internal.py
CHANGED
@@ -128,6 +128,45 @@ class S3QueueConfigDict(BaseEngineConfigDict):
     headers: Optional[Dict[str, str]] = None
 
 
+class S3ConfigDict(BaseEngineConfigDict):
+    """Configuration for S3 engine."""
+    engine: Literal["S3"] = "S3"
+    path: str
+    format: str
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+
+class BufferConfigDict(BaseEngineConfigDict):
+    """Configuration for Buffer engine."""
+    engine: Literal["Buffer"] = "Buffer"
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+
+class DistributedConfigDict(BaseEngineConfigDict):
+    """Configuration for Distributed engine."""
+    engine: Literal["Distributed"] = "Distributed"
+    cluster: str
+    target_database: str
+    target_table: str
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+
 # Discriminated union of all engine configurations
 EngineConfigDict = Union[
     MergeTreeConfigDict,

@@ -138,7 +177,10 @@ EngineConfigDict = Union[
     ReplicatedReplacingMergeTreeConfigDict,
     ReplicatedAggregatingMergeTreeConfigDict,
     ReplicatedSummingMergeTreeConfigDict,
-    S3QueueConfigDict
+    S3QueueConfigDict,
+    S3ConfigDict,
+    BufferConfigDict,
+    DistributedConfigDict
 ]
 
 

@@ -171,6 +213,7 @@ class TableConfig(BaseModel):
     table_settings: Optional[dict[str, str]] = None
     indexes: list[OlapConfig.TableIndex] = []
     ttl: Optional[str] = None
+    database: Optional[str] = None
 
 
 class TopicConfig(BaseModel):

@@ -463,7 +506,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
     Returns:
         EngineConfigDict with engine-specific configuration
     """
-    from moose_lib.blocks import S3QueueEngine
+    from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine
 
     # Try S3Queue first
     if isinstance(engine, S3QueueEngine):

@@ -475,6 +518,45 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
             compression=engine.compression,
             headers=engine.headers
         )
+
+    # Try S3
+    if isinstance(engine, S3Engine):
+        return S3ConfigDict(
+            path=engine.path,
+            format=engine.format,
+            aws_access_key_id=engine.aws_access_key_id,
+            aws_secret_access_key=engine.aws_secret_access_key,
+            compression=engine.compression,
+            partition_strategy=engine.partition_strategy,
+            partition_columns_in_data_file=engine.partition_columns_in_data_file
+        )
+
+    # Try Buffer
+    if isinstance(engine, BufferEngine):
+        return BufferConfigDict(
+            target_database=engine.target_database,
+            target_table=engine.target_table,
+            num_layers=engine.num_layers,
+            min_time=engine.min_time,
+            max_time=engine.max_time,
+            min_rows=engine.min_rows,
+            max_rows=engine.max_rows,
+            min_bytes=engine.min_bytes,
+            max_bytes=engine.max_bytes,
+            flush_time=engine.flush_time,
+            flush_rows=engine.flush_rows,
+            flush_bytes=engine.flush_bytes
+        )
+
+    # Try Distributed
+    if isinstance(engine, DistributedEngine):
+        return DistributedConfigDict(
+            cluster=engine.cluster,
+            target_database=engine.target_database,
+            target_table=engine.target_table,
+            sharding_key=engine.sharding_key,
+            policy_name=engine.policy_name
+        )
 
     # Try basic engines
     basic_config = _convert_basic_engine_instance(engine)

@@ -613,6 +695,7 @@ def to_infra_map() -> dict:
             table_settings=table_settings if table_settings else None,
             indexes=table.config.indexes,
             ttl=table.config.ttl,
+            database=table.config.database,
         )
 
     for name, stream in get_streams().items():

@@ -716,7 +799,7 @@ def to_infra_map() -> dict:
         web_apps=web_apps
     )
 
-    return infra_map.model_dump(by_alias=True)
+    return infra_map.model_dump(by_alias=True, exclude_none=False)
 
 
 def load_models():
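As a rough sketch of what the new conversion branches produce, assuming the config-dict classes expose their fields as attributes the way the existing S3Queue tests do (the buffer parameters below are placeholders):

    from moose_lib.blocks import BufferEngine
    from moose_lib.internal import _convert_engine_instance_to_config_dict, BufferConfigDict

    engine = BufferEngine(
        target_database="default", target_table="dest", num_layers=16,
        min_time=10, max_time=100, min_rows=10_000, max_rows=100_000,
        min_bytes=10_000_000, max_bytes=100_000_000,
    )

    cfg = _convert_engine_instance_to_config_dict(engine)
    assert isinstance(cfg, BufferConfigDict)
    assert cfg.engine == "Buffer"   # Literal discriminator set by default
    assert cfg.flush_time is None   # optional fields pass through as None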
moose_lib/secrets.py
ADDED
@@ -0,0 +1,100 @@
+"""Utilities for runtime environment variable resolution.
+
+This module provides functionality to mark values that should be resolved
+from environment variables at runtime by the Moose CLI, rather than being
+embedded at build time.
+
+Example:
+    >>> from moose_lib import S3QueueEngine, moose_runtime_env
+    >>>
+    >>> engine = S3QueueEngine(
+    ...     s3_path="s3://bucket/data/*.json",
+    ...     format="JSONEachRow",
+    ...     aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+    ...     aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+    ... )
+"""
+
+#: Prefix used to mark values for runtime environment variable resolution.
+MOOSE_RUNTIME_ENV_PREFIX = "__MOOSE_RUNTIME_ENV__:"
+
+
+def get(env_var_name: str) -> str:
+    """Marks a value to be resolved from an environment variable at runtime.
+
+    When you use this function, the value is not read immediately. Instead,
+    a special marker is created that the Moose CLI will resolve when it
+    processes your infrastructure configuration.
+
+    This is useful for:
+    - Credentials that should never be embedded in Docker images
+    - Configuration that can be rotated without rebuilding
+    - Different values for different environments (dev, staging, prod)
+    - Any runtime configuration in infrastructure elements (Tables, Topics, etc.)
+
+    Args:
+        env_var_name: Name of the environment variable to resolve
+
+    Returns:
+        A marker string that Moose CLI will resolve at runtime
+
+    Raises:
+        ValueError: If the environment variable name is empty
+
+    Example:
+        >>> # Instead of this (evaluated at build time):
+        >>> import os
+        >>> aws_key = os.environ.get("AWS_ACCESS_KEY_ID")
+        >>>
+        >>> # Use this (evaluated at runtime):
+        >>> aws_key = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
+    """
+    if not env_var_name or not env_var_name.strip():
+        raise ValueError("Environment variable name cannot be empty")
+    return f"{MOOSE_RUNTIME_ENV_PREFIX}{env_var_name}"
+
+
+class MooseRuntimeEnv:
+    """Utilities for marking values to be resolved from environment variables at runtime.
+
+    This class provides a namespace for runtime environment variable resolution.
+    Use the singleton instance `moose_runtime_env` rather than instantiating this class directly.
+
+    Attributes:
+        get: Static method for creating runtime environment variable markers
+    """
+
+    @staticmethod
+    def get(env_var_name: str) -> str:
+        """Marks a value to be resolved from an environment variable at runtime.
+
+        Args:
+            env_var_name: Name of the environment variable to resolve
+
+        Returns:
+            A marker string that Moose CLI will resolve at runtime
+
+        Raises:
+            ValueError: If the environment variable name is empty
+        """
+        return get(env_var_name)
+
+
+# Export singleton instance for module-level access
+moose_runtime_env = MooseRuntimeEnv()
+
+# Legacy exports for backwards compatibility
+MooseEnvSecrets = MooseRuntimeEnv  # Deprecated: Use MooseRuntimeEnv instead
+moose_env_secrets = moose_runtime_env  # Deprecated: Use moose_runtime_env instead
+MOOSE_ENV_SECRET_PREFIX = MOOSE_RUNTIME_ENV_PREFIX  # Deprecated: Use MOOSE_RUNTIME_ENV_PREFIX instead
+
+__all__ = [
+    "moose_runtime_env",
+    "MooseRuntimeEnv",
+    "get",
+    "MOOSE_RUNTIME_ENV_PREFIX",
+    # Legacy exports (deprecated)
+    "moose_env_secrets",
+    "MooseEnvSecrets",
+    "MOOSE_ENV_SECRET_PREFIX",
+]
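In practice the marker is just a prefixed string, as the module docstring shows; a small usage sketch (bucket path and variable names are placeholders):

    from moose_lib.blocks import S3QueueEngine
    from moose_lib.secrets import moose_runtime_env, MOOSE_RUNTIME_ENV_PREFIX

    engine = S3QueueEngine(
        s3_path="s3://my-bucket/data/*.json",
        format="JSONEachRow",
        aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
    )

    # Nothing is read from the environment at this point; only markers are stored.
    assert engine.aws_access_key_id == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
    assert engine.aws_secret_access_key.startswith(MOOSE_RUNTIME_ENV_PREFIX)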
{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/RECORD
CHANGED

@@ -1,12 +1,13 @@
-moose_lib/__init__.py,sha256=
-moose_lib/blocks.py,sha256=
+moose_lib/__init__.py,sha256=1wG0Y8VGAdRpMCsTuHhmbltAUYCsGH-a8v8W3sN1gQ8,957
+moose_lib/blocks.py,sha256=TJfaBJjDbCjGBVGpcV2HwVLWxiuqBC9lhx7FXCCbfsE,18238
 moose_lib/commons.py,sha256=YDMzRO3ySa_iQRknmFwwE539KgSyjWhYJ-E3jIsfSgc,6585
 moose_lib/data_models.py,sha256=IovDWmdHdoG7M3QmMbvWg7wDzrLlU-pea3CDoF9mShA,17615
 moose_lib/dmv2_serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
-moose_lib/internal.py,sha256=
+moose_lib/internal.py,sha256=7T6PQyU2QLquSSPujO37e9t0tC_QYXpIK2CQom69li4,30453
 moose_lib/main.py,sha256=JWsgza52xEh25AyF61cO1ItJ8VXJHHz8j-4HG445Whg,20380
 moose_lib/query_builder.py,sha256=-L5p2dArBx3SBA-WZPkcCJPemKXnqJw60NHy-wn5wa4,6619
 moose_lib/query_param.py,sha256=kxcR09BMIsEg4o2qetjKrVu1YFRaLfMEzwzyGsKUpvA,6474
+moose_lib/secrets.py,sha256=upFjLorfAsiyL3F3UIl2STUFGjC6Pu0-Qe1FGs5S09c,3480
 moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 moose_lib/clients/redis_client.py,sha256=BDYjJ582V-rW92qVQ4neZ9Pu7JtDNt8x8jBWApt1XUg,11895
 moose_lib/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -19,7 +20,7 @@ moose_lib/dmv2/ingest_api.py,sha256=XhvHHgGPXp-BuRpAALth-FRhanwy-zJQ_83Cg_RLolM,
 moose_lib/dmv2/ingest_pipeline.py,sha256=R8NRrfD9zkEzwWEJKIrNL9Nk18DdGc7wdAakae_2-7o,8459
 moose_lib/dmv2/life_cycle.py,sha256=wl0k6yzwU1MJ_fO_UkN29buoY5G6ChYZvfwigP9fVfM,1254
 moose_lib/dmv2/materialized_view.py,sha256=wTam1V5CC2rExt7YrdK_Cz4rRTONm2keKOF951LlCP4,4875
-moose_lib/dmv2/olap_table.py,sha256=
+moose_lib/dmv2/olap_table.py,sha256=AfKTH6kabyE2qzrz-9C9BdDHyQor_JOTfIqplAkP56k,37474
 moose_lib/dmv2/registry.py,sha256=5V8HRKBJXKLIi5tdIC2tKEmsGELQ1C2fBrMjns8a3ao,2947
 moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
 moose_lib/dmv2/stream.py,sha256=pDryS1x5zMkDSJsFlGqm8quwKTl1ctAL-a_U8ySgqQI,18043

@@ -38,10 +39,11 @@ tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
 tests/test_olap_table_versioning.py,sha256=ffuJsO5TYKBd2aWV0GWDmosCwL4j518Y_MqYJdrfgDY,7041
 tests/test_query_builder.py,sha256=O3imdFSaqU13kbK1jSQaHbBgynhVmJaApT8DlRqYwJU,1116
 tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
-tests/test_s3queue_config.py,sha256=
+tests/test_s3queue_config.py,sha256=TSXSTAjcAVpZuvQza2i0XWGQTeaNs4mRZeH4pFCujNY,14728
+tests/test_secrets.py,sha256=g5ZjtOyT7vTz_sRUB71Vd6s715u0gMc4C-_8m_59Mtg,8347
 tests/test_simple_aggregate.py,sha256=yyFSYHUXskA1aTcwC-KQ9XmgOikeQ8wKXzntsUBIC6w,4055
 tests/test_web_app.py,sha256=iL86sg0NS2k6nP8jIGKZvrtg0BjvaqNTRp7-4T1TTck,6557
-moose_lib-0.6.
-moose_lib-0.6.
-moose_lib-0.6.
-moose_lib-0.6.
+moose_lib-0.6.162.dist-info/METADATA,sha256=dFe4T9iDSF2Ik79cm_6VsP3OPEqGB8avr-QsA2i4rCY,827
+moose_lib-0.6.162.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+moose_lib-0.6.162.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
+moose_lib-0.6.162.dist-info/RECORD,,
tests/test_s3queue_config.py
CHANGED
@@ -8,7 +8,7 @@ import warnings
 from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
 from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
 from moose_lib.internal import (
-    _convert_engine_to_config_dict,
+    _convert_engine_to_config_dict,
     EngineConfigDict,
     S3QueueConfigDict,
     MergeTreeConfigDict,

@@ -41,8 +41,8 @@ def test_olap_config_accepts_engine_config():
         aws_secret_access_key="secret123"
     )
     config = OlapConfig(
-        engine=s3_engine
-        order_by_fields
+        engine=s3_engine
+        # Note: S3QueueEngine does not support order_by_fields
     )
     assert isinstance(config.engine, S3QueueEngine)
     assert config.engine.s3_path == "s3://bucket/data/*.json"

@@ -58,14 +58,14 @@ def test_olap_table_with_s3queue_engine():
                 format="JSONEachRow",
                 compression="gzip"
             ),
-            order_by_fields
+            # Note: S3QueueEngine does not support order_by_fields
             settings={
                 "s3queue_mode": "unordered",
                 "s3queue_keeper_path": "/clickhouse/s3queue/test"
             }
         )
     )
-
+
     assert table.name == "TestS3Table"
     assert isinstance(table.config.engine, S3QueueEngine)
     assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"

@@ -83,7 +83,7 @@ def test_olap_table_with_mergetree_engines():
         )
     )
     assert isinstance(table1.config.engine, MergeTreeEngine)
-
+
     # Test with ReplacingMergeTreeEngine
     table2 = OlapTable[SampleEvent](
         "ReplacingTable",

@@ -111,10 +111,10 @@ def test_engine_conversion_to_dict():
             )
         )
     )
-
+
     # Convert engine to dict
     engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
-
+
     assert engine_dict.engine == "S3Queue"
     assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
     assert engine_dict.format == "Parquet"

@@ -133,10 +133,10 @@ def test_engine_conversion_with_enum():
             engine=ClickHouseEngines.ReplacingMergeTree
         )
     )
-
+
     # Convert engine to dict
     engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
-
+
     assert engine_dict.engine == "ReplacingMergeTree"
 
 

@@ -150,16 +150,16 @@ def test_backward_compatibility():
             order_by_fields=["id"]
         )
     )
-
+
     # New API with EngineConfig
     new_table = OlapTable[SampleEvent](
-        "NewTable",
+        "NewTable",
         OlapConfig(
             engine=MergeTreeEngine(),
             order_by_fields=["id"]
         )
     )
-
+
     # Both should work
     assert old_table.config.engine == ClickHouseEngines.MergeTree
     assert isinstance(new_table.config.engine, MergeTreeEngine)

@@ -169,7 +169,7 @@ def test_deprecation_warning_for_enum():
     """Test that using enum engine triggers deprecation warning."""
     with warnings.catch_warnings(record=True) as w:
         warnings.simplefilter("always")
-
+
         table = OlapTable[SampleEvent](
             "LegacyTable",
             OlapConfig(

@@ -177,7 +177,7 @@ def test_deprecation_warning_for_enum():
                 order_by_fields=["id"]
             )
         )
-
+
         # Check that a deprecation warning was issued
         assert len(w) == 1
         assert issubclass(w[0].category, DeprecationWarning)

@@ -198,12 +198,12 @@ def test_s3queue_with_all_options():
             "Authorization": "Bearer token"
         }
    )
-
+
     table = OlapTable[SampleEvent](
         "FullConfigTable",
         OlapConfig(
             engine=engine,
-            order_by_fields
+            # Note: S3QueueEngine does not support order_by_fields
             settings={
                 "s3queue_mode": "ordered",
                 "s3queue_keeper_path": "/clickhouse/s3queue/full",

@@ -212,7 +212,7 @@ def test_s3queue_with_all_options():
             }
         )
     )
-
+
     assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
     assert table.config.engine.format == "JSONEachRow"
     assert table.config.engine.compression == "gzip"

@@ -226,42 +226,49 @@ def test_s3queue_public_bucket():
         format="Parquet"
         # No AWS credentials needed for public buckets
     )
-
+
     table = OlapTable[SampleEvent](
         "PublicBucketTable",
         OlapConfig(
-            engine=engine
-            order_by_fields
+            engine=engine
+            # Note: S3QueueEngine does not support order_by_fields
         )
     )
-
+
     assert table.config.engine.aws_access_key_id is None
     assert table.config.engine.aws_secret_access_key is None
 
 
 def test_migration_from_legacy_to_new():
     """Test migration path from legacy to new API."""
-    # Legacy approach
+    # Legacy approach (with MergeTree, which supports order_by_fields)
     legacy_config = OlapConfig(
-        engine=ClickHouseEngines.
+        engine=ClickHouseEngines.MergeTree,
         order_by_fields=["timestamp"]
     )
-
-    # New approach - equivalent configuration
+
+    # New approach - equivalent configuration for MergeTree
     new_config = OlapConfig(
-        engine=
-            s3_path="s3://bucket/data/*.json",
-            format="JSONEachRow"
-        ),
+        engine=MergeTreeEngine(),
         order_by_fields=["timestamp"]
     )
-
+
     # Both should have the same order_by_fields
     assert legacy_config.order_by_fields == new_config.order_by_fields
-
+
     # Engine types should be different
     assert isinstance(legacy_config.engine, ClickHouseEngines)
-    assert isinstance(new_config.engine,
+    assert isinstance(new_config.engine, MergeTreeEngine)
+
+    # For S3Queue, the new API correctly prevents unsupported clauses
+    s3queue_config = OlapConfig(
+        engine=S3QueueEngine(
+            s3_path="s3://bucket/data/*.json",
+            format="JSONEachRow"
+        )
+        # Note: order_by_fields is not supported for S3QueueEngine
+    )
+    assert isinstance(s3queue_config.engine, S3QueueEngine)
 
 
 def test_engine_config_validation():

@@ -272,7 +279,7 @@ def test_engine_config_validation():
             s3_path="",  # Empty path should fail
             format="JSONEachRow"
         )
-
+
     with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
         S3QueueEngine(
             s3_path="s3://bucket/data/*.json",

@@ -280,21 +287,143 @@ def test_engine_config_validation():
         )
 
 
+def test_non_mergetree_engines_reject_unsupported_clauses():
+    """Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
+    from moose_lib.blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
+
+    # Test S3Engine rejects ORDER BY
+    with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            order_by_fields=["id"]
+        )
+
+    with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            order_by_expression="(id, name)"
+        )
+
+    # Test S3Engine rejects SAMPLE BY
+    with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            sample_by_expression="cityHash64(id)"
+        )
+
+    # Test S3Engine DOES support PARTITION BY (should not raise)
+    config_s3_with_partition = OlapConfig(
+        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+        partition_by="toYYYYMM(timestamp)"
+    )
+    assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
+
+    # Test S3QueueEngine rejects ORDER BY
+    with pytest.raises(ValueError, match="S3QueueEngine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
+            order_by_fields=["id"]
+        )
+
+    # Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
+    with pytest.raises(ValueError, match="S3QueueEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
+            partition_by="toYYYYMM(timestamp)"
+        )
+
+    # Test BufferEngine rejects ORDER BY
+    with pytest.raises(ValueError, match="BufferEngine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=BufferEngine(
+                target_database="default",
+                target_table="dest",
+                num_layers=16,
+                min_time=10,
+                max_time=100,
+                min_rows=10000,
+                max_rows=100000,
+                min_bytes=10000000,
+                max_bytes=100000000
+            ),
+            order_by_fields=["id"]
+        )
+
+    # Test BufferEngine rejects PARTITION BY
+    with pytest.raises(ValueError, match="BufferEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=BufferEngine(
+                target_database="default",
+                target_table="dest",
+                num_layers=16,
+                min_time=10,
+                max_time=100,
+                min_rows=10000,
+                max_rows=100000,
+                min_bytes=10000000,
+                max_bytes=100000000
+            ),
+            partition_by="date"
+        )
+
+    # Test DistributedEngine rejects PARTITION BY
+    with pytest.raises(ValueError, match="DistributedEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=DistributedEngine(
+                cluster="my_cluster",
+                target_database="default",
+                target_table="local_table"
+            ),
+            partition_by="date"
+        )
+
+    # Verify that S3Engine works without unsupported clauses
+    config = OlapConfig(
+        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
+    )
+    assert isinstance(config.engine, S3Engine)
+
+
+def test_mergetree_engines_still_accept_clauses():
+    """Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
+    from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
+
+    # MergeTree should accept all clauses
+    config1 = OlapConfig(
+        engine=MergeTreeEngine(),
+        order_by_fields=["id", "timestamp"],
+        partition_by="toYYYYMM(timestamp)",
+        sample_by_expression="cityHash64(id)"
+    )
+    assert config1.order_by_fields == ["id", "timestamp"]
+    assert config1.partition_by == "toYYYYMM(timestamp)"
+    assert config1.sample_by_expression == "cityHash64(id)"
+
+    # ReplacingMergeTree should also accept these clauses
+    config2 = OlapConfig(
+        engine=ReplacingMergeTreeEngine(ver="updated_at"),
+        order_by_expression="(id, name)",
+        partition_by="date"
+    )
+    assert config2.order_by_expression == "(id, name)"
+    assert config2.partition_by == "date"
+
+
 def test_multiple_engine_types():
     """Test that different engine types can be used in the same application."""
     tables = []
-
+
     # Create tables with different engine types
     tables.append(OlapTable[SampleEvent](
         "MergeTreeTable",
         OlapConfig(engine=MergeTreeEngine())
     ))
-
+
     tables.append(OlapTable[SampleEvent](
         "ReplacingTreeTable",
         OlapConfig(engine=ReplacingMergeTreeEngine())
     ))
-
+
     tables.append(OlapTable[SampleEvent](
         "S3QueueTable",
         OlapConfig(

@@ -304,7 +433,7 @@ def test_multiple_engine_types():
             )
         )
     ))
-
+
     # Verify all tables were created with correct engine types
     assert isinstance(tables[0].config.engine, MergeTreeEngine)
     assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
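Rounding this out, a sketch of declaring a table on one of the new engines in the same style these tests use (the Pydantic model, table name, and cluster details are made up for illustration):

    from pydantic import BaseModel
    from moose_lib import OlapTable, OlapConfig
    from moose_lib.blocks import DistributedEngine

    class SampleEvent(BaseModel):
        id: str
        timestamp: float

    events_distributed = OlapTable[SampleEvent](
        "EventsDistributed",
        OlapConfig(
            engine=DistributedEngine(
                cluster="my_cluster",
                target_database="default",
                target_table="events_local",
                sharding_key="cityHash64(id)",
            )
            # No order_by/partition_by/sample_by: DistributedEngine rejects them.
        ),
    )
    assert isinstance(events_distributed.config.engine, DistributedEngine)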
tests/test_secrets.py
ADDED
@@ -0,0 +1,221 @@
+"""Tests for the moose_lib.secrets module.
+
+This module tests the runtime environment variable marker functionality,
+which allows users to defer secret resolution until runtime rather than
+embedding secrets at build time.
+"""
+
+import pytest
+from moose_lib.secrets import moose_runtime_env, get, MOOSE_RUNTIME_ENV_PREFIX
+
+
+class TestMooseRuntimeEnvGet:
+    """Tests for the moose_runtime_env.get() method."""
+
+    def test_creates_marker_with_correct_prefix(self):
+        """Should create a marker string with the correct prefix."""
+        var_name = "AWS_ACCESS_KEY_ID"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+        assert result == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
+
+    def test_handles_different_variable_names(self):
+        """Should handle different environment variable names correctly."""
+        test_cases = [
+            "AWS_SECRET_ACCESS_KEY",
+            "DATABASE_PASSWORD",
+            "API_KEY",
+            "MY_CUSTOM_SECRET",
+        ]
+
+        for var_name in test_cases:
+            result = moose_runtime_env.get(var_name)
+            assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+            assert var_name in result
+
+    def test_raises_error_for_empty_string(self):
+        """Should raise ValueError for empty string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get("")
+
+    def test_raises_error_for_whitespace_only(self):
+        """Should raise ValueError for whitespace-only string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get(" ")
+
+    def test_raises_error_for_tabs_only(self):
+        """Should raise ValueError for string with only tabs."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get("\t\t")
+
+    def test_allows_underscores_in_variable_names(self):
+        """Should allow variable names with underscores."""
+        var_name = "MY_LONG_VAR_NAME"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_allows_numbers_in_variable_names(self):
+        """Should allow variable names with numbers."""
+        var_name = "API_KEY_123"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_preserves_exact_casing(self):
+        """Should preserve exact variable name casing."""
+        var_name = "MixedCase_VarName"
+        result = moose_runtime_env.get(var_name)
+
+        assert var_name in result
+        assert var_name.lower() not in result  # Ensure casing wasn't changed
+
+    def test_can_be_used_in_s3queue_config(self):
+        """Should create markers that can be used in S3Queue configuration."""
+        access_key_marker = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
+        secret_key_marker = moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+
+        config = {
+            "aws_access_key_id": access_key_marker,
+            "aws_secret_access_key": secret_key_marker,
+        }
+
+        assert "AWS_ACCESS_KEY_ID" in config["aws_access_key_id"]
+        assert "AWS_SECRET_ACCESS_KEY" in config["aws_secret_access_key"]
+
+
+class TestModuleLevelGetFunction:
+    """Tests for the module-level get() function."""
+
+    def test_module_level_get_creates_marker(self):
+        """The module-level get function should create markers."""
+        var_name = "TEST_SECRET"
+        result = get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_module_level_get_matches_class_method(self):
+        """Module-level get should produce same result as class method."""
+        var_name = "MY_SECRET"
+
+        result_module = get(var_name)
+        result_class = moose_runtime_env.get(var_name)
+
+        assert result_module == result_class
+
+    def test_module_level_get_raises_error_for_empty(self):
+        """Module-level get should raise ValueError for empty string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            get("")
+
+
+class TestMooseRuntimeEnvPrefix:
+    """Tests for the MOOSE_RUNTIME_ENV_PREFIX constant."""
+
+    def test_has_expected_value(self):
+        """Should have the expected prefix value."""
+        assert MOOSE_RUNTIME_ENV_PREFIX == "__MOOSE_RUNTIME_ENV__:"
+
+    def test_is_string(self):
+        """Should be a string."""
+        assert isinstance(MOOSE_RUNTIME_ENV_PREFIX, str)
+
+    def test_is_not_empty(self):
+        """Should not be empty."""
+        assert len(MOOSE_RUNTIME_ENV_PREFIX) > 0
+
+
+class TestMarkerFormatValidation:
+    """Tests for marker format validation and parsing."""
+
+    def test_creates_easily_detectable_markers(self):
+        """Should create markers that are easily detectable."""
+        marker = moose_runtime_env.get("TEST_VAR")
+
+        assert marker.startswith("__MOOSE_RUNTIME_ENV__:")
+
+    def test_markers_can_be_split_to_extract_variable_name(self):
+        """Should create markers that can be split to extract variable name."""
+        var_name = "MY_SECRET"
+        marker = moose_runtime_env.get(var_name)
+
+        parts = marker.split(MOOSE_RUNTIME_ENV_PREFIX)
+        assert len(parts) == 2
+        assert parts[1] == var_name
+
+    def test_markers_are_json_serializable(self):
+        """Should create markers that are JSON serializable."""
+        import json
+
+        marker = moose_runtime_env.get("TEST_VAR")
+        json_str = json.dumps({"secret": marker})
+        parsed = json.loads(json_str)
+
+        assert parsed["secret"] == marker
+
+    def test_markers_work_with_dict_serialization(self):
+        """Should work correctly with dictionary serialization."""
+        marker = moose_runtime_env.get("DATABASE_PASSWORD")
+
+        config = {
+            "password": marker,
+            "other_field": "value"
+        }
+
+        # Verify the marker is preserved in the dict
+        assert config["password"] == marker
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["password"]
+
+
+class TestIntegrationScenarios:
+    """Integration tests for real-world usage scenarios."""
+
+    def test_s3queue_engine_with_secrets(self):
+        """Should work correctly in S3Queue engine configuration."""
+        from moose_lib.blocks import S3QueueEngine
+
+        engine = S3QueueEngine(
+            s3_path="s3://my-bucket/data/*.json",
+            format="JSONEachRow",
+            aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+            aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
+        )
+
+        # Verify markers were set correctly
+        assert engine.aws_access_key_id == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
+        assert engine.aws_secret_access_key == "__MOOSE_RUNTIME_ENV__:AWS_SECRET_ACCESS_KEY"
+
+    def test_multiple_secrets_in_same_config(self):
+        """Should handle multiple secrets in the same configuration."""
+        config = {
+            "username": moose_runtime_env.get("DB_USERNAME"),
+            "password": moose_runtime_env.get("DB_PASSWORD"),
+            "api_key": moose_runtime_env.get("API_KEY"),
+        }
+
+        # All should have the correct prefix
+        for value in config.values():
+            assert value.startswith(MOOSE_RUNTIME_ENV_PREFIX)
+
+        # Each should have the correct variable name
+        assert "DB_USERNAME" in config["username"]
+        assert "DB_PASSWORD" in config["password"]
+        assert "API_KEY" in config["api_key"]
+
+    def test_mixed_secret_and_plain_values(self):
+        """Should handle mix of secret markers and plain values."""
+        config = {
+            "region": "us-east-1",  # Plain value
+            "access_key": moose_runtime_env.get("AWS_ACCESS_KEY_ID"),  # Secret
+            "bucket": "my-bucket",  # Plain value
+            "secret_key": moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),  # Secret
+        }
+
+        # Plain values should be unchanged
+        assert config["region"] == "us-east-1"
+        assert config["bucket"] == "my-bucket"
+
+        # Secrets should have markers
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["access_key"]
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["secret_key"]
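On the consuming side, resolving a marker amounts to stripping the prefix and reading the named variable. A hypothetical illustration of that step, not the Moose CLI's actual implementation, just what the marker format enables:

    import os
    from moose_lib.secrets import MOOSE_RUNTIME_ENV_PREFIX

    def resolve_if_marker(value: str) -> str:
        """Return the referenced environment variable if value is a marker, else the value itself."""
        if value.startswith(MOOSE_RUNTIME_ENV_PREFIX):
            env_var_name = value[len(MOOSE_RUNTIME_ENV_PREFIX):]
            return os.environ.get(env_var_name, "")
        return value

    os.environ["AWS_ACCESS_KEY_ID"] = "AKIA-example"  # placeholder value
    assert resolve_if_marker("__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID") == "AKIA-example"
    assert resolve_if_marker("us-east-1") == "us-east-1"  # plain values pass through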
{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/WHEEL
File without changes

{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/top_level.txt
File without changes