moose-lib 0.6.157__py3-none-any.whl → 0.6.162__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moose_lib/__init__.py CHANGED
@@ -4,6 +4,8 @@ from .blocks import *
4
4
 
5
5
  from .commons import *
6
6
 
7
+ from .secrets import moose_runtime_env
8
+
7
9
  from .data_models import *
8
10
 
9
11
  from .dmv2 import *
moose_lib/blocks.py CHANGED
@@ -14,6 +14,9 @@ class ClickHouseEngines(Enum):
14
14
  VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
15
15
  GraphiteMergeTree = "GraphiteMergeTree"
16
16
  S3Queue = "S3Queue"
17
+ S3 = "S3"
18
+ Buffer = "Buffer"
19
+ Distributed = "Distributed"
17
20
  ReplicatedMergeTree = "ReplicatedMergeTree"
18
21
  ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
19
22
  ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"
@@ -177,6 +180,110 @@ class S3QueueEngine(EngineConfig):
177
180
  if not self.format:
178
181
  raise ValueError("S3Queue engine requires 'format'")
179
182
 
183
+ @dataclass
184
+ class S3Engine(EngineConfig):
185
+ """Configuration for S3 engine - direct read/write from S3 storage.
186
+
187
+ Args:
188
+ path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
189
+ format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
190
+ aws_access_key_id: AWS access key ID (optional, omit for public buckets)
191
+ aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
192
+ compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
193
+ partition_strategy: Optional partition strategy
194
+ partition_columns_in_data_file: Optional partition columns in data file
195
+ """
196
+
197
+ # Required fields
198
+ path: str
199
+ format: str
200
+
201
+ # Optional fields
202
+ aws_access_key_id: Optional[str] = None
203
+ aws_secret_access_key: Optional[str] = None
204
+ compression: Optional[str] = None
205
+ partition_strategy: Optional[str] = None
206
+ partition_columns_in_data_file: Optional[str] = None
207
+
208
+ def __post_init__(self):
209
+ """Validate required fields"""
210
+ if not self.path:
211
+ raise ValueError("S3 engine requires 'path'")
212
+ if not self.format:
213
+ raise ValueError("S3 engine requires 'format'")
214
+
215
+ @dataclass
216
+ class BufferEngine(EngineConfig):
217
+ """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
218
+
219
+ Args:
220
+ target_database: Target database name for the destination table
221
+ target_table: Target table name where data will be flushed
222
+ num_layers: Number of buffer layers (typically 16)
223
+ min_time: Minimum time in seconds before flushing
224
+ max_time: Maximum time in seconds before flushing
225
+ min_rows: Minimum number of rows before flushing
226
+ max_rows: Maximum number of rows before flushing
227
+ min_bytes: Minimum bytes before flushing
228
+ max_bytes: Maximum bytes before flushing
229
+ flush_time: Optional flush time in seconds
230
+ flush_rows: Optional flush number of rows
231
+ flush_bytes: Optional flush number of bytes
232
+ """
233
+
234
+ # Required fields
235
+ target_database: str
236
+ target_table: str
237
+ num_layers: int
238
+ min_time: int
239
+ max_time: int
240
+ min_rows: int
241
+ max_rows: int
242
+ min_bytes: int
243
+ max_bytes: int
244
+
245
+ # Optional fields
246
+ flush_time: Optional[int] = None
247
+ flush_rows: Optional[int] = None
248
+ flush_bytes: Optional[int] = None
249
+
250
+ def __post_init__(self):
251
+ """Validate required fields"""
252
+ if not self.target_database:
253
+ raise ValueError("Buffer engine requires 'target_database'")
254
+ if not self.target_table:
255
+ raise ValueError("Buffer engine requires 'target_table'")
256
+
257
+ @dataclass
258
+ class DistributedEngine(EngineConfig):
259
+ """Configuration for Distributed engine - distributed table across a cluster.
260
+
261
+ Args:
262
+ cluster: Cluster name from the ClickHouse configuration
263
+ target_database: Database name on the cluster
264
+ target_table: Table name on the cluster
265
+ sharding_key: Optional sharding key expression for data distribution
266
+ policy_name: Optional policy name for data distribution
267
+ """
268
+
269
+ # Required fields
270
+ cluster: str
271
+ target_database: str
272
+ target_table: str
273
+
274
+ # Optional fields
275
+ sharding_key: Optional[str] = None
276
+ policy_name: Optional[str] = None
277
+
278
+ def __post_init__(self):
279
+ """Validate required fields"""
280
+ if not self.cluster:
281
+ raise ValueError("Distributed engine requires 'cluster'")
282
+ if not self.target_database:
283
+ raise ValueError("Distributed engine requires 'target_database'")
284
+ if not self.target_table:
285
+ raise ValueError("Distributed engine requires 'target_table'")
286
+
180
287
  # ==========================
181
288
  # New Table Configuration (Recommended API)
182
289
  # ==========================
@@ -143,6 +143,7 @@ class OlapConfig(BaseModel):
143
143
  granularity: int = 1
144
144
 
145
145
  indexes: list[TableIndex] = []
146
+ database: Optional[str] = None # Optional database name for multi-database support
146
147
 
147
148
  def model_post_init(self, __context):
148
149
  has_fields = bool(self.order_by_fields)
@@ -150,6 +151,39 @@ class OlapConfig(BaseModel):
150
151
  if has_fields and has_expr:
151
152
  raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
152
153
 
154
+ # Validate that non-MergeTree engines don't have unsupported clauses
155
+ if self.engine:
156
+ from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
157
+
158
+ # All non-MergeTree engines don't support ORDER BY
159
+ non_mergetree_engines = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine)
160
+ if isinstance(self.engine, non_mergetree_engines):
161
+ engine_name = type(self.engine).__name__
162
+
163
+ if has_fields or has_expr:
164
+ raise ValueError(
165
+ f"{engine_name} does not support ORDER BY clauses. "
166
+ f"Remove order_by_fields or order_by_expression from your configuration."
167
+ )
168
+
169
+ if self.sample_by_expression:
170
+ raise ValueError(
171
+ f"{engine_name} does not support SAMPLE BY clause. "
172
+ f"Remove sample_by_expression from your configuration."
173
+ )
174
+
175
+ # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY
176
+ # S3Engine DOES support PARTITION BY
177
+ engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine)
178
+ if isinstance(self.engine, engines_without_partition_by):
179
+ engine_name = type(self.engine).__name__
180
+
181
+ if self.partition_by:
182
+ raise ValueError(
183
+ f"{engine_name} does not support PARTITION BY clause. "
184
+ f"Remove partition_by from your configuration."
185
+ )
186
+
153
187
 
154
188
  class OlapTable(TypedMooseResource, Generic[T]):
155
189
  """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
moose_lib/internal.py CHANGED
@@ -128,6 +128,45 @@ class S3QueueConfigDict(BaseEngineConfigDict):
128
128
  headers: Optional[Dict[str, str]] = None
129
129
 
130
130
 
131
+ class S3ConfigDict(BaseEngineConfigDict):
132
+ """Configuration for S3 engine."""
133
+ engine: Literal["S3"] = "S3"
134
+ path: str
135
+ format: str
136
+ aws_access_key_id: Optional[str] = None
137
+ aws_secret_access_key: Optional[str] = None
138
+ compression: Optional[str] = None
139
+ partition_strategy: Optional[str] = None
140
+ partition_columns_in_data_file: Optional[str] = None
141
+
142
+
143
+ class BufferConfigDict(BaseEngineConfigDict):
144
+ """Configuration for Buffer engine."""
145
+ engine: Literal["Buffer"] = "Buffer"
146
+ target_database: str
147
+ target_table: str
148
+ num_layers: int
149
+ min_time: int
150
+ max_time: int
151
+ min_rows: int
152
+ max_rows: int
153
+ min_bytes: int
154
+ max_bytes: int
155
+ flush_time: Optional[int] = None
156
+ flush_rows: Optional[int] = None
157
+ flush_bytes: Optional[int] = None
158
+
159
+
160
+ class DistributedConfigDict(BaseEngineConfigDict):
161
+ """Configuration for Distributed engine."""
162
+ engine: Literal["Distributed"] = "Distributed"
163
+ cluster: str
164
+ target_database: str
165
+ target_table: str
166
+ sharding_key: Optional[str] = None
167
+ policy_name: Optional[str] = None
168
+
169
+
131
170
  # Discriminated union of all engine configurations
132
171
  EngineConfigDict = Union[
133
172
  MergeTreeConfigDict,
@@ -138,7 +177,10 @@ EngineConfigDict = Union[
138
177
  ReplicatedReplacingMergeTreeConfigDict,
139
178
  ReplicatedAggregatingMergeTreeConfigDict,
140
179
  ReplicatedSummingMergeTreeConfigDict,
141
- S3QueueConfigDict
180
+ S3QueueConfigDict,
181
+ S3ConfigDict,
182
+ BufferConfigDict,
183
+ DistributedConfigDict
142
184
  ]
143
185
 
144
186
 
@@ -171,6 +213,7 @@ class TableConfig(BaseModel):
171
213
  table_settings: Optional[dict[str, str]] = None
172
214
  indexes: list[OlapConfig.TableIndex] = []
173
215
  ttl: Optional[str] = None
216
+ database: Optional[str] = None
174
217
 
175
218
 
176
219
  class TopicConfig(BaseModel):
@@ -463,7 +506,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
463
506
  Returns:
464
507
  EngineConfigDict with engine-specific configuration
465
508
  """
466
- from moose_lib.blocks import S3QueueEngine
509
+ from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine
467
510
 
468
511
  # Try S3Queue first
469
512
  if isinstance(engine, S3QueueEngine):
@@ -475,6 +518,45 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
475
518
  compression=engine.compression,
476
519
  headers=engine.headers
477
520
  )
521
+
522
+ # Try S3
523
+ if isinstance(engine, S3Engine):
524
+ return S3ConfigDict(
525
+ path=engine.path,
526
+ format=engine.format,
527
+ aws_access_key_id=engine.aws_access_key_id,
528
+ aws_secret_access_key=engine.aws_secret_access_key,
529
+ compression=engine.compression,
530
+ partition_strategy=engine.partition_strategy,
531
+ partition_columns_in_data_file=engine.partition_columns_in_data_file
532
+ )
533
+
534
+ # Try Buffer
535
+ if isinstance(engine, BufferEngine):
536
+ return BufferConfigDict(
537
+ target_database=engine.target_database,
538
+ target_table=engine.target_table,
539
+ num_layers=engine.num_layers,
540
+ min_time=engine.min_time,
541
+ max_time=engine.max_time,
542
+ min_rows=engine.min_rows,
543
+ max_rows=engine.max_rows,
544
+ min_bytes=engine.min_bytes,
545
+ max_bytes=engine.max_bytes,
546
+ flush_time=engine.flush_time,
547
+ flush_rows=engine.flush_rows,
548
+ flush_bytes=engine.flush_bytes
549
+ )
550
+
551
+ # Try Distributed
552
+ if isinstance(engine, DistributedEngine):
553
+ return DistributedConfigDict(
554
+ cluster=engine.cluster,
555
+ target_database=engine.target_database,
556
+ target_table=engine.target_table,
557
+ sharding_key=engine.sharding_key,
558
+ policy_name=engine.policy_name
559
+ )
478
560
 
479
561
  # Try basic engines
480
562
  basic_config = _convert_basic_engine_instance(engine)
@@ -613,6 +695,7 @@ def to_infra_map() -> dict:
613
695
  table_settings=table_settings if table_settings else None,
614
696
  indexes=table.config.indexes,
615
697
  ttl=table.config.ttl,
698
+ database=table.config.database,
616
699
  )
617
700
 
618
701
  for name, stream in get_streams().items():
@@ -716,7 +799,7 @@ def to_infra_map() -> dict:
716
799
  web_apps=web_apps
717
800
  )
718
801
 
719
- return infra_map.model_dump(by_alias=True)
802
+ return infra_map.model_dump(by_alias=True, exclude_none=False)
720
803
 
721
804
 
722
805
  def load_models():
moose_lib/secrets.py ADDED
@@ -0,0 +1,100 @@
1
+ """Utilities for runtime environment variable resolution.
2
+
3
+ This module provides functionality to mark values that should be resolved
4
+ from environment variables at runtime by the Moose CLI, rather than being
5
+ embedded at build time.
6
+
7
+ Example:
8
+ >>> from moose_lib import S3QueueEngine, moose_runtime_env
9
+ >>>
10
+ >>> engine = S3QueueEngine(
11
+ ... s3_path="s3://bucket/data/*.json",
12
+ ... format="JSONEachRow",
13
+ ... aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
14
+ ... aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
15
+ ... )
16
+ """
17
+
18
+ #: Prefix used to mark values for runtime environment variable resolution.
19
+ MOOSE_RUNTIME_ENV_PREFIX = "__MOOSE_RUNTIME_ENV__:"
20
+
21
+
22
+ def get(env_var_name: str) -> str:
23
+ """Marks a value to be resolved from an environment variable at runtime.
24
+
25
+ When you use this function, the value is not read immediately. Instead,
26
+ a special marker is created that the Moose CLI will resolve when it
27
+ processes your infrastructure configuration.
28
+
29
+ This is useful for:
30
+ - Credentials that should never be embedded in Docker images
31
+ - Configuration that can be rotated without rebuilding
32
+ - Different values for different environments (dev, staging, prod)
33
+ - Any runtime configuration in infrastructure elements (Tables, Topics, etc.)
34
+
35
+ Args:
36
+ env_var_name: Name of the environment variable to resolve
37
+
38
+ Returns:
39
+ A marker string that Moose CLI will resolve at runtime
40
+
41
+ Raises:
42
+ ValueError: If the environment variable name is empty
43
+
44
+ Example:
45
+ >>> # Instead of this (evaluated at build time):
46
+ >>> import os
47
+ >>> aws_key = os.environ.get("AWS_ACCESS_KEY_ID")
48
+ >>>
49
+ >>> # Use this (evaluated at runtime):
50
+ >>> aws_key = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
51
+ """
52
+ if not env_var_name or not env_var_name.strip():
53
+ raise ValueError("Environment variable name cannot be empty")
54
+ return f"{MOOSE_RUNTIME_ENV_PREFIX}{env_var_name}"
55
+
56
+
57
+ class MooseRuntimeEnv:
58
+ """Utilities for marking values to be resolved from environment variables at runtime.
59
+
60
+ This class provides a namespace for runtime environment variable resolution.
61
+ Use the singleton instance `moose_runtime_env` rather than instantiating this class directly.
62
+
63
+ Attributes:
64
+ get: Static method for creating runtime environment variable markers
65
+ """
66
+
67
+ @staticmethod
68
+ def get(env_var_name: str) -> str:
69
+ """Marks a value to be resolved from an environment variable at runtime.
70
+
71
+ Args:
72
+ env_var_name: Name of the environment variable to resolve
73
+
74
+ Returns:
75
+ A marker string that Moose CLI will resolve at runtime
76
+
77
+ Raises:
78
+ ValueError: If the environment variable name is empty
79
+ """
80
+ return get(env_var_name)
81
+
82
+
83
+ # Export singleton instance for module-level access
84
+ moose_runtime_env = MooseRuntimeEnv()
85
+
86
+ # Legacy exports for backwards compatibility
87
+ MooseEnvSecrets = MooseRuntimeEnv # Deprecated: Use MooseRuntimeEnv instead
88
+ moose_env_secrets = moose_runtime_env # Deprecated: Use moose_runtime_env instead
89
+ MOOSE_ENV_SECRET_PREFIX = MOOSE_RUNTIME_ENV_PREFIX # Deprecated: Use MOOSE_RUNTIME_ENV_PREFIX instead
90
+
91
+ __all__ = [
92
+ "moose_runtime_env",
93
+ "MooseRuntimeEnv",
94
+ "get",
95
+ "MOOSE_RUNTIME_ENV_PREFIX",
96
+ # Legacy exports (deprecated)
97
+ "moose_env_secrets",
98
+ "MooseEnvSecrets",
99
+ "MOOSE_ENV_SECRET_PREFIX",
100
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.6.157
3
+ Version: 0.6.162
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -1,12 +1,13 @@
1
- moose_lib/__init__.py,sha256=swNNCec7czJO62pYKeRyonMR4QlzgCN74GLFFI3NDxs,917
2
- moose_lib/blocks.py,sha256=Cwjx7UX5qXLVpxF5eBomBLzIoxVn769jfZRJfJnqPyw,14423
1
+ moose_lib/__init__.py,sha256=1wG0Y8VGAdRpMCsTuHhmbltAUYCsGH-a8v8W3sN1gQ8,957
2
+ moose_lib/blocks.py,sha256=TJfaBJjDbCjGBVGpcV2HwVLWxiuqBC9lhx7FXCCbfsE,18238
3
3
  moose_lib/commons.py,sha256=YDMzRO3ySa_iQRknmFwwE539KgSyjWhYJ-E3jIsfSgc,6585
4
4
  moose_lib/data_models.py,sha256=IovDWmdHdoG7M3QmMbvWg7wDzrLlU-pea3CDoF9mShA,17615
5
5
  moose_lib/dmv2_serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
6
- moose_lib/internal.py,sha256=FMd4GuQ2uRXiV7_9JrTYN1xx2HFagMbEY9uu_Kb4omg,27742
6
+ moose_lib/internal.py,sha256=7T6PQyU2QLquSSPujO37e9t0tC_QYXpIK2CQom69li4,30453
7
7
  moose_lib/main.py,sha256=JWsgza52xEh25AyF61cO1ItJ8VXJHHz8j-4HG445Whg,20380
8
8
  moose_lib/query_builder.py,sha256=-L5p2dArBx3SBA-WZPkcCJPemKXnqJw60NHy-wn5wa4,6619
9
9
  moose_lib/query_param.py,sha256=kxcR09BMIsEg4o2qetjKrVu1YFRaLfMEzwzyGsKUpvA,6474
10
+ moose_lib/secrets.py,sha256=upFjLorfAsiyL3F3UIl2STUFGjC6Pu0-Qe1FGs5S09c,3480
10
11
  moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
12
  moose_lib/clients/redis_client.py,sha256=BDYjJ582V-rW92qVQ4neZ9Pu7JtDNt8x8jBWApt1XUg,11895
12
13
  moose_lib/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,7 +20,7 @@ moose_lib/dmv2/ingest_api.py,sha256=XhvHHgGPXp-BuRpAALth-FRhanwy-zJQ_83Cg_RLolM,
19
20
  moose_lib/dmv2/ingest_pipeline.py,sha256=R8NRrfD9zkEzwWEJKIrNL9Nk18DdGc7wdAakae_2-7o,8459
20
21
  moose_lib/dmv2/life_cycle.py,sha256=wl0k6yzwU1MJ_fO_UkN29buoY5G6ChYZvfwigP9fVfM,1254
21
22
  moose_lib/dmv2/materialized_view.py,sha256=wTam1V5CC2rExt7YrdK_Cz4rRTONm2keKOF951LlCP4,4875
22
- moose_lib/dmv2/olap_table.py,sha256=sj3nsi-reIFEdykEYGheA3zYhoFZqVOOwt5RaMXn4J0,35755
23
+ moose_lib/dmv2/olap_table.py,sha256=AfKTH6kabyE2qzrz-9C9BdDHyQor_JOTfIqplAkP56k,37474
23
24
  moose_lib/dmv2/registry.py,sha256=5V8HRKBJXKLIi5tdIC2tKEmsGELQ1C2fBrMjns8a3ao,2947
24
25
  moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
25
26
  moose_lib/dmv2/stream.py,sha256=pDryS1x5zMkDSJsFlGqm8quwKTl1ctAL-a_U8ySgqQI,18043
@@ -38,10 +39,11 @@ tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
38
39
  tests/test_olap_table_versioning.py,sha256=ffuJsO5TYKBd2aWV0GWDmosCwL4j518Y_MqYJdrfgDY,7041
39
40
  tests/test_query_builder.py,sha256=O3imdFSaqU13kbK1jSQaHbBgynhVmJaApT8DlRqYwJU,1116
40
41
  tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
41
- tests/test_s3queue_config.py,sha256=F05cnD61S2wBKPabcpEJxf55-DJGF4nLqwBb6aFbprc,9741
42
+ tests/test_s3queue_config.py,sha256=TSXSTAjcAVpZuvQza2i0XWGQTeaNs4mRZeH4pFCujNY,14728
43
+ tests/test_secrets.py,sha256=g5ZjtOyT7vTz_sRUB71Vd6s715u0gMc4C-_8m_59Mtg,8347
42
44
  tests/test_simple_aggregate.py,sha256=yyFSYHUXskA1aTcwC-KQ9XmgOikeQ8wKXzntsUBIC6w,4055
43
45
  tests/test_web_app.py,sha256=iL86sg0NS2k6nP8jIGKZvrtg0BjvaqNTRp7-4T1TTck,6557
44
- moose_lib-0.6.157.dist-info/METADATA,sha256=o5cJlnVOp85VQQURjAEzuxYtxX6-AdBbbr7Gy_iRsq4,827
45
- moose_lib-0.6.157.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- moose_lib-0.6.157.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
47
- moose_lib-0.6.157.dist-info/RECORD,,
46
+ moose_lib-0.6.162.dist-info/METADATA,sha256=dFe4T9iDSF2Ik79cm_6VsP3OPEqGB8avr-QsA2i4rCY,827
47
+ moose_lib-0.6.162.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
48
+ moose_lib-0.6.162.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
49
+ moose_lib-0.6.162.dist-info/RECORD,,
@@ -8,7 +8,7 @@ import warnings
8
8
  from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
9
9
  from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
10
10
  from moose_lib.internal import (
11
- _convert_engine_to_config_dict,
11
+ _convert_engine_to_config_dict,
12
12
  EngineConfigDict,
13
13
  S3QueueConfigDict,
14
14
  MergeTreeConfigDict,
@@ -41,8 +41,8 @@ def test_olap_config_accepts_engine_config():
41
41
  aws_secret_access_key="secret123"
42
42
  )
43
43
  config = OlapConfig(
44
- engine=s3_engine,
45
- order_by_fields=["timestamp"]
44
+ engine=s3_engine
45
+ # Note: S3QueueEngine does not support order_by_fields
46
46
  )
47
47
  assert isinstance(config.engine, S3QueueEngine)
48
48
  assert config.engine.s3_path == "s3://bucket/data/*.json"
@@ -58,14 +58,14 @@ def test_olap_table_with_s3queue_engine():
58
58
  format="JSONEachRow",
59
59
  compression="gzip"
60
60
  ),
61
- order_by_fields=["timestamp", "id"],
61
+ # Note: S3QueueEngine does not support order_by_fields
62
62
  settings={
63
63
  "s3queue_mode": "unordered",
64
64
  "s3queue_keeper_path": "/clickhouse/s3queue/test"
65
65
  }
66
66
  )
67
67
  )
68
-
68
+
69
69
  assert table.name == "TestS3Table"
70
70
  assert isinstance(table.config.engine, S3QueueEngine)
71
71
  assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"
@@ -83,7 +83,7 @@ def test_olap_table_with_mergetree_engines():
83
83
  )
84
84
  )
85
85
  assert isinstance(table1.config.engine, MergeTreeEngine)
86
-
86
+
87
87
  # Test with ReplacingMergeTreeEngine
88
88
  table2 = OlapTable[SampleEvent](
89
89
  "ReplacingTable",
@@ -111,10 +111,10 @@ def test_engine_conversion_to_dict():
111
111
  )
112
112
  )
113
113
  )
114
-
114
+
115
115
  # Convert engine to dict
116
116
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
117
-
117
+
118
118
  assert engine_dict.engine == "S3Queue"
119
119
  assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
120
120
  assert engine_dict.format == "Parquet"
@@ -133,10 +133,10 @@ def test_engine_conversion_with_enum():
133
133
  engine=ClickHouseEngines.ReplacingMergeTree
134
134
  )
135
135
  )
136
-
136
+
137
137
  # Convert engine to dict
138
138
  engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
139
-
139
+
140
140
  assert engine_dict.engine == "ReplacingMergeTree"
141
141
 
142
142
 
@@ -150,16 +150,16 @@ def test_backward_compatibility():
150
150
  order_by_fields=["id"]
151
151
  )
152
152
  )
153
-
153
+
154
154
  # New API with EngineConfig
155
155
  new_table = OlapTable[SampleEvent](
156
- "NewTable",
156
+ "NewTable",
157
157
  OlapConfig(
158
158
  engine=MergeTreeEngine(),
159
159
  order_by_fields=["id"]
160
160
  )
161
161
  )
162
-
162
+
163
163
  # Both should work
164
164
  assert old_table.config.engine == ClickHouseEngines.MergeTree
165
165
  assert isinstance(new_table.config.engine, MergeTreeEngine)
@@ -169,7 +169,7 @@ def test_deprecation_warning_for_enum():
169
169
  """Test that using enum engine triggers deprecation warning."""
170
170
  with warnings.catch_warnings(record=True) as w:
171
171
  warnings.simplefilter("always")
172
-
172
+
173
173
  table = OlapTable[SampleEvent](
174
174
  "LegacyTable",
175
175
  OlapConfig(
@@ -177,7 +177,7 @@ def test_deprecation_warning_for_enum():
177
177
  order_by_fields=["id"]
178
178
  )
179
179
  )
180
-
180
+
181
181
  # Check that a deprecation warning was issued
182
182
  assert len(w) == 1
183
183
  assert issubclass(w[0].category, DeprecationWarning)
@@ -198,12 +198,12 @@ def test_s3queue_with_all_options():
198
198
  "Authorization": "Bearer token"
199
199
  }
200
200
  )
201
-
201
+
202
202
  table = OlapTable[SampleEvent](
203
203
  "FullConfigTable",
204
204
  OlapConfig(
205
205
  engine=engine,
206
- order_by_fields=["timestamp"],
206
+ # Note: S3QueueEngine does not support order_by_fields
207
207
  settings={
208
208
  "s3queue_mode": "ordered",
209
209
  "s3queue_keeper_path": "/clickhouse/s3queue/full",
@@ -212,7 +212,7 @@ def test_s3queue_with_all_options():
212
212
  }
213
213
  )
214
214
  )
215
-
215
+
216
216
  assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
217
217
  assert table.config.engine.format == "JSONEachRow"
218
218
  assert table.config.engine.compression == "gzip"
@@ -226,42 +226,49 @@ def test_s3queue_public_bucket():
226
226
  format="Parquet"
227
227
  # No AWS credentials needed for public buckets
228
228
  )
229
-
229
+
230
230
  table = OlapTable[SampleEvent](
231
231
  "PublicBucketTable",
232
232
  OlapConfig(
233
- engine=engine,
234
- order_by_fields=["id"]
233
+ engine=engine
234
+ # Note: S3QueueEngine does not support order_by_fields
235
235
  )
236
236
  )
237
-
237
+
238
238
  assert table.config.engine.aws_access_key_id is None
239
239
  assert table.config.engine.aws_secret_access_key is None
240
240
 
241
241
 
242
242
  def test_migration_from_legacy_to_new():
243
243
  """Test migration path from legacy to new API."""
244
- # Legacy approach
244
+ # Legacy approach (with MergeTree, which supports order_by_fields)
245
245
  legacy_config = OlapConfig(
246
- engine=ClickHouseEngines.S3Queue,
246
+ engine=ClickHouseEngines.MergeTree,
247
247
  order_by_fields=["timestamp"]
248
248
  )
249
-
250
- # New approach - equivalent configuration
249
+
250
+ # New approach - equivalent configuration for MergeTree
251
251
  new_config = OlapConfig(
252
- engine=S3QueueEngine(
253
- s3_path="s3://bucket/data/*.json",
254
- format="JSONEachRow"
255
- ),
252
+ engine=MergeTreeEngine(),
256
253
  order_by_fields=["timestamp"]
257
254
  )
258
-
255
+
259
256
  # Both should have the same order_by_fields
260
257
  assert legacy_config.order_by_fields == new_config.order_by_fields
261
-
258
+
262
259
  # Engine types should be different
263
260
  assert isinstance(legacy_config.engine, ClickHouseEngines)
264
- assert isinstance(new_config.engine, S3QueueEngine)
261
+ assert isinstance(new_config.engine, MergeTreeEngine)
262
+
263
+ # For S3Queue, the new API correctly prevents unsupported clauses
264
+ s3queue_config = OlapConfig(
265
+ engine=S3QueueEngine(
266
+ s3_path="s3://bucket/data/*.json",
267
+ format="JSONEachRow"
268
+ )
269
+ # Note: order_by_fields is not supported for S3QueueEngine
270
+ )
271
+ assert isinstance(s3queue_config.engine, S3QueueEngine)
265
272
 
266
273
 
267
274
  def test_engine_config_validation():
@@ -272,7 +279,7 @@ def test_engine_config_validation():
272
279
  s3_path="", # Empty path should fail
273
280
  format="JSONEachRow"
274
281
  )
275
-
282
+
276
283
  with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
277
284
  S3QueueEngine(
278
285
  s3_path="s3://bucket/data/*.json",
@@ -280,21 +287,143 @@ def test_engine_config_validation():
280
287
  )
281
288
 
282
289
 
290
+ def test_non_mergetree_engines_reject_unsupported_clauses():
291
+ """Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
292
+ from moose_lib.blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
293
+
294
+ # Test S3Engine rejects ORDER BY
295
+ with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
296
+ OlapConfig(
297
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
298
+ order_by_fields=["id"]
299
+ )
300
+
301
+ with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
302
+ OlapConfig(
303
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
304
+ order_by_expression="(id, name)"
305
+ )
306
+
307
+ # Test S3Engine rejects SAMPLE BY
308
+ with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
309
+ OlapConfig(
310
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
311
+ sample_by_expression="cityHash64(id)"
312
+ )
313
+
314
+ # Test S3Engine DOES support PARTITION BY (should not raise)
315
+ config_s3_with_partition = OlapConfig(
316
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
317
+ partition_by="toYYYYMM(timestamp)"
318
+ )
319
+ assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
320
+
321
+ # Test S3QueueEngine rejects ORDER BY
322
+ with pytest.raises(ValueError, match="S3QueueEngine does not support ORDER BY clauses"):
323
+ OlapConfig(
324
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
325
+ order_by_fields=["id"]
326
+ )
327
+
328
+ # Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
329
+ with pytest.raises(ValueError, match="S3QueueEngine does not support PARTITION BY clause"):
330
+ OlapConfig(
331
+ engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
332
+ partition_by="toYYYYMM(timestamp)"
333
+ )
334
+
335
+ # Test BufferEngine rejects ORDER BY
336
+ with pytest.raises(ValueError, match="BufferEngine does not support ORDER BY clauses"):
337
+ OlapConfig(
338
+ engine=BufferEngine(
339
+ target_database="default",
340
+ target_table="dest",
341
+ num_layers=16,
342
+ min_time=10,
343
+ max_time=100,
344
+ min_rows=10000,
345
+ max_rows=100000,
346
+ min_bytes=10000000,
347
+ max_bytes=100000000
348
+ ),
349
+ order_by_fields=["id"]
350
+ )
351
+
352
+ # Test BufferEngine rejects PARTITION BY
353
+ with pytest.raises(ValueError, match="BufferEngine does not support PARTITION BY clause"):
354
+ OlapConfig(
355
+ engine=BufferEngine(
356
+ target_database="default",
357
+ target_table="dest",
358
+ num_layers=16,
359
+ min_time=10,
360
+ max_time=100,
361
+ min_rows=10000,
362
+ max_rows=100000,
363
+ min_bytes=10000000,
364
+ max_bytes=100000000
365
+ ),
366
+ partition_by="date"
367
+ )
368
+
369
+ # Test DistributedEngine rejects PARTITION BY
370
+ with pytest.raises(ValueError, match="DistributedEngine does not support PARTITION BY clause"):
371
+ OlapConfig(
372
+ engine=DistributedEngine(
373
+ cluster="my_cluster",
374
+ target_database="default",
375
+ target_table="local_table"
376
+ ),
377
+ partition_by="date"
378
+ )
379
+
380
+ # Verify that S3Engine works without unsupported clauses
381
+ config = OlapConfig(
382
+ engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
383
+ )
384
+ assert isinstance(config.engine, S3Engine)
385
+
386
+
387
+ def test_mergetree_engines_still_accept_clauses():
388
+ """Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
389
+ from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
390
+
391
+ # MergeTree should accept all clauses
392
+ config1 = OlapConfig(
393
+ engine=MergeTreeEngine(),
394
+ order_by_fields=["id", "timestamp"],
395
+ partition_by="toYYYYMM(timestamp)",
396
+ sample_by_expression="cityHash64(id)"
397
+ )
398
+ assert config1.order_by_fields == ["id", "timestamp"]
399
+ assert config1.partition_by == "toYYYYMM(timestamp)"
400
+ assert config1.sample_by_expression == "cityHash64(id)"
401
+
402
+ # ReplacingMergeTree should also accept these clauses
403
+ config2 = OlapConfig(
404
+ engine=ReplacingMergeTreeEngine(ver="updated_at"),
405
+ order_by_expression="(id, name)",
406
+ partition_by="date"
407
+ )
408
+ assert config2.order_by_expression == "(id, name)"
409
+ assert config2.partition_by == "date"
410
+
411
+
283
412
  def test_multiple_engine_types():
284
413
  """Test that different engine types can be used in the same application."""
285
414
  tables = []
286
-
415
+
287
416
  # Create tables with different engine types
288
417
  tables.append(OlapTable[SampleEvent](
289
418
  "MergeTreeTable",
290
419
  OlapConfig(engine=MergeTreeEngine())
291
420
  ))
292
-
421
+
293
422
  tables.append(OlapTable[SampleEvent](
294
423
  "ReplacingTreeTable",
295
424
  OlapConfig(engine=ReplacingMergeTreeEngine())
296
425
  ))
297
-
426
+
298
427
  tables.append(OlapTable[SampleEvent](
299
428
  "S3QueueTable",
300
429
  OlapConfig(
@@ -304,7 +433,7 @@ def test_multiple_engine_types():
304
433
  )
305
434
  )
306
435
  ))
307
-
436
+
308
437
  # Verify all tables were created with correct engine types
309
438
  assert isinstance(tables[0].config.engine, MergeTreeEngine)
310
439
  assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
tests/test_secrets.py ADDED
@@ -0,0 +1,221 @@
1
+ """Tests for the moose_lib.secrets module.
2
+
3
+ This module tests the runtime environment variable marker functionality,
4
+ which allows users to defer secret resolution until runtime rather than
5
+ embedding secrets at build time.
6
+ """
7
+
8
+ import pytest
9
+ from moose_lib.secrets import moose_runtime_env, get, MOOSE_RUNTIME_ENV_PREFIX
10
+
11
+
12
class TestMooseRuntimeEnvGet:
    """Behavior of moose_runtime_env.get(): marker construction and input validation."""

    def test_creates_marker_with_correct_prefix(self):
        """A marker is the runtime-env prefix immediately followed by the name."""
        name = "AWS_ACCESS_KEY_ID"
        marker = moose_runtime_env.get(name)

        assert marker == f"{MOOSE_RUNTIME_ENV_PREFIX}{name}"
        assert marker == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"

    def test_handles_different_variable_names(self):
        """Markers are built correctly for a variety of variable names."""
        for name in (
            "AWS_SECRET_ACCESS_KEY",
            "DATABASE_PASSWORD",
            "API_KEY",
            "MY_CUSTOM_SECRET",
        ):
            marker = moose_runtime_env.get(name)
            assert marker == f"{MOOSE_RUNTIME_ENV_PREFIX}{name}"
            assert name in marker

    def test_raises_error_for_empty_string(self):
        """An empty name must be rejected with ValueError."""
        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
            moose_runtime_env.get("")

    def test_raises_error_for_whitespace_only(self):
        """A name made only of spaces must be rejected with ValueError."""
        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
            moose_runtime_env.get(" ")

    def test_raises_error_for_tabs_only(self):
        """A name made only of tab characters must be rejected with ValueError."""
        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
            moose_runtime_env.get("\t\t")

    def test_allows_underscores_in_variable_names(self):
        """Underscores in the name are accepted and preserved."""
        name = "MY_LONG_VAR_NAME"
        assert moose_runtime_env.get(name) == f"{MOOSE_RUNTIME_ENV_PREFIX}{name}"

    def test_allows_numbers_in_variable_names(self):
        """Digits in the name are accepted and preserved."""
        name = "API_KEY_123"
        assert moose_runtime_env.get(name) == f"{MOOSE_RUNTIME_ENV_PREFIX}{name}"

    def test_preserves_exact_casing(self):
        """The variable name's casing survives marker construction."""
        name = "MixedCase_VarName"
        marker = moose_runtime_env.get(name)

        assert name in marker
        # The fully lowercased form must be absent, proving casing was untouched.
        assert name.lower() not in marker

    def test_can_be_used_in_s3queue_config(self):
        """Markers slot into an S3Queue-style credentials mapping."""
        credentials = {
            "aws_access_key_id": moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
            "aws_secret_access_key": moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
        }

        assert "AWS_ACCESS_KEY_ID" in credentials["aws_access_key_id"]
        assert "AWS_SECRET_ACCESS_KEY" in credentials["aws_secret_access_key"]
86
+
87
+
88
class TestModuleLevelGetFunction:
    """The module-level get() alias must mirror moose_runtime_env.get()."""

    def test_module_level_get_creates_marker(self):
        """The alias builds a prefixed marker on its own."""
        name = "TEST_SECRET"
        assert get(name) == f"{MOOSE_RUNTIME_ENV_PREFIX}{name}"

    def test_module_level_get_matches_class_method(self):
        """Alias and class method must agree byte-for-byte."""
        name = "MY_SECRET"
        assert get(name) == moose_runtime_env.get(name)

    def test_module_level_get_raises_error_for_empty(self):
        """The alias enforces the same empty-name validation."""
        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
            get("")
111
+
112
+
113
class TestMooseRuntimeEnvPrefix:
    """Sanity checks on the MOOSE_RUNTIME_ENV_PREFIX constant itself."""

    def test_has_expected_value(self):
        """The prefix is the documented sentinel string."""
        assert MOOSE_RUNTIME_ENV_PREFIX == "__MOOSE_RUNTIME_ENV__:"

    def test_is_string(self):
        """The prefix is a plain str."""
        assert isinstance(MOOSE_RUNTIME_ENV_PREFIX, str)

    def test_is_not_empty(self):
        """The prefix contains at least one character."""
        assert len(MOOSE_RUNTIME_ENV_PREFIX) > 0
127
+
128
+
129
class TestMarkerFormatValidation:
    """Markers must be detectable, parseable, and serialization-safe."""

    def test_creates_easily_detectable_markers(self):
        """Every marker starts with the well-known sentinel prefix."""
        assert moose_runtime_env.get("TEST_VAR").startswith("__MOOSE_RUNTIME_ENV__:")

    def test_markers_can_be_split_to_extract_variable_name(self):
        """Splitting on the prefix recovers the original variable name."""
        name = "MY_SECRET"
        pieces = moose_runtime_env.get(name).split(MOOSE_RUNTIME_ENV_PREFIX)

        assert len(pieces) == 2
        assert pieces[1] == name

    def test_markers_are_json_serializable(self):
        """A marker round-trips through json.dumps/json.loads unchanged."""
        import json

        marker = moose_runtime_env.get("TEST_VAR")
        round_tripped = json.loads(json.dumps({"secret": marker}))

        assert round_tripped["secret"] == marker

    def test_markers_work_with_dict_serialization(self):
        """A marker stored in a dict is preserved alongside plain values."""
        marker = moose_runtime_env.get("DATABASE_PASSWORD")
        payload = {
            "password": marker,
            "other_field": "value",
        }

        # The marker must survive dict storage intact, prefix and all.
        assert payload["password"] == marker
        assert MOOSE_RUNTIME_ENV_PREFIX in payload["password"]
169
+
170
+
171
class TestIntegrationScenarios:
    """End-to-end scenarios combining runtime-env markers with real configs."""

    def test_s3queue_engine_with_secrets(self):
        """Markers pass through S3QueueEngine credential fields untouched."""
        from moose_lib.blocks import S3QueueEngine

        engine = S3QueueEngine(
            s3_path="s3://my-bucket/data/*.json",
            format="JSONEachRow",
            aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
            aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
        )

        # The engine must store the raw markers, not resolved values.
        assert engine.aws_access_key_id == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
        assert engine.aws_secret_access_key == "__MOOSE_RUNTIME_ENV__:AWS_SECRET_ACCESS_KEY"

    def test_multiple_secrets_in_same_config(self):
        """Several markers coexist in one configuration mapping."""
        credentials = {
            "username": moose_runtime_env.get("DB_USERNAME"),
            "password": moose_runtime_env.get("DB_PASSWORD"),
            "api_key": moose_runtime_env.get("API_KEY"),
        }

        # Every entry carries the sentinel prefix.
        assert all(v.startswith(MOOSE_RUNTIME_ENV_PREFIX) for v in credentials.values())

        # Each entry embeds its own variable name.
        assert "DB_USERNAME" in credentials["username"]
        assert "DB_PASSWORD" in credentials["password"]
        assert "API_KEY" in credentials["api_key"]

    def test_mixed_secret_and_plain_values(self):
        """Markers and literal values coexist without cross-contamination."""
        settings = {
            "region": "us-east-1",  # plain literal
            "access_key": moose_runtime_env.get("AWS_ACCESS_KEY_ID"),  # secret marker
            "bucket": "my-bucket",  # plain literal
            "secret_key": moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),  # secret marker
        }

        # Literals stay exactly as written.
        assert settings["region"] == "us-east-1"
        assert settings["bucket"] == "my-bucket"

        # Secrets carry the sentinel prefix.
        assert MOOSE_RUNTIME_ENV_PREFIX in settings["access_key"]
        assert MOOSE_RUNTIME_ENV_PREFIX in settings["secret_key"]