moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +38 -3
- moose_lib/blocks.py +497 -37
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +94 -5
- moose_lib/config/config_file.py +44 -2
- moose_lib/config/runtime.py +137 -5
- moose_lib/data_models.py +451 -46
- moose_lib/dmv2/__init__.py +88 -60
- moose_lib/dmv2/_registry.py +3 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +56 -13
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +304 -119
- moose_lib/dmv2/registry.py +28 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +241 -21
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +175 -0
- moose_lib/dmv2/web_app_helpers.py +96 -0
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +537 -68
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +266 -156
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +38 -1
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +210 -0
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +114 -0
- tests/test_web_app.py +227 -0
- moose_lib-0.6.90.dist-info/RECORD +0 -42
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/main.py
CHANGED
@@ -4,6 +4,7 @@ This module provides foundational classes, enums, and functions used across the
 including configuration objects, clients for interacting with services (ClickHouse, Temporal),
 and utilities for defining data models and SQL queries.
 """
+
 from clickhouse_connect.driver.client import Client as ClickhouseClient
 from clickhouse_connect import get_client
 from moose_lib.dmv2 import OlapTable
@@ -18,7 +19,11 @@ import hashlib
 import asyncio
 from string import Formatter
 from temporalio.client import Client as TemporalClient, TLSConfig
-from temporalio.common import
+from temporalio.common import (
+    RetryPolicy,
+    WorkflowIDConflictPolicy,
+    WorkflowIDReusePolicy,
+)
 from datetime import timedelta, datetime
 from time import perf_counter
 from humanfriendly import format_timespan
@@ -37,6 +42,7 @@ class StreamingFunction:
     Attributes:
         run: The callable function that performs the streaming logic.
     """
+
     run: Callable


@@ -49,6 +55,7 @@ class StorageConfig:
         order_by_fields: List of fields to use for ordering in the storage layer.
         deduplicate: Whether to enable deduplication based on the order_by_fields.
     """
+
     enabled: Optional[bool] = None
     order_by_fields: Optional[list[str]] = None
     deduplicate: Optional[bool] = None
@@ -63,6 +70,7 @@ class DataModelConfig:
     Attributes:
         storage: Configuration for how data is stored.
     """
+
     storage: Optional[StorageConfig] = None


@@ -112,21 +120,22 @@ def moose_data_model(arg: Any = None) -> Any:
     def get_file(t: type) -> Optional[str]:
         """Helper to get the file path of a type's definition."""
         module = sys.modules.get(t.__module__)
-        if module and hasattr(module,
+        if module and hasattr(module, "__file__"):
             return module.__file__
         return None

     def remove_null(d: dict) -> dict:
         """Recursively removes keys with None values from a dictionary."""
-        return {
-
+        return {
+            key: remove_null(value) if isinstance(value, dict) else value
+            for key, value in d.items()
+            if not (value is None)
+        }

     def decorator(data_class: type) -> type:
         expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
         if expected_file_name and expected_file_name == get_file(data_class):
-            output: dict[str, str | dict] = {
-                'class_name': data_class.__name__
-            }
+            output: dict[str, str | dict] = {"class_name": data_class.__name__}
             if arg:
                 output["config"] = remove_null(asdict(arg))
             output_json = json.dumps(output, cls=CustomEncoder, indent=4)
@@ -149,6 +158,7 @@ class ApiResult:
         status: The HTTP status code for the response.
         body: The response body, which should be JSON serializable.
     """
+
     status: int
     body: Any

@@ -165,11 +175,13 @@ class QueryClient:
         ch_client_or_config: Either an instance of the ClickHouse client or a RuntimeClickHouseConfig.
     """

-    def __init__(
+    def __init__(
+        self, ch_client_or_config: Union[ClickhouseClient, RuntimeClickHouseConfig]
+    ):
         if isinstance(ch_client_or_config, RuntimeClickHouseConfig):
             # Create ClickHouse client from configuration
             config = ch_client_or_config
-            interface =
+            interface = "https" if config.use_ssl else "http"
             self.ch_client = get_client(
                 interface=interface,
                 host=config.host,
@@ -185,7 +197,9 @@ class QueryClient:
     def __call__(self, input, variables):
        return self.execute(input, variables)

-    def execute(
+    def execute(
+        self, input: Union[str, Query], variables=None, row_type: Type[BaseModel] = None
+    ):
         """
         Execute a query.

@@ -212,7 +226,8 @@ class QueryClient:
         values: dict[str, Any] = {}
         preview_params = {}

-
+        i = 0
+        for _, variable_name, _, _ in Formatter().parse(input):
             if variable_name:
                 value = variables[variable_name]
                 if isinstance(value, list) and len(value) == 1:
@@ -220,27 +235,36 @@ class QueryClient:
                     value = value[0]

                 if isinstance(value, Column) or isinstance(value, OlapTable):
-
-
+                    if isinstance(value, OlapTable) and value.config.database:
+                        params[variable_name] = (
+                            f"{{p{i}: Identifier}}.{{p{i + 1}: Identifier}}"
+                        )
+                        values[f"p{i}"] = value.config.database
+                        values[f"p{i + 1}"] = value.name
+                        i += 2
+                    else:
+                        params[variable_name] = f"{{p{i}: Identifier}}"
+                        values[f"p{i}"] = value.name
+                        i += 1
                 else:
                     from moose_lib.utilities.sql import clickhouse_param_type_for_value
+
                     ch_type = clickhouse_param_type_for_value(value)
-                    params[variable_name] = f
-                    values[f
+                    params[variable_name] = f"{{p{i}: {ch_type}}}"
+                    values[f"p{i}"] = value
+                    i += 1
                 preview_params[variable_name] = self._format_value_for_preview(value)

         clickhouse_query = input.format_map(params)
         preview_query = input.format_map(preview_params)
         print(f"[QueryClient] | Query: {' '.join(preview_query.split())}")
-        return self.execute_raw(
-            clickhouse_query, values, row_type
-        )
+        return self.execute_raw(clickhouse_query, values, row_type)

     def execute_raw(
-
-
-
-
+        self,
+        clickhouse_query: str,
+        parameters: Optional[dict[str, Any]],
+        row_type: Type[BaseModel] = None,
     ):
         """
         Uses raw clickhouse SQL syntax.
@@ -270,11 +294,11 @@ class QueryClient:
         """
         # NULL handling
         if value is None:
-            return
+            return "NULL"

         # Booleans (ClickHouse accepts true/false)
         if isinstance(value, bool):
-            return
+            return "true" if value else "false"

         # Numbers
         if isinstance(value, (int, float)) and not isinstance(value, bool):
@@ -283,24 +307,29 @@ class QueryClient:
         # Strings
         if isinstance(value, str):
             # Escape backslashes and single quotes for ClickHouse single-quoted strings
-            escaped = value.replace(
+            escaped = value.replace("\\", "\\\\").replace("'", "\\'")
             return f"'{escaped}'"

         # DateTime
         if isinstance(value, datetime):
             return f"'{value.strftime('%Y-%m-%d %H:%M:%S')}'"

+        if isinstance(value, OlapTable) and value.config.database:
+            return f"{value.config.database}.{value.name}"
+
         if isinstance(value, Column) or isinstance(value, OlapTable):
             return value.name

         # Lists / tuples (format as [item1, item2, ...])
         if isinstance(value, (list, tuple)):
-            formatted_items =
+            formatted_items = ", ".join(
+                self._format_value_for_preview(v) for v in value
+            )
             return f"[{formatted_items}]"

         # Fallback: stringify and single-quote
         fallback = str(value)
-        escaped_fallback = fallback.replace(
+        escaped_fallback = fallback.replace("\\", "\\\\").replace("'", "\\'")
         return f"'{escaped_fallback}'"

     def close(self):
@@ -325,33 +354,26 @@ class WorkflowClient:
     # Test workflow executor in rust if this changes significantly
     def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
         try:
-            workflow_id, run_id = asyncio.run(
+            workflow_id, run_id = asyncio.run(
+                self._start_workflow_async(name, input_data)
+            )
             print(f"WorkflowClient - started workflow: {name}")
             return {
                 "status": 200,
-                "body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history"
+                "body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history",
             }
         except Exception as e:
             print(f"WorkflowClient - error while starting workflow: {e}")
-            return {
-                "status": 400,
-                "body": str(e)
-            }
+            return {"status": 400, "body": str(e)}

     def terminate(self, workflow_id: str) -> Dict[str, Any]:
         try:
             asyncio.run(self._terminate_workflow_async(workflow_id))
             print(f"WorkflowClient - terminated workflow: {workflow_id}")
-            return {
-                "status": 200,
-                "body": f"Workflow terminated: {workflow_id}"
-            }
+            return {"status": 200, "body": f"Workflow terminated: {workflow_id}"}
         except Exception as e:
             print(f"WorkflowClient - error while terminating workflow: {e}")
-            return {
-                "status": 400,
-                "body": str(e)
-            }
+            return {"status": 400, "body": str(e)}

     async def _terminate_workflow_async(self, workflow_id: str):
         workflow_handle = self.temporal_client.get_workflow_handle(workflow_id)
@@ -365,11 +387,12 @@ class WorkflowClient:
         processed_input, workflow_id = self._process_input_data(name, input_data)

         # Create retry policy and timeout (common logic)
-        retry_policy = RetryPolicy(maximum_attempts=config[
-        run_timeout = self.parse_timeout_to_timedelta(config[
+        retry_policy = RetryPolicy(maximum_attempts=config["retry_count"])
+        run_timeout = self.parse_timeout_to_timedelta(config["timeout_str"])

         print(
-            f"WorkflowClient - starting DMv2 workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}"
+            f"WorkflowClient - starting DMv2 workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}"
+        )

         # Start workflow with appropriate args
         workflow_args = self._build_workflow_args(name, processed_input)
@@ -388,8 +411,7 @@ class WorkflowClient:
             workflow_kwargs["run_timeout"] = run_timeout

         workflow_handle = await self.temporal_client.start_workflow(
-            "ScriptWorkflow",
-            **workflow_kwargs
+            "ScriptWorkflow", **workflow_kwargs
         )

         return workflow_id, workflow_handle.result_run_id
@@ -403,8 +425,8 @@ class WorkflowClient:
             raise ValueError(f"DMv2 workflow '{name}' not found")

         return {
-
-
+            "retry_count": dmv2_workflow.config.retries or 3,
+            "timeout_str": dmv2_workflow.config.timeout or "1h",
         }

     def _process_input_data(self, name: str, input_data: Any) -> tuple[Any, str]:
@@ -438,11 +460,11 @@ class WorkflowClient:
     def parse_timeout_to_timedelta(self, timeout_str: str) -> Optional[timedelta]:
         if timeout_str == "never":
             return None  # Unlimited execution timeout
-        elif timeout_str.endswith(
+        elif timeout_str.endswith("h"):
             return timedelta(hours=int(timeout_str[:-1]))
-        elif timeout_str.endswith(
+        elif timeout_str.endswith("m"):
             return timedelta(minutes=int(timeout_str[:-1]))
-        elif timeout_str.endswith(
+        elif timeout_str.endswith("s"):
             return timedelta(seconds=int(timeout_str[:-1]))
         else:
             raise ValueError(f"Unsupported timeout format: {timeout_str}")
@@ -463,7 +485,11 @@ class MooseClient:
         workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
     """

-    def __init__(
+    def __init__(
+        self,
+        ch_client: ClickhouseClient,
+        temporal_client: Optional[TemporalClient] = None,
+    ):
         self.query = QueryClient(ch_client)
         self.temporal_client = temporal_client
         if temporal_client:
@@ -508,15 +534,20 @@ class Sql:
     between the strings.
     """

-    def __init__(self, raw_strings: list[str], raw_values: list[
+    def __init__(self, raw_strings: list[str], raw_values: list["RawValue"]):
         if len(raw_strings) - 1 != len(raw_values):
             if len(raw_strings) == 0:
                 raise TypeError("Expected at least 1 string")
-            raise TypeError(
+            raise TypeError(
+                f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values"
+            )

-        values_length = sum(
+        values_length = sum(
+            1 if not isinstance(value, Sql) else len(value.values)
+            for value in raw_values
+        )

-        self.values: list[
+        self.values: list["Value"] = [None] * values_length
         self.strings: list[str] = [None] * (values_length + 1)

         self.strings[0] = raw_strings[0]
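The QueryClient.execute change is the main behavioral one in this file: placeholders are now numbered with a running counter, and an OlapTable whose config specifies a database is bound as two ClickHouse Identifier parameters (database and table) instead of one. Below is a minimal standalone sketch of that substitution, using a hypothetical FakeTable stand-in instead of the real OlapTable/Column types and hard-coding String for scalar parameters (the real code infers the type via clickhouse_param_type_for_value).

from dataclasses import dataclass
from string import Formatter
from typing import Any, Optional

# Stand-in for moose_lib's OlapTable, used only in this sketch.
@dataclass
class FakeTable:
    name: str
    database: Optional[str] = None  # mirrors OlapTable.config.database

def bind(template: str, variables: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Rewrite {var} slots into ClickHouse parameter placeholders, following execute()."""
    params: dict[str, str] = {}
    values: dict[str, Any] = {}
    i = 0
    for _, variable_name, _, _ in Formatter().parse(template):
        if not variable_name:
            continue
        value = variables[variable_name]
        if isinstance(value, FakeTable) and value.database:
            # Database-qualified table: two Identifier parameters.
            params[variable_name] = f"{{p{i}: Identifier}}.{{p{i + 1}: Identifier}}"
            values[f"p{i}"] = value.database
            values[f"p{i + 1}"] = value.name
            i += 2
        elif isinstance(value, FakeTable):
            params[variable_name] = f"{{p{i}: Identifier}}"
            values[f"p{i}"] = value.name
            i += 1
        else:
            # Scalars get a typed placeholder; type inference is omitted here.
            params[variable_name] = f"{{p{i}: String}}"
            values[f"p{i}"] = value
            i += 1
    return template.format_map(params), values

query, values = bind(
    "SELECT * FROM {table} WHERE id = {id}",
    {"table": FakeTable("events", database="analytics"), "id": "42"},
)
print(query)   # SELECT * FROM {p0: Identifier}.{p1: Identifier} WHERE id = {p2: String}
print(values)  # {'p0': 'analytics', 'p1': 'events', 'p2': '42'}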
moose_lib/query_builder.py
CHANGED
@@ -15,7 +15,9 @@ class Params:
         self._counter = 0
         self.bindings: dict[str, object] = {}

-    def bind(
+    def bind(
+        self, value: object, name: str | None = None, ch_type: str | None = None
+    ) -> sge.Expression:
         if name is None:
             name = f"p{self._counter}"
             self._counter += 1
@@ -56,7 +58,9 @@ class ColumnRef:

     def _binary_op(self, op_name: str, value: object) -> Predicate:
         def resolve(query: "Query") -> sge.Expression:
-            table_name =
+            table_name = (
+                query._from_table.name if query._from_table is not None else None
+            )
             left = to_column(self._column, table_name)
             right = query.params.bind(value)
             op = getattr(left, op_name)
@@ -84,7 +88,9 @@ class ColumnRef:

     def in_(self, values: list[object]) -> Predicate:
         def resolve(query: "Query") -> sge.Expression:
-            table_name =
+            table_name = (
+                query._from_table.name if query._from_table is not None else None
+            )
             left = to_column(self._column, table_name)
             rights = [query.params.bind(v) for v in values]
             return left.isin(*rights)
@@ -93,7 +99,9 @@ class ColumnRef:

     def is_null(self) -> Predicate:
         def resolve(query: "Query") -> sge.Expression:
-            table_name =
+            table_name = (
+                query._from_table.name if query._from_table is not None else None
+            )
             left = to_column(self._column, table_name)
             return left.is_(sge.Null())

@@ -116,7 +124,12 @@ class Query:
         return self

     def select(self, *cols: Column) -> "Query":
-        sge_cols = [
+        sge_cols = [
+            to_column(
+                c, self._from_table.name if self._from_table is not None else None
+            )
+            for c in cols
+        ]
         self.inner = self.inner.select(*sge_cols)
         return self
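All of the query_builder hunks apply the same pattern: each predicate builds a resolve(query) closure, and the column is qualified with query._from_table.name only when a FROM table has actually been set, otherwise it stays unqualified. A simplified sketch of that deferred-resolution pattern, using plain strings and hypothetical SimpleColumnRef/SimpleQuery/SimpleTable classes instead of the library's sqlglot-backed types:

from dataclasses import dataclass, field
from typing import Callable, Optional

@dataclass
class SimpleTable:
    name: str

class SimpleColumnRef:
    """Column reference whose predicates resolve lazily against the enclosing query."""

    def __init__(self, column: str):
        self._column = column

    def eq(self, value: object) -> Callable[["SimpleQuery"], str]:
        def resolve(query: "SimpleQuery") -> str:
            # Qualify the column only if the query has a FROM table.
            table_name = (
                query.from_table.name if query.from_table is not None else None
            )
            left = f"{table_name}.{self._column}" if table_name else self._column
            placeholder = query.bind(value)
            return f"{left} = {placeholder}"
        return resolve

@dataclass
class SimpleQuery:
    from_table: Optional[SimpleTable] = None
    bindings: dict = field(default_factory=dict)
    _counter: int = 0

    def bind(self, value: object) -> str:
        name = f"p{self._counter}"
        self._counter += 1
        self.bindings[name] = value
        return f"{{{name}}}"

# With a FROM table the column is qualified; without one it stays bare.
q = SimpleQuery(from_table=SimpleTable("events"))
print(SimpleColumnRef("status").eq("ok")(q))              # events.status = {p0}
print(SimpleColumnRef("status").eq("ok")(SimpleQuery()))  # status = {p0}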
moose_lib/query_param.py
CHANGED
@@ -5,7 +5,11 @@ from typing import Optional, Union, Any
 import inspect

 from pydantic import BaseModel
-from .data_models import
+from .data_models import (
+    DataEnum,
+    py_type_to_column_type,
+    ArrayType as DataModelArrayType,
+)

 scalar_types = Union[str, DataEnum]

@@ -40,18 +44,20 @@ def unwrap_optional(union_type):
 # dmV1 code, won't upgrade to include the rich types
 def parse_scalar_value(value: str, t: scalar_types) -> Any:
     match t:
-        case
+        case "String":
             return value
-        case
+        case "Int" | "Int64":
             return int(value)
-        case
+        case "Float" | "Float64" | "Float32":
             return float(value)
-        case
+        case "Boolean":
             value_lower = value.lower()
-            if value_lower not in (
-                raise ValueError(
-
-
+            if value_lower not in ("true", "false"):
+                raise ValueError(
+                    f"Boolean value must be 'true' or 'false', got: {value}"
+                )
+            return value_lower == "true"
+        case "DateTime":
             return datetime.fromisoformat(value)
         case _:
             # enum parsing will not be added to dmV1 code
@@ -78,11 +84,21 @@ def convert_pydantic_definition(cls: type) -> list[QueryField]:
             element_type = field_type.__args__[0]  # type: ignore
             scala_type = to_scalar_type(element_type)
             fields_list.append(
-                QueryField(
+                QueryField(
+                    field_name,
+                    ArrayType(scala_type),
+                    has_default=not no_default,
+                    required=required,
+                )
+            )
             continue

         scala_type = to_scalar_type(field_type)
-        fields_list.append(
+        fields_list.append(
+            QueryField(
+                field_name, scala_type, has_default=not no_default, required=required
+            )
+        )

     return fields_list

@@ -98,7 +114,9 @@ def convert_dataclass_definition(cls: type) -> list[QueryField]:

         # Handle Optional types
         # Field is not required if it has a default value or is Optional
-        no_default =
+        no_default = (
+            field_def.default == field_def.default_factory == dataclasses.MISSING
+        )
         required = no_default

         if hasattr(field_type, "__origin__"):
@@ -109,11 +127,21 @@ def convert_dataclass_definition(cls: type) -> list[QueryField]:
             element_type = field_type.__args__[0]  # type: ignore
             scala_type = to_scalar_type(element_type)
             fields_list.append(
-                QueryField(
+                QueryField(
+                    field_name,
+                    ArrayType(scala_type),
+                    has_default=not no_default,
+                    required=required,
+                )
+            )
             continue

         scala_type = to_scalar_type(field_type)
-        fields_list.append(
+        fields_list.append(
+            QueryField(
+                field_name, scala_type, has_default=not no_default, required=required
+            )
+        )

     return fields_list

@@ -129,14 +157,16 @@ def convert_api_param(module) -> Optional[tuple[type, list[QueryField]]]:
     elif issubclass(param_class, BaseModel):
         query_fields = convert_pydantic_definition(param_class)
     else:
-        raise ValueError(
+        raise ValueError(
+            f"{param_class.__name__} is neither a Pydantic model or a dataclass"
+        )
     return param_class, query_fields


 def map_params_to_class(
-
-
-
+    params: dict[str, list[str]],
+    field_def_list: list[QueryField],
+    cls: type,
 ) -> Any:
     # Initialize an empty dict for the constructor arguments
     constructor_args: dict[str, Any] = {}
@@ -147,7 +177,9 @@ def map_params_to_class(
         elif is_dataclass(cls):
             return parse_scalar_value(param, t)
         else:
-            raise ValueError(
+            raise ValueError(
+                f"{cls.__name__} is neither a Pydantic model or a dataclass"
+            )

     # Get field definitions from the dataclass
     for field_def in field_def_list:
@@ -167,7 +199,9 @@ def map_params_to_class(
         values = params[field_name]

         if isinstance(field_type, ArrayType):
-            constructor_args[field_name] = [
+            constructor_args[field_name] = [
+                parse(v, field_type.element_type) for v in values
+            ]
         else:
             if len(values) != 1:
                 raise ValueError(f"Expected a single element for {field_name}")
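The query_param changes are mostly formatting, but they spell out the scalar parsing rules: String is passed through, Int/Int64 and the Float variants are converted numerically, Boolean accepts only the literals true/false, and DateTime goes through datetime.fromisoformat. A small self-contained sketch applying those rules to raw query-string values (the parse_scalar helper below is illustrative, not the library function):

from datetime import datetime
from typing import Any

def parse_scalar(value: str, column_type: str) -> Any:
    """Parse one query-string value using the rules shown in parse_scalar_value."""
    match column_type:
        case "String":
            return value
        case "Int" | "Int64":
            return int(value)
        case "Float" | "Float64" | "Float32":
            return float(value)
        case "Boolean":
            value_lower = value.lower()
            if value_lower not in ("true", "false"):
                raise ValueError(
                    f"Boolean value must be 'true' or 'false', got: {value}"
                )
            return value_lower == "true"
        case "DateTime":
            return datetime.fromisoformat(value)
        case _:
            raise ValueError(f"Unsupported type: {column_type}")

# Query-string params arrive as lists of strings, e.g. ?limit=10&active=true
raw = {"limit": ["10"], "active": ["true"], "since": ["2024-01-01T00:00:00"]}
parsed = {
    "limit": parse_scalar(raw["limit"][0], "Int"),
    "active": parse_scalar(raw["active"][0], "Boolean"),
    "since": parse_scalar(raw["since"][0], "DateTime"),
}
print(parsed)
# {'limit': 10, 'active': True, 'since': datetime.datetime(2024, 1, 1, 0, 0)}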
moose_lib/secrets.py
ADDED
@@ -0,0 +1,122 @@
"""Utilities for runtime environment variable resolution.

This module provides functionality to mark values that should be resolved
from environment variables at runtime by the Moose CLI, rather than being
embedded at build time.

Example:
    >>> from moose_lib import S3QueueEngine, moose_runtime_env
    >>>
    >>> engine = S3QueueEngine(
    ...     s3_path="s3://bucket/data/*.json",
    ...     format="JSONEachRow",
    ...     aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
    ...     aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
    ... )
"""

#: Prefix used to mark values for runtime environment variable resolution.
MOOSE_RUNTIME_ENV_PREFIX = "__MOOSE_RUNTIME_ENV__:"


def get(env_var_name: str) -> str:
    """Gets a value from an environment variable, with behavior depending on context.

    When IS_LOADING_INFRA_MAP=true (infrastructure loading):
        Returns a marker string that Moose CLI will resolve later

    When IS_LOADING_INFRA_MAP is unset (function/workflow runtime):
        Returns the actual value from the environment variable

    This is useful for:
    - Credentials that should never be embedded in Docker images
    - Configuration that can be rotated without rebuilding
    - Different values for different environments (dev, staging, prod)
    - Any runtime configuration in infrastructure elements (Tables, Topics, etc.)

    Args:
        env_var_name: Name of the environment variable to resolve

    Returns:
        Either a marker string or the actual environment variable value

    Raises:
        ValueError: If the environment variable name is empty
        KeyError: If the environment variable is not set (runtime mode only)

    Example:
        >>> # Instead of this (evaluated at build time):
        >>> import os
        >>> aws_key = os.environ.get("AWS_ACCESS_KEY_ID")
        >>>
        >>> # Use this (evaluated at runtime):
        >>> aws_key = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
    """
    import os

    if not env_var_name or not env_var_name.strip():
        raise ValueError("Environment variable name cannot be empty")

    # Check if we're loading infrastructure map
    is_loading_infra_map = os.environ.get("IS_LOADING_INFRA_MAP") == "true"

    if is_loading_infra_map:
        # Return marker string for later resolution by Moose CLI
        return f"{MOOSE_RUNTIME_ENV_PREFIX}{env_var_name}"
    else:
        # Return actual value from environment for runtime execution
        value = os.environ.get(env_var_name)
        if value is None:
            raise KeyError(
                f"Environment variable '{env_var_name}' is not set. "
                f"This is required for runtime execution of functions/workflows."
            )
        return value


class MooseRuntimeEnv:
    """Utilities for marking values to be resolved from environment variables at runtime.

    This class provides a namespace for runtime environment variable resolution.
    Use the singleton instance `moose_runtime_env` rather than instantiating this class directly.

    Attributes:
        get: Static method for creating runtime environment variable markers
    """

    @staticmethod
    def get(env_var_name: str) -> str:
        """Marks a value to be resolved from an environment variable at runtime.

        Args:
            env_var_name: Name of the environment variable to resolve

        Returns:
            A marker string that Moose CLI will resolve at runtime

        Raises:
            ValueError: If the environment variable name is empty
        """
        return get(env_var_name)


# Export singleton instance for module-level access
moose_runtime_env = MooseRuntimeEnv()

# Legacy exports for backwards compatibility
MooseEnvSecrets = MooseRuntimeEnv  # Deprecated: Use MooseRuntimeEnv instead
moose_env_secrets = moose_runtime_env  # Deprecated: Use moose_runtime_env instead
MOOSE_ENV_SECRET_PREFIX = (
    MOOSE_RUNTIME_ENV_PREFIX  # Deprecated: Use MOOSE_RUNTIME_ENV_PREFIX instead
)

__all__ = [
    "moose_runtime_env",
    "MooseRuntimeEnv",
    "get",
    "MOOSE_RUNTIME_ENV_PREFIX",
    # Legacy exports (deprecated)
    "moose_env_secrets",
    "MooseEnvSecrets",
    "MOOSE_ENV_SECRET_PREFIX",
]
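The new module's behavior is driven entirely by the IS_LOADING_INFRA_MAP environment variable, so both modes can be exercised directly. A short sketch using the import path from the module's own docstring (the AWS variable names here are just examples):

import os
from moose_lib import moose_runtime_env

# Runtime mode: IS_LOADING_INFRA_MAP is unset, so the real value is returned.
os.environ.pop("IS_LOADING_INFRA_MAP", None)
os.environ["AWS_ACCESS_KEY_ID"] = "AKIA-example"
print(moose_runtime_env.get("AWS_ACCESS_KEY_ID"))
# -> AKIA-example

# Infrastructure-loading mode: the CLI sets IS_LOADING_INFRA_MAP=true, so a
# marker string is returned and resolved later by the Moose CLI.
os.environ["IS_LOADING_INFRA_MAP"] = "true"
print(moose_runtime_env.get("AWS_ACCESS_KEY_ID"))
# -> __MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID

# Missing variables fail fast in runtime mode.
os.environ.pop("IS_LOADING_INFRA_MAP", None)
try:
    moose_runtime_env.get("NOT_SET_ANYWHERE")
except KeyError as e:
    print(e)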