moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/main.py CHANGED
@@ -4,6 +4,7 @@ This module provides foundational classes, enums, and functions used across the
  including configuration objects, clients for interacting with services (ClickHouse, Temporal),
  and utilities for defining data models and SQL queries.
  """
+
  from clickhouse_connect.driver.client import Client as ClickhouseClient
  from clickhouse_connect import get_client
  from moose_lib.dmv2 import OlapTable
@@ -18,7 +19,11 @@ import hashlib
  import asyncio
  from string import Formatter
  from temporalio.client import Client as TemporalClient, TLSConfig
- from temporalio.common import RetryPolicy, WorkflowIDConflictPolicy, WorkflowIDReusePolicy
+ from temporalio.common import (
+ RetryPolicy,
+ WorkflowIDConflictPolicy,
+ WorkflowIDReusePolicy,
+ )
  from datetime import timedelta, datetime
  from time import perf_counter
  from humanfriendly import format_timespan
@@ -37,6 +42,7 @@ class StreamingFunction:
  Attributes:
  run: The callable function that performs the streaming logic.
  """
+
  run: Callable

@@ -49,6 +55,7 @@ class StorageConfig:
  order_by_fields: List of fields to use for ordering in the storage layer.
  deduplicate: Whether to enable deduplication based on the order_by_fields.
  """
+
  enabled: Optional[bool] = None
  order_by_fields: Optional[list[str]] = None
  deduplicate: Optional[bool] = None
@@ -63,6 +70,7 @@
  Attributes:
  storage: Configuration for how data is stored.
  """
+
  storage: Optional[StorageConfig] = None

@@ -112,21 +120,22 @@ def moose_data_model(arg: Any = None) -> Any:
  def get_file(t: type) -> Optional[str]:
  """Helper to get the file path of a type's definition."""
  module = sys.modules.get(t.__module__)
- if module and hasattr(module, '__file__'):
+ if module and hasattr(module, "__file__"):
  return module.__file__
  return None

  def remove_null(d: dict) -> dict:
  """Recursively removes keys with None values from a dictionary."""
- return {key: remove_null(value) if isinstance(value, dict) else value for key, value in d.items() if
- not (value is None)}
+ return {
+ key: remove_null(value) if isinstance(value, dict) else value
+ for key, value in d.items()
+ if not (value is None)
+ }

  def decorator(data_class: type) -> type:
  expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
  if expected_file_name and expected_file_name == get_file(data_class):
- output: dict[str, str | dict] = {
- 'class_name': data_class.__name__
- }
+ output: dict[str, str | dict] = {"class_name": data_class.__name__}
  if arg:
  output["config"] = remove_null(asdict(arg))
  output_json = json.dumps(output, cls=CustomEncoder, indent=4)
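For reference, the reformatted remove_null helper drops None-valued keys and recurses into nested dicts before the config is serialized. A minimal standalone sketch of that behavior (the sample input below is illustrative, not taken from the package):

    def remove_null(d: dict) -> dict:
        # Keep a key unless its value is None; recurse only into dict values.
        return {
            k: remove_null(v) if isinstance(v, dict) else v
            for k, v in d.items()
            if v is not None
        }

    print(remove_null({"storage": {"enabled": True, "deduplicate": None}, "version": None}))
    # -> {'storage': {'enabled': True}}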
@@ -149,6 +158,7 @@ class ApiResult:
  status: The HTTP status code for the response.
  body: The response body, which should be JSON serializable.
  """
+
  status: int
  body: Any

@@ -165,11 +175,13 @@ class QueryClient:
  ch_client_or_config: Either an instance of the ClickHouse client or a RuntimeClickHouseConfig.
  """

- def __init__(self, ch_client_or_config: Union[ClickhouseClient, RuntimeClickHouseConfig]):
+ def __init__(
+ self, ch_client_or_config: Union[ClickhouseClient, RuntimeClickHouseConfig]
+ ):
  if isinstance(ch_client_or_config, RuntimeClickHouseConfig):
  # Create ClickHouse client from configuration
  config = ch_client_or_config
- interface = 'https' if config.use_ssl else 'http'
+ interface = "https" if config.use_ssl else "http"
  self.ch_client = get_client(
  interface=interface,
  host=config.host,
@@ -185,7 +197,9 @@ class QueryClient:
  def __call__(self, input, variables):
  return self.execute(input, variables)

- def execute(self, input: Union[str, Query], variables = None, row_type: Type[BaseModel] = None):
+ def execute(
+ self, input: Union[str, Query], variables=None, row_type: Type[BaseModel] = None
+ ):
  """
  Execute a query.

@@ -212,7 +226,8 @@ class QueryClient:
  values: dict[str, Any] = {}
  preview_params = {}

- for i, (_, variable_name, _, _) in enumerate(Formatter().parse(input)):
+ i = 0
+ for _, variable_name, _, _ in Formatter().parse(input):
  if variable_name:
  value = variables[variable_name]
  if isinstance(value, list) and len(value) == 1:
@@ -220,27 +235,36 @@ class QueryClient:
  value = value[0]

  if isinstance(value, Column) or isinstance(value, OlapTable):
- params[variable_name] = f'{{p{i}: Identifier}}'
- values[f'p{i}'] = value.name
+ if isinstance(value, OlapTable) and value.config.database:
+ params[variable_name] = (
+ f"{{p{i}: Identifier}}.{{p{i + 1}: Identifier}}"
+ )
+ values[f"p{i}"] = value.config.database
+ values[f"p{i + 1}"] = value.name
+ i += 2
+ else:
+ params[variable_name] = f"{{p{i}: Identifier}}"
+ values[f"p{i}"] = value.name
+ i += 1
  else:
  from moose_lib.utilities.sql import clickhouse_param_type_for_value
+
  ch_type = clickhouse_param_type_for_value(value)
- params[variable_name] = f'{{p{i}: {ch_type}}}'
- values[f'p{i}'] = value
+ params[variable_name] = f"{{p{i}: {ch_type}}}"
+ values[f"p{i}"] = value
+ i += 1
  preview_params[variable_name] = self._format_value_for_preview(value)

  clickhouse_query = input.format_map(params)
  preview_query = input.format_map(preview_params)
  print(f"[QueryClient] | Query: {' '.join(preview_query.split())}")
- return self.execute_raw(
- clickhouse_query, values, row_type
- )
+ return self.execute_raw(clickhouse_query, values, row_type)

  def execute_raw(
- self,
- clickhouse_query: str,
- parameters: Optional[dict[str, Any]],
- row_type: Type[BaseModel] = None
+ self,
+ clickhouse_query: str,
+ parameters: Optional[dict[str, Any]],
+ row_type: Type[BaseModel] = None,
  ):
  """
  Uses raw clickhouse SQL syntax.
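For context on the new binding logic above: when a format-string query references an OlapTable whose config sets a database, the placeholder now expands to a qualified pair of ClickHouse Identifier parameters instead of a single one. A small runnable sketch of that substitution (the table and database names are illustrative):

    # Standalone sketch of the placeholder expansion; names are illustrative.
    params = {"events_table": "{p0: Identifier}.{p1: Identifier}"}
    values = {"p0": "analytics", "p1": "events"}

    user_query = "SELECT count() FROM {events_table}"
    print(user_query.format_map(params))
    # -> SELECT count() FROM {p0: Identifier}.{p1: Identifier}
    # `values` is then passed alongside the rewritten query to execute_raw,
    # so ClickHouse binds the database and table names as Identifier parameters.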
@@ -270,11 +294,11 @@ class QueryClient:
  """
  # NULL handling
  if value is None:
- return 'NULL'
+ return "NULL"

  # Booleans (ClickHouse accepts true/false)
  if isinstance(value, bool):
- return 'true' if value else 'false'
+ return "true" if value else "false"

  # Numbers
  if isinstance(value, (int, float)) and not isinstance(value, bool):
@@ -283,24 +307,29 @@ class QueryClient:
  # Strings
  if isinstance(value, str):
  # Escape backslashes and single quotes for ClickHouse single-quoted strings
- escaped = value.replace('\\', '\\\\').replace("'", "\\'")
+ escaped = value.replace("\\", "\\\\").replace("'", "\\'")
  return f"'{escaped}'"

  # DateTime
  if isinstance(value, datetime):
  return f"'{value.strftime('%Y-%m-%d %H:%M:%S')}'"

+ if isinstance(value, OlapTable) and value.config.database:
+ return f"{value.config.database}.{value.name}"
+
  if isinstance(value, Column) or isinstance(value, OlapTable):
  return value.name

  # Lists / tuples (format as [item1, item2, ...])
  if isinstance(value, (list, tuple)):
- formatted_items = ', '.join(self._format_value_for_preview(v) for v in value)
+ formatted_items = ", ".join(
+ self._format_value_for_preview(v) for v in value
+ )
  return f"[{formatted_items}]"

  # Fallback: stringify and single-quote
  fallback = str(value)
- escaped_fallback = fallback.replace('\\', '\\\\').replace("'", "\\'")
+ escaped_fallback = fallback.replace("\\", "\\\\").replace("'", "\\'")
  return f"'{escaped_fallback}'"

  def close(self):
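As a quick reference for the escaping rules above, here is a simplified standalone restatement of the preview formatting, shown for illustration only (the real method also renders Column/OlapTable names and database-qualified tables):

    from datetime import datetime

    def preview(value):
        # Simplified restatement of the rules above, for illustration only.
        if value is None:
            return "NULL"
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, str):
            return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'"
        if isinstance(value, datetime):
            return f"'{value.strftime('%Y-%m-%d %H:%M:%S')}'"
        if isinstance(value, (list, tuple)):
            return "[" + ", ".join(preview(v) for v in value) + "]"
        return "'" + str(value).replace("\\", "\\\\").replace("'", "\\'") + "'"

    print(preview(["O'Brien", None, True, 3]))  # ['O\'Brien', NULL, true, 3]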
@@ -325,33 +354,26 @@ class WorkflowClient:
  # Test workflow executor in rust if this changes significantly
  def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
  try:
- workflow_id, run_id = asyncio.run(self._start_workflow_async(name, input_data))
+ workflow_id, run_id = asyncio.run(
+ self._start_workflow_async(name, input_data)
+ )
  print(f"WorkflowClient - started workflow: {name}")
  return {
  "status": 200,
- "body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history"
+ "body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history",
  }
  except Exception as e:
  print(f"WorkflowClient - error while starting workflow: {e}")
- return {
- "status": 400,
- "body": str(e)
- }
+ return {"status": 400, "body": str(e)}

  def terminate(self, workflow_id: str) -> Dict[str, Any]:
  try:
  asyncio.run(self._terminate_workflow_async(workflow_id))
  print(f"WorkflowClient - terminated workflow: {workflow_id}")
- return {
- "status": 200,
- "body": f"Workflow terminated: {workflow_id}"
- }
+ return {"status": 200, "body": f"Workflow terminated: {workflow_id}"}
  except Exception as e:
  print(f"WorkflowClient - error while terminating workflow: {e}")
- return {
- "status": 400,
- "body": str(e)
- }
+ return {"status": 400, "body": str(e)}

  async def _terminate_workflow_async(self, workflow_id: str):
  workflow_handle = self.temporal_client.get_workflow_handle(workflow_id)
@@ -365,11 +387,12 @@ class WorkflowClient:
  processed_input, workflow_id = self._process_input_data(name, input_data)

  # Create retry policy and timeout (common logic)
- retry_policy = RetryPolicy(maximum_attempts=config['retry_count'])
- run_timeout = self.parse_timeout_to_timedelta(config['timeout_str'])
+ retry_policy = RetryPolicy(maximum_attempts=config["retry_count"])
+ run_timeout = self.parse_timeout_to_timedelta(config["timeout_str"])

  print(
- f"WorkflowClient - starting DMv2 workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}")
+ f"WorkflowClient - starting DMv2 workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}"
+ )

  # Start workflow with appropriate args
  workflow_args = self._build_workflow_args(name, processed_input)
@@ -388,8 +411,7 @@
  workflow_kwargs["run_timeout"] = run_timeout

  workflow_handle = await self.temporal_client.start_workflow(
- "ScriptWorkflow",
- **workflow_kwargs
+ "ScriptWorkflow", **workflow_kwargs
  )

  return workflow_id, workflow_handle.result_run_id
@@ -403,8 +425,8 @@
  raise ValueError(f"DMv2 workflow '{name}' not found")

  return {
- 'retry_count': dmv2_workflow.config.retries or 3,
- 'timeout_str': dmv2_workflow.config.timeout or "1h",
+ "retry_count": dmv2_workflow.config.retries or 3,
+ "timeout_str": dmv2_workflow.config.timeout or "1h",
  }

  def _process_input_data(self, name: str, input_data: Any) -> tuple[Any, str]:
@@ -438,11 +460,11 @@
  def parse_timeout_to_timedelta(self, timeout_str: str) -> Optional[timedelta]:
  if timeout_str == "never":
  return None # Unlimited execution timeout
- elif timeout_str.endswith('h'):
+ elif timeout_str.endswith("h"):
  return timedelta(hours=int(timeout_str[:-1]))
- elif timeout_str.endswith('m'):
+ elif timeout_str.endswith("m"):
  return timedelta(minutes=int(timeout_str[:-1]))
- elif timeout_str.endswith('s'):
+ elif timeout_str.endswith("s"):
  return timedelta(seconds=int(timeout_str[:-1]))
  else:
  raise ValueError(f"Unsupported timeout format: {timeout_str}")
@@ -463,7 +485,11 @@ class MooseClient:
  workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
  """

- def __init__(self, ch_client: ClickhouseClient, temporal_client: Optional[TemporalClient] = None):
+ def __init__(
+ self,
+ ch_client: ClickhouseClient,
+ temporal_client: Optional[TemporalClient] = None,
+ ):
  self.query = QueryClient(ch_client)
  self.temporal_client = temporal_client
  if temporal_client:
@@ -508,15 +534,20 @@ class Sql:
  between the strings.
  """

- def __init__(self, raw_strings: list[str], raw_values: list['RawValue']):
+ def __init__(self, raw_strings: list[str], raw_values: list["RawValue"]):
  if len(raw_strings) - 1 != len(raw_values):
  if len(raw_strings) == 0:
  raise TypeError("Expected at least 1 string")
- raise TypeError(f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values")
+ raise TypeError(
+ f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values"
+ )

- values_length = sum(1 if not isinstance(value, Sql) else len(value.values) for value in raw_values)
+ values_length = sum(
+ 1 if not isinstance(value, Sql) else len(value.values)
+ for value in raw_values
+ )

- self.values: list['Value'] = [None] * values_length
+ self.values: list["Value"] = [None] * values_length
  self.strings: list[str] = [None] * (values_length + 1)

  self.strings[0] = raw_strings[0]
moose_lib/query_builder.py CHANGED
@@ -15,7 +15,9 @@ class Params:
  self._counter = 0
  self.bindings: dict[str, object] = {}

- def bind(self, value: object, name: str | None = None, ch_type: str | None = None) -> sge.Expression:
+ def bind(
+ self, value: object, name: str | None = None, ch_type: str | None = None
+ ) -> sge.Expression:
  if name is None:
  name = f"p{self._counter}"
  self._counter += 1
@@ -56,7 +58,9 @@ class ColumnRef:

  def _binary_op(self, op_name: str, value: object) -> Predicate:
  def resolve(query: "Query") -> sge.Expression:
- table_name = query._from_table.name if query._from_table is not None else None
+ table_name = (
+ query._from_table.name if query._from_table is not None else None
+ )
  left = to_column(self._column, table_name)
  right = query.params.bind(value)
  op = getattr(left, op_name)
@@ -84,7 +88,9 @@ class ColumnRef:

  def in_(self, values: list[object]) -> Predicate:
  def resolve(query: "Query") -> sge.Expression:
- table_name = query._from_table.name if query._from_table is not None else None
+ table_name = (
+ query._from_table.name if query._from_table is not None else None
+ )
  left = to_column(self._column, table_name)
  rights = [query.params.bind(v) for v in values]
  return left.isin(*rights)
@@ -93,7 +99,9 @@ class ColumnRef:

  def is_null(self) -> Predicate:
  def resolve(query: "Query") -> sge.Expression:
- table_name = query._from_table.name if query._from_table is not None else None
+ table_name = (
+ query._from_table.name if query._from_table is not None else None
+ )
  left = to_column(self._column, table_name)
  return left.is_(sge.Null())

@@ -116,7 +124,12 @@ class Query:
  return self

  def select(self, *cols: Column) -> "Query":
- sge_cols = [to_column(c, self._from_table.name if self._from_table is not None else None) for c in cols]
+ sge_cols = [
+ to_column(
+ c, self._from_table.name if self._from_table is not None else None
+ )
+ for c in cols
+ ]
  self.inner = self.inner.select(*sge_cols)
  return self

moose_lib/query_param.py CHANGED
@@ -5,7 +5,11 @@ from typing import Optional, Union, Any
  import inspect

  from pydantic import BaseModel
- from .data_models import DataEnum, py_type_to_column_type, ArrayType as DataModelArrayType
+ from .data_models import (
+ DataEnum,
+ py_type_to_column_type,
+ ArrayType as DataModelArrayType,
+ )

  scalar_types = Union[str, DataEnum]

@@ -40,18 +44,20 @@ def unwrap_optional(union_type):
  # dmV1 code, won't upgrade to include the rich types
  def parse_scalar_value(value: str, t: scalar_types) -> Any:
  match t:
- case 'String':
+ case "String":
  return value
- case 'Int':
+ case "Int" | "Int64":
  return int(value)
- case 'Float' | 'Float64' | 'Float32':
+ case "Float" | "Float64" | "Float32":
  return float(value)
- case 'Boolean':
+ case "Boolean":
  value_lower = value.lower()
- if value_lower not in ('true', 'false'):
- raise ValueError(f"Boolean value must be 'true' or 'false', got: {value}")
- return value_lower == 'true'
- case 'DateTime':
+ if value_lower not in ("true", "false"):
+ raise ValueError(
+ f"Boolean value must be 'true' or 'false', got: {value}"
+ )
+ return value_lower == "true"
+ case "DateTime":
  return datetime.fromisoformat(value)
  case _:
  # enum parsing will not be added to dmV1 code
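A few illustrative calls showing how the updated match arms coerce query-string values (module path and signature as shown in this diff; note the new Int64 arm and the stricter Boolean validation):

    from moose_lib.query_param import parse_scalar_value

    parse_scalar_value("42", "Int64")                      # -> 42 (handled by the new Int64 arm)
    parse_scalar_value("3.5", "Float64")                   # -> 3.5
    parse_scalar_value("true", "Boolean")                  # -> True ("yes"/"no" now raise ValueError)
    parse_scalar_value("2024-01-02T03:04:05", "DateTime")  # -> datetime(2024, 1, 2, 3, 4, 5)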
@@ -78,11 +84,21 @@ def convert_pydantic_definition(cls: type) -> list[QueryField]:
  element_type = field_type.__args__[0] # type: ignore
  scala_type = to_scalar_type(element_type)
  fields_list.append(
- QueryField(field_name, ArrayType(scala_type), has_default=not no_default, required=required))
+ QueryField(
+ field_name,
+ ArrayType(scala_type),
+ has_default=not no_default,
+ required=required,
+ )
+ )
  continue

  scala_type = to_scalar_type(field_type)
- fields_list.append(QueryField(field_name, scala_type, has_default=not no_default, required=required))
+ fields_list.append(
+ QueryField(
+ field_name, scala_type, has_default=not no_default, required=required
+ )
+ )

  return fields_list

@@ -98,7 +114,9 @@ def convert_dataclass_definition(cls: type) -> list[QueryField]:

  # Handle Optional types
  # Field is not required if it has a default value or is Optional
- no_default = field_def.default == field_def.default_factory == dataclasses.MISSING
+ no_default = (
+ field_def.default == field_def.default_factory == dataclasses.MISSING
+ )
  required = no_default

  if hasattr(field_type, "__origin__"):
@@ -109,11 +127,21 @@ def convert_dataclass_definition(cls: type) -> list[QueryField]:
  element_type = field_type.__args__[0] # type: ignore
  scala_type = to_scalar_type(element_type)
  fields_list.append(
- QueryField(field_name, ArrayType(scala_type), has_default=not no_default, required=required))
+ QueryField(
+ field_name,
+ ArrayType(scala_type),
+ has_default=not no_default,
+ required=required,
+ )
+ )
  continue

  scala_type = to_scalar_type(field_type)
- fields_list.append(QueryField(field_name, scala_type, has_default=not no_default, required=required))
+ fields_list.append(
+ QueryField(
+ field_name, scala_type, has_default=not no_default, required=required
+ )
+ )

  return fields_list

@@ -129,14 +157,16 @@ def convert_api_param(module) -> Optional[tuple[type, list[QueryField]]]:
  elif issubclass(param_class, BaseModel):
  query_fields = convert_pydantic_definition(param_class)
  else:
- raise ValueError(f"{param_class.__name__} is neither a Pydantic model or a dataclass")
+ raise ValueError(
+ f"{param_class.__name__} is neither a Pydantic model or a dataclass"
+ )
  return param_class, query_fields


  def map_params_to_class(
- params: dict[str, list[str]],
- field_def_list: list[QueryField],
- cls: type,
+ params: dict[str, list[str]],
+ field_def_list: list[QueryField],
+ cls: type,
  ) -> Any:
  # Initialize an empty dict for the constructor arguments
  constructor_args: dict[str, Any] = {}
@@ -147,7 +177,9 @@
  elif is_dataclass(cls):
  return parse_scalar_value(param, t)
  else:
- raise ValueError(f"{cls.__name__} is neither a Pydantic model or a dataclass")
+ raise ValueError(
+ f"{cls.__name__} is neither a Pydantic model or a dataclass"
+ )

  # Get field definitions from the dataclass
  for field_def in field_def_list:
@@ -167,7 +199,9 @@
  values = params[field_name]

  if isinstance(field_type, ArrayType):
- constructor_args[field_name] = [parse(v, field_type.element_type) for v in values]
+ constructor_args[field_name] = [
+ parse(v, field_type.element_type) for v in values
+ ]
  else:
  if len(values) != 1:
  raise ValueError(f"Expected a single element for {field_name}")
moose_lib/secrets.py ADDED
@@ -0,0 +1,122 @@
+ """Utilities for runtime environment variable resolution.
+
+ This module provides functionality to mark values that should be resolved
+ from environment variables at runtime by the Moose CLI, rather than being
+ embedded at build time.
+
+ Example:
+ >>> from moose_lib import S3QueueEngine, moose_runtime_env
+ >>>
+ >>> engine = S3QueueEngine(
+ ... s3_path="s3://bucket/data/*.json",
+ ... format="JSONEachRow",
+ ... aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+ ... aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+ ... )
+ """
+
+ #: Prefix used to mark values for runtime environment variable resolution.
+ MOOSE_RUNTIME_ENV_PREFIX = "__MOOSE_RUNTIME_ENV__:"
+
+
+ def get(env_var_name: str) -> str:
+ """Gets a value from an environment variable, with behavior depending on context.
+
+ When IS_LOADING_INFRA_MAP=true (infrastructure loading):
+ Returns a marker string that Moose CLI will resolve later
+
+ When IS_LOADING_INFRA_MAP is unset (function/workflow runtime):
+ Returns the actual value from the environment variable
+
+ This is useful for:
+ - Credentials that should never be embedded in Docker images
+ - Configuration that can be rotated without rebuilding
+ - Different values for different environments (dev, staging, prod)
+ - Any runtime configuration in infrastructure elements (Tables, Topics, etc.)
+
+ Args:
+ env_var_name: Name of the environment variable to resolve
+
+ Returns:
+ Either a marker string or the actual environment variable value
+
+ Raises:
+ ValueError: If the environment variable name is empty
+ KeyError: If the environment variable is not set (runtime mode only)
+
+ Example:
+ >>> # Instead of this (evaluated at build time):
+ >>> import os
+ >>> aws_key = os.environ.get("AWS_ACCESS_KEY_ID")
+ >>>
+ >>> # Use this (evaluated at runtime):
+ >>> aws_key = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
+ """
+ import os
+
+ if not env_var_name or not env_var_name.strip():
+ raise ValueError("Environment variable name cannot be empty")
+
+ # Check if we're loading infrastructure map
+ is_loading_infra_map = os.environ.get("IS_LOADING_INFRA_MAP") == "true"
+
+ if is_loading_infra_map:
+ # Return marker string for later resolution by Moose CLI
+ return f"{MOOSE_RUNTIME_ENV_PREFIX}{env_var_name}"
+ else:
+ # Return actual value from environment for runtime execution
+ value = os.environ.get(env_var_name)
+ if value is None:
+ raise KeyError(
+ f"Environment variable '{env_var_name}' is not set. "
+ f"This is required for runtime execution of functions/workflows."
+ )
+ return value
+
+
+ class MooseRuntimeEnv:
+ """Utilities for marking values to be resolved from environment variables at runtime.
+
+ This class provides a namespace for runtime environment variable resolution.
+ Use the singleton instance `moose_runtime_env` rather than instantiating this class directly.
+
+ Attributes:
+ get: Static method for creating runtime environment variable markers
+ """
+
+ @staticmethod
+ def get(env_var_name: str) -> str:
+ """Marks a value to be resolved from an environment variable at runtime.
+
+ Args:
+ env_var_name: Name of the environment variable to resolve
+
+ Returns:
+ A marker string that Moose CLI will resolve at runtime
+
+ Raises:
+ ValueError: If the environment variable name is empty
+ """
+ return get(env_var_name)
+
+
+ # Export singleton instance for module-level access
+ moose_runtime_env = MooseRuntimeEnv()
+
+ # Legacy exports for backwards compatibility
+ MooseEnvSecrets = MooseRuntimeEnv # Deprecated: Use MooseRuntimeEnv instead
+ moose_env_secrets = moose_runtime_env # Deprecated: Use moose_runtime_env instead
+ MOOSE_ENV_SECRET_PREFIX = (
+ MOOSE_RUNTIME_ENV_PREFIX # Deprecated: Use MOOSE_RUNTIME_ENV_PREFIX instead
+ )
+
+ __all__ = [
+ "moose_runtime_env",
+ "MooseRuntimeEnv",
+ "get",
+ "MOOSE_RUNTIME_ENV_PREFIX",
+ # Legacy exports (deprecated)
+ "moose_env_secrets",
+ "MooseEnvSecrets",
+ "MOOSE_ENV_SECRET_PREFIX",
+ ]
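To illustrate the two resolution modes described in the module above, a small usage sketch (environment variable names and values are illustrative):

    import os
    from moose_lib.secrets import MOOSE_RUNTIME_ENV_PREFIX, moose_runtime_env

    # During infrastructure loading, the call returns a marker the Moose CLI resolves later.
    os.environ["IS_LOADING_INFRA_MAP"] = "true"
    assert moose_runtime_env.get("AWS_ACCESS_KEY_ID") == f"{MOOSE_RUNTIME_ENV_PREFIX}AWS_ACCESS_KEY_ID"

    # At function/workflow runtime, the real environment value is returned (KeyError if unset).
    del os.environ["IS_LOADING_INFRA_MAP"]
    os.environ["AWS_ACCESS_KEY_ID"] = "example-key"
    assert moose_runtime_env.get("AWS_ACCESS_KEY_ID") == "example-key"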