moose-lib 0.4.262__tar.gz → 0.4.263__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of moose-lib might be problematic. Click here for more details.

Files changed (43) hide show
  1. {moose_lib-0.4.262 → moose_lib-0.4.263}/PKG-INFO +1 -1
  2. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/__init__.py +1 -1
  3. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/commons.py +37 -4
  4. moose_lib-0.4.263/moose_lib/main.py +445 -0
  5. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/streaming/streaming_function_runner.py +1 -28
  6. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/PKG-INFO +1 -1
  7. moose_lib-0.4.262/moose_lib/main.py +0 -254
  8. {moose_lib-0.4.262 → moose_lib-0.4.263}/README.md +0 -0
  9. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/blocks.py +0 -0
  10. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/clients/__init__.py +0 -0
  11. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/clients/redis_client.py +0 -0
  12. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/__init__.py +0 -0
  13. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/config_file.py +0 -0
  14. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/runtime.py +0 -0
  15. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/data_models.py +0 -0
  16. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/__init__.py +0 -0
  17. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/_registry.py +0 -0
  18. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/consumption.py +0 -0
  19. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/ingest_api.py +0 -0
  20. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/ingest_pipeline.py +0 -0
  21. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/materialized_view.py +0 -0
  22. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/olap_table.py +0 -0
  23. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/registry.py +0 -0
  24. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/sql_resource.py +0 -0
  25. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/stream.py +0 -0
  26. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/types.py +0 -0
  27. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/view.py +0 -0
  28. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/workflow.py +0 -0
  29. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2-serializer.py +0 -0
  30. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/internal.py +0 -0
  31. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/query_param.py +0 -0
  32. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/streaming/__init__.py +0 -0
  33. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/tasks.py +0 -0
  34. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/SOURCES.txt +0 -0
  35. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/dependency_links.txt +0 -0
  36. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/requires.txt +0 -0
  37. {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/top_level.txt +0 -0
  38. {moose_lib-0.4.262 → moose_lib-0.4.263}/setup.cfg +0 -0
  39. {moose_lib-0.4.262 → moose_lib-0.4.263}/setup.py +0 -0
  40. {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/__init__.py +0 -0
  41. {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/conftest.py +0 -0
  42. {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/test_moose.py +0 -0
  43. {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/test_redis_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.4.262
3
+ Version: 0.4.263
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -10,4 +10,4 @@ from .data_models import *
10
10
 
11
11
  from .dmv2 import *
12
12
 
13
- from .clients.redis_client import MooseCache
13
+ from .clients.redis_client import MooseCache
@@ -1,15 +1,20 @@
1
+ import dataclasses
1
2
  import logging
3
+ from datetime import datetime, timezone
4
+
2
5
  import requests
3
6
  import json
4
7
  from typing import Optional, Literal
5
8
 
9
+
6
10
  class CliLogData:
7
11
  INFO = "Info"
8
12
  SUCCESS = "Success"
9
13
  ERROR = "Error"
10
14
  HIGHLIGHT = "Highlight"
11
15
 
12
- def __init__(self, action: str, message: str, message_type: Optional[Literal[INFO, SUCCESS, ERROR, HIGHLIGHT]] = INFO):
16
+ def __init__(self, action: str, message: str,
17
+ message_type: Optional[Literal[INFO, SUCCESS, ERROR, HIGHLIGHT]] = INFO):
13
18
  self.message_type = message_type
14
19
  self.action = action
15
20
  self.message = message
@@ -31,11 +36,11 @@ def cli_log(log: CliLogData) -> None:
31
36
 
32
37
  class Logger:
33
38
  default_action = "Custom"
34
-
39
+
35
40
  def __init__(self, action: Optional[str] = None, is_moose_task: bool = False):
36
41
  self.action = action or Logger.default_action
37
42
  self._is_moose_task = is_moose_task
38
-
43
+
39
44
  def _log(self, message: str, message_type: str) -> None:
40
45
  if self._is_moose_task:
41
46
  # We have a task decorator in the lib that initializes a logger
@@ -62,4 +67,32 @@ class Logger:
62
67
  self._log(message, CliLogData.ERROR)
63
68
 
64
69
  def highlight(self, message: str) -> None:
65
- self._log(message, CliLogData.HIGHLIGHT)
70
+ self._log(message, CliLogData.HIGHLIGHT)
71
+
72
+
73
+ class EnhancedJSONEncoder(json.JSONEncoder):
74
+ """
75
+ Custom JSON encoder that handles:
76
+ - datetime objects (converts to ISO format with timezone)
77
+ - dataclass instances (converts to dict)
78
+ - Pydantic models (converts to dict)
79
+ """
80
+
81
+ def default(self, o):
82
+ if isinstance(o, datetime):
83
+ if o.tzinfo is None:
84
+ o = o.replace(tzinfo=timezone.utc)
85
+ return o.isoformat()
86
+ if hasattr(o, "model_dump"): # Handle Pydantic v2 models
87
+ # Convert to dict and handle datetime fields
88
+ data = o.model_dump()
89
+ # Handle any datetime fields that might be present
90
+ for key, value in data.items():
91
+ if isinstance(value, datetime):
92
+ if value.tzinfo is None:
93
+ value = value.replace(tzinfo=timezone.utc)
94
+ data[key] = value.isoformat()
95
+ return data
96
+ if dataclasses.is_dataclass(o):
97
+ return dataclasses.asdict(o)
98
+ return super().default(o)
@@ -0,0 +1,445 @@
1
+ """Core Moose Python library definitions.
2
+
3
+ This module provides foundational classes, enums, and functions used across the Moose ecosystem,
4
+ including configuration objects, clients for interacting with services (ClickHouse, Temporal),
5
+ and utilities for defining data models and SQL queries.
6
+ """
7
+ from clickhouse_connect.driver.client import Client as ClickhouseClient
8
+ from clickhouse_connect import get_client
9
+ from pydantic import BaseModel
10
+ from dataclasses import dataclass, asdict
11
+ from enum import Enum
12
+ from typing import Any, Callable, Dict, Optional, TypeVar, overload, Type, Union
13
+ import sys
14
+ import os
15
+ import json
16
+ import hashlib
17
+ import asyncio
18
+ from string import Formatter
19
+ from temporalio.client import Client as TemporalClient, TLSConfig
20
+ from temporalio.common import RetryPolicy, WorkflowIDConflictPolicy, WorkflowIDReusePolicy
21
+ from datetime import timedelta
22
+ from .config.runtime import RuntimeClickHouseConfig
23
+
24
+ from moose_lib.commons import EnhancedJSONEncoder
25
+
26
+
27
+ @dataclass
28
+ class StreamingFunction:
29
+ """Represents a function intended for stream processing.
30
+
31
+ Attributes:
32
+ run: The callable function that performs the streaming logic.
33
+ """
34
+ run: Callable
35
+
36
+
37
+ @dataclass
38
+ class StorageConfig:
39
+ """Configuration related to data storage, typically in an OLAP table.
40
+
41
+ Attributes:
42
+ enabled: Whether storage is enabled for this data model.
43
+ order_by_fields: List of fields to use for ordering in the storage layer.
44
+ deduplicate: Whether to enable deduplication based on the order_by_fields.
45
+ """
46
+ enabled: Optional[bool] = None
47
+ order_by_fields: Optional[list[str]] = None
48
+ deduplicate: Optional[bool] = None
49
+
50
+
51
+ @dataclass
52
+ class DataModelConfig:
53
+ """Top-level configuration for a Moose data model.
54
+
55
+ Combines ingestion and storage settings.
56
+
57
+ Attributes:
58
+ storage: Configuration for how data is stored.
59
+ """
60
+ storage: Optional[StorageConfig] = None
61
+
62
+
63
+ class CustomEncoder(json.JSONEncoder):
64
+ """Custom JSON encoder that handles Enum types by encoding their values."""
65
+
66
+ def default(self, obj):
67
+ if isinstance(obj, Enum):
68
+ return obj.value
69
+ return super().default(obj)
70
+
71
+
72
+ _DC = TypeVar("_DC", bound=type)
73
+
74
+
75
+ @overload
76
+ def moose_data_model(arg: Optional[DataModelConfig]) -> Callable[[_DC], _DC]:
77
+ """Decorator overload: Applies configuration to a data model class."""
78
+ ...
79
+
80
+
81
+ @overload
82
+ def moose_data_model(arg: _DC) -> _DC:
83
+ """Decorator overload: Decorates a data model class without explicit configuration."""
84
+ ...
85
+
86
+
87
+ def moose_data_model(arg: Any = None) -> Any:
88
+ """Decorator for Moose data model classes.
89
+
90
+ This decorator can be used with or without arguments:
91
+ - `@moose_data_model`: Decorates a class as a Moose data model with default settings.
92
+ - `@moose_data_model(DataModelConfig(...))`: Decorates a class and applies the specified
93
+ ingestion and storage configurations.
94
+
95
+ During infrastructure processing (when `MOOSE_PYTHON_DM_DUMP` environment variable
96
+ matches the decorated class's file path), it prints the class name and configuration
97
+ as JSON, separated by a specific delimiter (`___DATAMODELCONFIG___`).
98
+
99
+ Args:
100
+ arg: Either a `DataModelConfig` instance or the class being decorated.
101
+
102
+ Returns:
103
+ A decorator function or the decorated class.
104
+ """
105
+
106
+ def get_file(t: type) -> Optional[str]:
107
+ """Helper to get the file path of a type's definition."""
108
+ module = sys.modules.get(t.__module__)
109
+ if module and hasattr(module, '__file__'):
110
+ return module.__file__
111
+ return None
112
+
113
+ def remove_null(d: dict) -> dict:
114
+ """Recursively removes keys with None values from a dictionary."""
115
+ return {key: remove_null(value) if isinstance(value, dict) else value for key, value in d.items() if
116
+ not (value is None)}
117
+
118
+ def decorator(data_class: type) -> type:
119
+ expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
120
+ if expected_file_name and expected_file_name == get_file(data_class):
121
+ output: dict[str, str | dict] = {
122
+ 'class_name': data_class.__name__
123
+ }
124
+ if arg:
125
+ output["config"] = remove_null(asdict(arg))
126
+ output_json = json.dumps(output, cls=CustomEncoder, indent=4)
127
+ print(output_json, "___DATAMODELCONFIG___", sep="")
128
+ return data_class
129
+
130
+ if isinstance(arg, type):
131
+ return moose_data_model(None)(arg)
132
+ return decorator
133
+
134
+
135
+ JWTPayload = Dict[str, Any]
136
+
137
+
138
+ @dataclass
139
+ class ConsumptionApiResult:
140
+ """Standard structure for returning results from a Consumption API handler.
141
+
142
+ Attributes:
143
+ status: The HTTP status code for the response.
144
+ body: The response body, which should be JSON serializable.
145
+ """
146
+ status: int
147
+ body: Any
148
+
149
+
150
+ class QueryClient:
151
+ """Client for executing queries, typically against ClickHouse.
152
+
153
+ Args:
154
+ ch_client_or_config: Either an instance of the ClickHouse client or a RuntimeClickHouseConfig.
155
+ """
156
+
157
+ def __init__(self, ch_client_or_config: Union[ClickhouseClient, RuntimeClickHouseConfig]):
158
+ if isinstance(ch_client_or_config, RuntimeClickHouseConfig):
159
+ # Create ClickHouse client from configuration
160
+ config = ch_client_or_config
161
+ interface = 'https' if config.use_ssl else 'http'
162
+ self.ch_client = get_client(
163
+ interface=interface,
164
+ host=config.host,
165
+ port=int(config.port),
166
+ username=config.username,
167
+ password=config.password,
168
+ database=config.database,
169
+ )
170
+ else:
171
+ # Use provided ClickHouse client directly
172
+ self.ch_client = ch_client_or_config
173
+
174
+ def __call__(self, input, variables):
175
+ return self.execute(input, variables)
176
+
177
+ def execute(self, input, variables, row_type: Type[BaseModel] = None):
178
+ params = {}
179
+ values = {}
180
+
181
+ for i, (_, variable_name, _, _) in enumerate(Formatter().parse(input)):
182
+ if variable_name:
183
+ value = variables[variable_name]
184
+ if isinstance(value, list) and len(value) == 1:
185
+ # handling passing the value of the query string dict directly to variables
186
+ value = value[0]
187
+
188
+ t = 'String' if isinstance(value, str) else \
189
+ 'Int64' if isinstance(value, int) else \
190
+ 'Float64' if isinstance(value, float) else "String" # unknown type
191
+
192
+ params[variable_name] = f'{{p{i}: {t}}}'
193
+ values[f'p{i}'] = value
194
+ clickhouse_query = input.format_map(params)
195
+
196
+ # We are not using the result of the ping
197
+ # but this ensures that if the clickhouse cloud service is idle, we
198
+ # wake it up, before we send the query.
199
+ self.ch_client.ping()
200
+
201
+ val = self.ch_client.query(clickhouse_query, values)
202
+
203
+ if row_type is None:
204
+ return list(val.named_results())
205
+ else:
206
+ return list(row_type(**row) for row in val.named_results())
207
+
208
+ def close(self):
209
+ """Close the ClickHouse client connection."""
210
+ if self.ch_client:
211
+ try:
212
+ self.ch_client.close()
213
+ except Exception as e:
214
+ print(f"Error closing ClickHouse client: {e}")
215
+
216
+
217
+ class WorkflowClient:
218
+ """Client for interacting with Temporal workflows.
219
+
220
+ Args:
221
+ temporal_client: An instance of the Temporal client.
222
+ """
223
+
224
+ def __init__(self, temporal_client: TemporalClient):
225
+ self.temporal_client = temporal_client
226
+ self.configs = self.load_consolidated_configs()
227
+ print(f"WorkflowClient - configs: {self.configs}")
228
+
229
+ # Test workflow executor in rust if this changes significantly
230
+ def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
231
+ try:
232
+ workflow_id, run_id = asyncio.run(self._start_workflow_async(name, input_data))
233
+ print(f"WorkflowClient - started workflow: {name}")
234
+ return {
235
+ "status": 200,
236
+ "body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history"
237
+ }
238
+ except Exception as e:
239
+ print(f"WorkflowClient - error while starting workflow: {e}")
240
+ return {
241
+ "status": 400,
242
+ "body": str(e)
243
+ }
244
+
245
+ async def _start_workflow_async(self, name: str, input_data: Any):
246
+ # Extract configuration based on workflow type
247
+ config = self._get_workflow_config(name)
248
+
249
+ # Process input data and generate workflow ID (common logic)
250
+ processed_input, workflow_id = self._process_input_data(name, input_data)
251
+
252
+ # Create retry policy and timeout (common logic)
253
+ retry_policy = RetryPolicy(maximum_attempts=config['retry_count'])
254
+ run_timeout = self.parse_timeout_to_timedelta(config['timeout_str'])
255
+
256
+ print(
257
+ f"WorkflowClient - starting {'DMv2 ' if config['is_dmv2'] else ''}workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}")
258
+
259
+ # Start workflow with appropriate args
260
+ workflow_args = self._build_workflow_args(name, processed_input, config['is_dmv2'])
261
+
262
+ workflow_handle = await self.temporal_client.start_workflow(
263
+ "ScriptWorkflow",
264
+ args=workflow_args,
265
+ id=workflow_id,
266
+ task_queue="python-script-queue",
267
+ id_conflict_policy=WorkflowIDConflictPolicy.FAIL,
268
+ id_reuse_policy=WorkflowIDReusePolicy.ALLOW_DUPLICATE,
269
+ retry_policy=retry_policy,
270
+ run_timeout=run_timeout
271
+ )
272
+
273
+ return workflow_id, workflow_handle.result_run_id
274
+
275
+ def _get_workflow_config(self, name: str) -> Dict[str, Any]:
276
+ """Extract workflow configuration from DMv2 or legacy config."""
277
+ from moose_lib.dmv2 import get_workflow
278
+
279
+ dmv2_workflow = get_workflow(name)
280
+ if dmv2_workflow is not None:
281
+ return {
282
+ 'retry_count': dmv2_workflow.config.retries or 3,
283
+ 'timeout_str': dmv2_workflow.config.timeout or "1h",
284
+ 'is_dmv2': True
285
+ }
286
+ else:
287
+ config = self.configs.get(name, {})
288
+ return {
289
+ 'retry_count': config.get('retries', 3),
290
+ 'timeout_str': config.get('timeout', "1h"),
291
+ 'is_dmv2': False
292
+ }
293
+
294
+ def _process_input_data(self, name: str, input_data: Any) -> tuple[Any, str]:
295
+ """Process input data and generate workflow ID."""
296
+ workflow_id = name
297
+ if input_data:
298
+ try:
299
+ # Handle Pydantic model input for DMv2
300
+ if isinstance(input_data, BaseModel):
301
+ input_data = input_data.model_dump()
302
+ elif isinstance(input_data, str):
303
+ input_data = json.loads(input_data)
304
+
305
+ # Encode with custom encoder
306
+ input_data = json.loads(
307
+ json.dumps({"data": input_data}, cls=EnhancedJSONEncoder)
308
+ )
309
+
310
+ params_str = json.dumps(input_data, sort_keys=True)
311
+ params_hash = hashlib.sha256(params_str.encode()).hexdigest()[:16]
312
+ workflow_id = f"{name}-{params_hash}"
313
+ except Exception as e:
314
+ raise ValueError(f"Invalid input data: {e}")
315
+
316
+ return input_data, workflow_id
317
+
318
+ def _build_workflow_args(self, name: str, input_data: Any, is_dmv2: bool) -> list:
319
+ """Build workflow arguments based on workflow type."""
320
+ if is_dmv2:
321
+ return [f"{name}", input_data]
322
+ else:
323
+ return [f"{os.getcwd()}/app/scripts/{name}", input_data]
324
+
325
+ def load_consolidated_configs(self):
326
+ try:
327
+ file_path = os.path.join(os.getcwd(), ".moose", "workflow_configs.json")
328
+ with open(file_path, 'r') as file:
329
+ data = json.load(file)
330
+ config_map = {config['name']: config for config in data}
331
+ return config_map
332
+ except Exception as e:
333
+ raise ValueError(f"Error loading file {file_path}: {e}")
334
+
335
+ def parse_timeout_to_timedelta(self, timeout_str: str) -> timedelta:
336
+ if timeout_str.endswith('h'):
337
+ return timedelta(hours=int(timeout_str[:-1]))
338
+ elif timeout_str.endswith('m'):
339
+ return timedelta(minutes=int(timeout_str[:-1]))
340
+ elif timeout_str.endswith('s'):
341
+ return timedelta(seconds=int(timeout_str[:-1]))
342
+ else:
343
+ raise ValueError(f"Unsupported timeout format: {timeout_str}")
344
+
345
+
346
+ class MooseClient:
347
+ """Unified client for interacting with Moose services (Query, Workflow).
348
+
349
+ Provides access points for executing database queries and managing workflows.
350
+
351
+ Args:
352
+ ch_client: An instance of the ClickHouse client.
353
+ temporal_client: An optional instance of the Temporal client.
354
+ If provided, workflow functionalities are enabled.
355
+
356
+ Attributes:
357
+ query (QueryClient): Client for executing queries.
358
+ workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
359
+ """
360
+
361
+ def __init__(self, ch_client: ClickhouseClient, temporal_client: Optional[TemporalClient] = None):
362
+ self.query = QueryClient(ch_client)
363
+ self.temporal_client = temporal_client
364
+ if temporal_client:
365
+ self.workflow = WorkflowClient(temporal_client)
366
+ else:
367
+ self.workflow = None
368
+
369
+ async def cleanup(self):
370
+ """Cleanup resources before shutdown"""
371
+ if self.query:
372
+ try:
373
+ self.query.close()
374
+ except Exception as e:
375
+ print(f"Error closing Clickhouse client: {e}")
376
+
377
+ if self.temporal_client:
378
+ try:
379
+ await self.temporal_client.close()
380
+ except Exception as e:
381
+ print(f"Error closing Temporal client: {e}")
382
+
383
+
384
+ class Sql:
385
+ """Represents a SQL query template with embedded values.
386
+
387
+ Allows constructing SQL queries safely by separating the query string parts
388
+ from the values to be interpolated, similar to tagged template literals
389
+ in other languages. Supports nesting `Sql` objects.
390
+
391
+ Args:
392
+ raw_strings: List of string fragments forming the SQL template.
393
+ raw_values: List of values to be interpolated between the string fragments.
394
+ Values can be basic types or other `Sql` instances.
395
+
396
+ Raises:
397
+ TypeError: If the number of strings and values doesn't match the expected
398
+ pattern (len(strings) == len(values) + 1).
399
+
400
+ Attributes:
401
+ strings (list[str]): The flattened list of string fragments.
402
+ values (list[Any]): The flattened list of values corresponding to the gaps
403
+ between the strings.
404
+ """
405
+
406
+ def __init__(self, raw_strings: list[str], raw_values: list['RawValue']):
407
+ if len(raw_strings) - 1 != len(raw_values):
408
+ if len(raw_strings) == 0:
409
+ raise TypeError("Expected at least 1 string")
410
+ raise TypeError(f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values")
411
+
412
+ values_length = sum(1 if not isinstance(value, Sql) else len(value.values) for value in raw_values)
413
+
414
+ self.values: list['Value'] = [None] * values_length
415
+ self.strings: list[str] = [None] * (values_length + 1)
416
+
417
+ self.strings[0] = raw_strings[0]
418
+
419
+ i = 0
420
+ pos = 0
421
+ while i < len(raw_values):
422
+ child = raw_values[i]
423
+ raw_string = raw_strings[i + 1]
424
+
425
+ if isinstance(child, Sql):
426
+ self.strings[pos] += child.strings[0]
427
+
428
+ for child_index in range(len(child.values)):
429
+ self.values[pos] = child.values[child_index]
430
+ pos += 1
431
+ self.strings[pos] = child.strings[child_index + 1]
432
+
433
+ self.strings[pos] += raw_string
434
+ else:
435
+ self.values[pos] = child
436
+ pos += 1
437
+ self.strings[pos] = raw_string
438
+
439
+ i += 1
440
+
441
+
442
+ def sigterm_handler():
443
+ """Handles SIGTERM signals by printing a message and exiting gracefully."""
444
+ print("SIGTERM received")
445
+ sys.exit(0)
@@ -30,6 +30,7 @@ from typing import Optional, Callable, Tuple, Any
30
30
 
31
31
  from moose_lib.dmv2 import get_streams, DeadLetterModel
32
32
  from moose_lib import cli_log, CliLogData, DeadLetterQueue
33
+ from moose_lib.commons import EnhancedJSONEncoder
33
34
 
34
35
  # Force stdout to be unbuffered
35
36
  sys.stdout = io.TextIOWrapper(
@@ -82,34 +83,6 @@ class KafkaTopicConfig:
82
83
  return name
83
84
 
84
85
 
85
- class EnhancedJSONEncoder(json.JSONEncoder):
86
- """
87
- Custom JSON encoder that handles:
88
- - datetime objects (converts to ISO format with timezone)
89
- - dataclass instances (converts to dict)
90
- - Pydantic models (converts to dict)
91
- """
92
-
93
- def default(self, o):
94
- if isinstance(o, datetime):
95
- if o.tzinfo is None:
96
- o = o.replace(tzinfo=timezone.utc)
97
- return o.isoformat()
98
- if hasattr(o, "model_dump"): # Handle Pydantic v2 models
99
- # Convert to dict and handle datetime fields
100
- data = o.model_dump()
101
- # Handle any datetime fields that might be present
102
- for key, value in data.items():
103
- if isinstance(value, datetime):
104
- if value.tzinfo is None:
105
- value = value.replace(tzinfo=timezone.utc)
106
- data[key] = value.isoformat()
107
- return data
108
- if dataclasses.is_dataclass(o):
109
- return dataclasses.asdict(o)
110
- return super().default(o)
111
-
112
-
113
86
  def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str) -> Tuple[type, Callable]:
114
87
  """
115
88
  Load a DMV1 (legacy) streaming function from a Python module.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.4.262
3
+ Version: 0.4.263
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -1,254 +0,0 @@
1
- """Core Moose Python library definitions.
2
-
3
- This module provides foundational classes, enums, and functions used across the Moose ecosystem,
4
- including configuration objects, clients for interacting with services (ClickHouse, Temporal),
5
- and utilities for defining data models and SQL queries.
6
- """
7
- from clickhouse_connect.driver.client import Client as ClickhouseClient
8
- from temporalio.client import Client as TemporalClient
9
- from dataclasses import dataclass, asdict
10
- from enum import Enum
11
- from typing import Any, Callable, Dict, Optional, TypeVar, overload
12
- import sys
13
- import os
14
- import json
15
-
16
-
17
- @dataclass
18
- class StreamingFunction:
19
- """Represents a function intended for stream processing.
20
-
21
- Attributes:
22
- run: The callable function that performs the streaming logic.
23
- """
24
- run: Callable
25
-
26
-
27
-
28
- @dataclass
29
- class StorageConfig:
30
- """Configuration related to data storage, typically in an OLAP table.
31
-
32
- Attributes:
33
- enabled: Whether storage is enabled for this data model.
34
- order_by_fields: List of fields to use for ordering in the storage layer.
35
- deduplicate: Whether to enable deduplication based on the order_by_fields.
36
- """
37
- enabled: Optional[bool] = None
38
- order_by_fields: Optional[list[str]] = None
39
- deduplicate: Optional[bool] = None
40
-
41
-
42
- @dataclass
43
- class DataModelConfig:
44
- """Top-level configuration for a Moose data model.
45
-
46
- Combines ingestion and storage settings.
47
-
48
- Attributes:
49
- storage: Configuration for how data is stored.
50
- """
51
- storage: Optional[StorageConfig] = None
52
-
53
-
54
- class CustomEncoder(json.JSONEncoder):
55
- """Custom JSON encoder that handles Enum types by encoding their values."""
56
- def default(self, obj):
57
- if isinstance(obj, Enum):
58
- return obj.value
59
- return super().default(obj)
60
-
61
-
62
- _DC = TypeVar("_DC", bound=type)
63
-
64
-
65
- @overload
66
- def moose_data_model(arg: Optional[DataModelConfig]) -> Callable[[_DC], _DC]:
67
- """Decorator overload: Applies configuration to a data model class."""
68
- ...
69
-
70
-
71
- @overload
72
- def moose_data_model(arg: _DC) -> _DC:
73
- """Decorator overload: Decorates a data model class without explicit configuration."""
74
- ...
75
-
76
-
77
- def moose_data_model(arg: Any = None) -> Any:
78
- """Decorator for Moose data model classes.
79
-
80
- This decorator can be used with or without arguments:
81
- - `@moose_data_model`: Decorates a class as a Moose data model with default settings.
82
- - `@moose_data_model(DataModelConfig(...))`: Decorates a class and applies the specified
83
- ingestion and storage configurations.
84
-
85
- During infrastructure processing (when `MOOSE_PYTHON_DM_DUMP` environment variable
86
- matches the decorated class's file path), it prints the class name and configuration
87
- as JSON, separated by a specific delimiter (`___DATAMODELCONFIG___`).
88
-
89
- Args:
90
- arg: Either a `DataModelConfig` instance or the class being decorated.
91
-
92
- Returns:
93
- A decorator function or the decorated class.
94
- """
95
- def get_file(t: type) -> Optional[str]:
96
- """Helper to get the file path of a type's definition."""
97
- module = sys.modules.get(t.__module__)
98
- if module and hasattr(module, '__file__'):
99
- return module.__file__
100
- return None
101
-
102
- def remove_null(d: dict) -> dict:
103
- """Recursively removes keys with None values from a dictionary."""
104
- return {key: remove_null(value) if isinstance(value, dict) else value for key, value in d.items() if
105
- not (value is None)}
106
-
107
- def decorator(data_class: type) -> type:
108
- expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
109
- if expected_file_name and expected_file_name == get_file(data_class):
110
- output: dict[str, str | dict] = {
111
- 'class_name': data_class.__name__
112
- }
113
- if arg:
114
- output["config"] = remove_null(asdict(arg))
115
- output_json = json.dumps(output, cls=CustomEncoder, indent=4)
116
- print(output_json, "___DATAMODELCONFIG___", sep="")
117
- return data_class
118
-
119
- if isinstance(arg, type):
120
- return moose_data_model(None)(arg)
121
- return decorator
122
-
123
-
124
- JWTPayload = Dict[str, Any]
125
-
126
-
127
- @dataclass
128
- class ConsumptionApiResult:
129
- """Standard structure for returning results from a Consumption API handler.
130
-
131
- Attributes:
132
- status: The HTTP status code for the response.
133
- body: The response body, which should be JSON serializable.
134
- """
135
- status: int
136
- body: Any
137
-
138
-
139
- class QueryClient:
140
- """Client for executing queries, typically against ClickHouse.
141
-
142
- (Note: Current implementation is a placeholder.)
143
-
144
- Args:
145
- ch_client: An instance of the ClickHouse client.
146
- """
147
- def __init__(self, ch_client: ClickhouseClient):
148
- self.ch_client = ch_client
149
-
150
- def execute(self, input, variables) -> Any:
151
- # No impl for the interface
152
- pass
153
-
154
-
155
- class WorkflowClient:
156
- """Client for interacting with Temporal workflows.
157
-
158
- (Note: Current implementation is a placeholder.)
159
-
160
- Args:
161
- temporal_client: An instance of the Temporal client.
162
- """
163
- def __init__(self, temporal_client: TemporalClient):
164
- self.temporal_client = temporal_client
165
-
166
- def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
167
- # No impl for the interface
168
- pass
169
-
170
-
171
- class MooseClient:
172
- """Unified client for interacting with Moose services (Query, Workflow).
173
-
174
- Provides access points for executing database queries and managing workflows.
175
-
176
- Args:
177
- ch_client: An instance of the ClickHouse client.
178
- temporal_client: An optional instance of the Temporal client.
179
- If provided, workflow functionalities are enabled.
180
-
181
- Attributes:
182
- query (QueryClient): Client for executing queries.
183
- workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
184
- """
185
- def __init__(self, ch_client: ClickhouseClient, temporal_client: Optional[TemporalClient] = None):
186
- self.query = QueryClient(ch_client)
187
- if temporal_client:
188
- self.workflow = WorkflowClient(temporal_client)
189
- else:
190
- self.workflow = None
191
-
192
-
193
- class Sql:
194
- """Represents a SQL query template with embedded values.
195
-
196
- Allows constructing SQL queries safely by separating the query string parts
197
- from the values to be interpolated, similar to tagged template literals
198
- in other languages. Supports nesting `Sql` objects.
199
-
200
- Args:
201
- raw_strings: List of string fragments forming the SQL template.
202
- raw_values: List of values to be interpolated between the string fragments.
203
- Values can be basic types or other `Sql` instances.
204
-
205
- Raises:
206
- TypeError: If the number of strings and values doesn't match the expected
207
- pattern (len(strings) == len(values) + 1).
208
-
209
- Attributes:
210
- strings (list[str]): The flattened list of string fragments.
211
- values (list[Any]): The flattened list of values corresponding to the gaps
212
- between the strings.
213
- """
214
- def __init__(self, raw_strings: list[str], raw_values: list['RawValue']):
215
- if len(raw_strings) - 1 != len(raw_values):
216
- if len(raw_strings) == 0:
217
- raise TypeError("Expected at least 1 string")
218
- raise TypeError(f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values")
219
-
220
- values_length = sum(1 if not isinstance(value, Sql) else len(value.values) for value in raw_values)
221
-
222
- self.values: list['Value'] = [None] * values_length
223
- self.strings: list[str] = [None] * (values_length + 1)
224
-
225
- self.strings[0] = raw_strings[0]
226
-
227
- i = 0
228
- pos = 0
229
- while i < len(raw_values):
230
- child = raw_values[i]
231
- raw_string = raw_strings[i + 1]
232
-
233
- if isinstance(child, Sql):
234
- self.strings[pos] += child.strings[0]
235
-
236
- for child_index in range(len(child.values)):
237
- self.values[pos] = child.values[child_index]
238
- pos += 1
239
- self.strings[pos] = child.strings[child_index + 1]
240
-
241
- self.strings[pos] += raw_string
242
- else:
243
- self.values[pos] = child
244
- pos += 1
245
- self.strings[pos] = raw_string
246
-
247
- i += 1
248
-
249
-
250
- def sigterm_handler():
251
- """Handles SIGTERM signals by printing a message and exiting gracefully."""
252
- print("SIGTERM received")
253
- sys.exit(0)
254
-
File without changes
File without changes
File without changes