moose-lib 0.4.262__tar.gz → 0.4.263__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of moose-lib might be problematic. Click here for more details.
- {moose_lib-0.4.262 → moose_lib-0.4.263}/PKG-INFO +1 -1
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/__init__.py +1 -1
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/commons.py +37 -4
- moose_lib-0.4.263/moose_lib/main.py +445 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/streaming/streaming_function_runner.py +1 -28
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/PKG-INFO +1 -1
- moose_lib-0.4.262/moose_lib/main.py +0 -254
- {moose_lib-0.4.262 → moose_lib-0.4.263}/README.md +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/blocks.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/clients/__init__.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/clients/redis_client.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/__init__.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/config_file.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/config/runtime.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/data_models.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/__init__.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/_registry.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/consumption.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/ingest_api.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/ingest_pipeline.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/materialized_view.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/olap_table.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/registry.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/sql_resource.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/stream.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/types.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/view.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2/workflow.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/dmv2-serializer.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/internal.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/query_param.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/streaming/__init__.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib/tasks.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/SOURCES.txt +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/dependency_links.txt +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/requires.txt +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/moose_lib.egg-info/top_level.txt +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/setup.cfg +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/setup.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/__init__.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/conftest.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/test_moose.py +0 -0
- {moose_lib-0.4.262 → moose_lib-0.4.263}/tests/test_redis_client.py +0 -0
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import logging
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
|
|
2
5
|
import requests
|
|
3
6
|
import json
|
|
4
7
|
from typing import Optional, Literal
|
|
5
8
|
|
|
9
|
+
|
|
6
10
|
class CliLogData:
|
|
7
11
|
INFO = "Info"
|
|
8
12
|
SUCCESS = "Success"
|
|
9
13
|
ERROR = "Error"
|
|
10
14
|
HIGHLIGHT = "Highlight"
|
|
11
15
|
|
|
12
|
-
def __init__(self, action: str, message: str,
|
|
16
|
+
def __init__(self, action: str, message: str,
|
|
17
|
+
message_type: Optional[Literal[INFO, SUCCESS, ERROR, HIGHLIGHT]] = INFO):
|
|
13
18
|
self.message_type = message_type
|
|
14
19
|
self.action = action
|
|
15
20
|
self.message = message
|
|
@@ -31,11 +36,11 @@ def cli_log(log: CliLogData) -> None:
|
|
|
31
36
|
|
|
32
37
|
class Logger:
|
|
33
38
|
default_action = "Custom"
|
|
34
|
-
|
|
39
|
+
|
|
35
40
|
def __init__(self, action: Optional[str] = None, is_moose_task: bool = False):
|
|
36
41
|
self.action = action or Logger.default_action
|
|
37
42
|
self._is_moose_task = is_moose_task
|
|
38
|
-
|
|
43
|
+
|
|
39
44
|
def _log(self, message: str, message_type: str) -> None:
|
|
40
45
|
if self._is_moose_task:
|
|
41
46
|
# We have a task decorator in the lib that initializes a logger
|
|
@@ -62,4 +67,32 @@ class Logger:
|
|
|
62
67
|
self._log(message, CliLogData.ERROR)
|
|
63
68
|
|
|
64
69
|
def highlight(self, message: str) -> None:
|
|
65
|
-
self._log(message, CliLogData.HIGHLIGHT)
|
|
70
|
+
self._log(message, CliLogData.HIGHLIGHT)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class EnhancedJSONEncoder(json.JSONEncoder):
|
|
74
|
+
"""
|
|
75
|
+
Custom JSON encoder that handles:
|
|
76
|
+
- datetime objects (converts to ISO format with timezone)
|
|
77
|
+
- dataclass instances (converts to dict)
|
|
78
|
+
- Pydantic models (converts to dict)
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def default(self, o):
|
|
82
|
+
if isinstance(o, datetime):
|
|
83
|
+
if o.tzinfo is None:
|
|
84
|
+
o = o.replace(tzinfo=timezone.utc)
|
|
85
|
+
return o.isoformat()
|
|
86
|
+
if hasattr(o, "model_dump"): # Handle Pydantic v2 models
|
|
87
|
+
# Convert to dict and handle datetime fields
|
|
88
|
+
data = o.model_dump()
|
|
89
|
+
# Handle any datetime fields that might be present
|
|
90
|
+
for key, value in data.items():
|
|
91
|
+
if isinstance(value, datetime):
|
|
92
|
+
if value.tzinfo is None:
|
|
93
|
+
value = value.replace(tzinfo=timezone.utc)
|
|
94
|
+
data[key] = value.isoformat()
|
|
95
|
+
return data
|
|
96
|
+
if dataclasses.is_dataclass(o):
|
|
97
|
+
return dataclasses.asdict(o)
|
|
98
|
+
return super().default(o)
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
"""Core Moose Python library definitions.
|
|
2
|
+
|
|
3
|
+
This module provides foundational classes, enums, and functions used across the Moose ecosystem,
|
|
4
|
+
including configuration objects, clients for interacting with services (ClickHouse, Temporal),
|
|
5
|
+
and utilities for defining data models and SQL queries.
|
|
6
|
+
"""
|
|
7
|
+
from clickhouse_connect.driver.client import Client as ClickhouseClient
|
|
8
|
+
from clickhouse_connect import get_client
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
from dataclasses import dataclass, asdict
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any, Callable, Dict, Optional, TypeVar, overload, Type, Union
|
|
13
|
+
import sys
|
|
14
|
+
import os
|
|
15
|
+
import json
|
|
16
|
+
import hashlib
|
|
17
|
+
import asyncio
|
|
18
|
+
from string import Formatter
|
|
19
|
+
from temporalio.client import Client as TemporalClient, TLSConfig
|
|
20
|
+
from temporalio.common import RetryPolicy, WorkflowIDConflictPolicy, WorkflowIDReusePolicy
|
|
21
|
+
from datetime import timedelta
|
|
22
|
+
from .config.runtime import RuntimeClickHouseConfig
|
|
23
|
+
|
|
24
|
+
from moose_lib.commons import EnhancedJSONEncoder
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class StreamingFunction:
|
|
29
|
+
"""Represents a function intended for stream processing.
|
|
30
|
+
|
|
31
|
+
Attributes:
|
|
32
|
+
run: The callable function that performs the streaming logic.
|
|
33
|
+
"""
|
|
34
|
+
run: Callable
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
|
|
38
|
+
class StorageConfig:
|
|
39
|
+
"""Configuration related to data storage, typically in an OLAP table.
|
|
40
|
+
|
|
41
|
+
Attributes:
|
|
42
|
+
enabled: Whether storage is enabled for this data model.
|
|
43
|
+
order_by_fields: List of fields to use for ordering in the storage layer.
|
|
44
|
+
deduplicate: Whether to enable deduplication based on the order_by_fields.
|
|
45
|
+
"""
|
|
46
|
+
enabled: Optional[bool] = None
|
|
47
|
+
order_by_fields: Optional[list[str]] = None
|
|
48
|
+
deduplicate: Optional[bool] = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
|
|
52
|
+
class DataModelConfig:
|
|
53
|
+
"""Top-level configuration for a Moose data model.
|
|
54
|
+
|
|
55
|
+
Combines ingestion and storage settings.
|
|
56
|
+
|
|
57
|
+
Attributes:
|
|
58
|
+
storage: Configuration for how data is stored.
|
|
59
|
+
"""
|
|
60
|
+
storage: Optional[StorageConfig] = None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class CustomEncoder(json.JSONEncoder):
|
|
64
|
+
"""Custom JSON encoder that handles Enum types by encoding their values."""
|
|
65
|
+
|
|
66
|
+
def default(self, obj):
|
|
67
|
+
if isinstance(obj, Enum):
|
|
68
|
+
return obj.value
|
|
69
|
+
return super().default(obj)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
_DC = TypeVar("_DC", bound=type)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@overload
|
|
76
|
+
def moose_data_model(arg: Optional[DataModelConfig]) -> Callable[[_DC], _DC]:
|
|
77
|
+
"""Decorator overload: Applies configuration to a data model class."""
|
|
78
|
+
...
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@overload
|
|
82
|
+
def moose_data_model(arg: _DC) -> _DC:
|
|
83
|
+
"""Decorator overload: Decorates a data model class without explicit configuration."""
|
|
84
|
+
...
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def moose_data_model(arg: Any = None) -> Any:
|
|
88
|
+
"""Decorator for Moose data model classes.
|
|
89
|
+
|
|
90
|
+
This decorator can be used with or without arguments:
|
|
91
|
+
- `@moose_data_model`: Decorates a class as a Moose data model with default settings.
|
|
92
|
+
- `@moose_data_model(DataModelConfig(...))`: Decorates a class and applies the specified
|
|
93
|
+
ingestion and storage configurations.
|
|
94
|
+
|
|
95
|
+
During infrastructure processing (when `MOOSE_PYTHON_DM_DUMP` environment variable
|
|
96
|
+
matches the decorated class's file path), it prints the class name and configuration
|
|
97
|
+
as JSON, separated by a specific delimiter (`___DATAMODELCONFIG___`).
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
arg: Either a `DataModelConfig` instance or the class being decorated.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
A decorator function or the decorated class.
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
def get_file(t: type) -> Optional[str]:
|
|
107
|
+
"""Helper to get the file path of a type's definition."""
|
|
108
|
+
module = sys.modules.get(t.__module__)
|
|
109
|
+
if module and hasattr(module, '__file__'):
|
|
110
|
+
return module.__file__
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
def remove_null(d: dict) -> dict:
|
|
114
|
+
"""Recursively removes keys with None values from a dictionary."""
|
|
115
|
+
return {key: remove_null(value) if isinstance(value, dict) else value for key, value in d.items() if
|
|
116
|
+
not (value is None)}
|
|
117
|
+
|
|
118
|
+
def decorator(data_class: type) -> type:
|
|
119
|
+
expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
|
|
120
|
+
if expected_file_name and expected_file_name == get_file(data_class):
|
|
121
|
+
output: dict[str, str | dict] = {
|
|
122
|
+
'class_name': data_class.__name__
|
|
123
|
+
}
|
|
124
|
+
if arg:
|
|
125
|
+
output["config"] = remove_null(asdict(arg))
|
|
126
|
+
output_json = json.dumps(output, cls=CustomEncoder, indent=4)
|
|
127
|
+
print(output_json, "___DATAMODELCONFIG___", sep="")
|
|
128
|
+
return data_class
|
|
129
|
+
|
|
130
|
+
if isinstance(arg, type):
|
|
131
|
+
return moose_data_model(None)(arg)
|
|
132
|
+
return decorator
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
JWTPayload = Dict[str, Any]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@dataclass
|
|
139
|
+
class ConsumptionApiResult:
|
|
140
|
+
"""Standard structure for returning results from a Consumption API handler.
|
|
141
|
+
|
|
142
|
+
Attributes:
|
|
143
|
+
status: The HTTP status code for the response.
|
|
144
|
+
body: The response body, which should be JSON serializable.
|
|
145
|
+
"""
|
|
146
|
+
status: int
|
|
147
|
+
body: Any
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class QueryClient:
|
|
151
|
+
"""Client for executing queries, typically against ClickHouse.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
ch_client_or_config: Either an instance of the ClickHouse client or a RuntimeClickHouseConfig.
|
|
155
|
+
"""
|
|
156
|
+
|
|
157
|
+
def __init__(self, ch_client_or_config: Union[ClickhouseClient, RuntimeClickHouseConfig]):
|
|
158
|
+
if isinstance(ch_client_or_config, RuntimeClickHouseConfig):
|
|
159
|
+
# Create ClickHouse client from configuration
|
|
160
|
+
config = ch_client_or_config
|
|
161
|
+
interface = 'https' if config.use_ssl else 'http'
|
|
162
|
+
self.ch_client = get_client(
|
|
163
|
+
interface=interface,
|
|
164
|
+
host=config.host,
|
|
165
|
+
port=int(config.port),
|
|
166
|
+
username=config.username,
|
|
167
|
+
password=config.password,
|
|
168
|
+
database=config.database,
|
|
169
|
+
)
|
|
170
|
+
else:
|
|
171
|
+
# Use provided ClickHouse client directly
|
|
172
|
+
self.ch_client = ch_client_or_config
|
|
173
|
+
|
|
174
|
+
def __call__(self, input, variables):
|
|
175
|
+
return self.execute(input, variables)
|
|
176
|
+
|
|
177
|
+
def execute(self, input, variables, row_type: Type[BaseModel] = None):
|
|
178
|
+
params = {}
|
|
179
|
+
values = {}
|
|
180
|
+
|
|
181
|
+
for i, (_, variable_name, _, _) in enumerate(Formatter().parse(input)):
|
|
182
|
+
if variable_name:
|
|
183
|
+
value = variables[variable_name]
|
|
184
|
+
if isinstance(value, list) and len(value) == 1:
|
|
185
|
+
# handling passing the value of the query string dict directly to variables
|
|
186
|
+
value = value[0]
|
|
187
|
+
|
|
188
|
+
t = 'String' if isinstance(value, str) else \
|
|
189
|
+
'Int64' if isinstance(value, int) else \
|
|
190
|
+
'Float64' if isinstance(value, float) else "String" # unknown type
|
|
191
|
+
|
|
192
|
+
params[variable_name] = f'{{p{i}: {t}}}'
|
|
193
|
+
values[f'p{i}'] = value
|
|
194
|
+
clickhouse_query = input.format_map(params)
|
|
195
|
+
|
|
196
|
+
# We are not using the result of the ping
|
|
197
|
+
# but this ensures that if the clickhouse cloud service is idle, we
|
|
198
|
+
# wake it up, before we send the query.
|
|
199
|
+
self.ch_client.ping()
|
|
200
|
+
|
|
201
|
+
val = self.ch_client.query(clickhouse_query, values)
|
|
202
|
+
|
|
203
|
+
if row_type is None:
|
|
204
|
+
return list(val.named_results())
|
|
205
|
+
else:
|
|
206
|
+
return list(row_type(**row) for row in val.named_results())
|
|
207
|
+
|
|
208
|
+
def close(self):
|
|
209
|
+
"""Close the ClickHouse client connection."""
|
|
210
|
+
if self.ch_client:
|
|
211
|
+
try:
|
|
212
|
+
self.ch_client.close()
|
|
213
|
+
except Exception as e:
|
|
214
|
+
print(f"Error closing ClickHouse client: {e}")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class WorkflowClient:
|
|
218
|
+
"""Client for interacting with Temporal workflows.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
temporal_client: An instance of the Temporal client.
|
|
222
|
+
"""
|
|
223
|
+
|
|
224
|
+
def __init__(self, temporal_client: TemporalClient):
|
|
225
|
+
self.temporal_client = temporal_client
|
|
226
|
+
self.configs = self.load_consolidated_configs()
|
|
227
|
+
print(f"WorkflowClient - configs: {self.configs}")
|
|
228
|
+
|
|
229
|
+
# Test workflow executor in rust if this changes significantly
|
|
230
|
+
def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
|
|
231
|
+
try:
|
|
232
|
+
workflow_id, run_id = asyncio.run(self._start_workflow_async(name, input_data))
|
|
233
|
+
print(f"WorkflowClient - started workflow: {name}")
|
|
234
|
+
return {
|
|
235
|
+
"status": 200,
|
|
236
|
+
"body": f"Workflow started: {name}. View it in the Temporal dashboard: http://localhost:8080/namespaces/default/workflows/{workflow_id}/{run_id}/history"
|
|
237
|
+
}
|
|
238
|
+
except Exception as e:
|
|
239
|
+
print(f"WorkflowClient - error while starting workflow: {e}")
|
|
240
|
+
return {
|
|
241
|
+
"status": 400,
|
|
242
|
+
"body": str(e)
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
async def _start_workflow_async(self, name: str, input_data: Any):
|
|
246
|
+
# Extract configuration based on workflow type
|
|
247
|
+
config = self._get_workflow_config(name)
|
|
248
|
+
|
|
249
|
+
# Process input data and generate workflow ID (common logic)
|
|
250
|
+
processed_input, workflow_id = self._process_input_data(name, input_data)
|
|
251
|
+
|
|
252
|
+
# Create retry policy and timeout (common logic)
|
|
253
|
+
retry_policy = RetryPolicy(maximum_attempts=config['retry_count'])
|
|
254
|
+
run_timeout = self.parse_timeout_to_timedelta(config['timeout_str'])
|
|
255
|
+
|
|
256
|
+
print(
|
|
257
|
+
f"WorkflowClient - starting {'DMv2 ' if config['is_dmv2'] else ''}workflow: {name} with retry policy: {retry_policy} and timeout: {run_timeout}")
|
|
258
|
+
|
|
259
|
+
# Start workflow with appropriate args
|
|
260
|
+
workflow_args = self._build_workflow_args(name, processed_input, config['is_dmv2'])
|
|
261
|
+
|
|
262
|
+
workflow_handle = await self.temporal_client.start_workflow(
|
|
263
|
+
"ScriptWorkflow",
|
|
264
|
+
args=workflow_args,
|
|
265
|
+
id=workflow_id,
|
|
266
|
+
task_queue="python-script-queue",
|
|
267
|
+
id_conflict_policy=WorkflowIDConflictPolicy.FAIL,
|
|
268
|
+
id_reuse_policy=WorkflowIDReusePolicy.ALLOW_DUPLICATE,
|
|
269
|
+
retry_policy=retry_policy,
|
|
270
|
+
run_timeout=run_timeout
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
return workflow_id, workflow_handle.result_run_id
|
|
274
|
+
|
|
275
|
+
def _get_workflow_config(self, name: str) -> Dict[str, Any]:
|
|
276
|
+
"""Extract workflow configuration from DMv2 or legacy config."""
|
|
277
|
+
from moose_lib.dmv2 import get_workflow
|
|
278
|
+
|
|
279
|
+
dmv2_workflow = get_workflow(name)
|
|
280
|
+
if dmv2_workflow is not None:
|
|
281
|
+
return {
|
|
282
|
+
'retry_count': dmv2_workflow.config.retries or 3,
|
|
283
|
+
'timeout_str': dmv2_workflow.config.timeout or "1h",
|
|
284
|
+
'is_dmv2': True
|
|
285
|
+
}
|
|
286
|
+
else:
|
|
287
|
+
config = self.configs.get(name, {})
|
|
288
|
+
return {
|
|
289
|
+
'retry_count': config.get('retries', 3),
|
|
290
|
+
'timeout_str': config.get('timeout', "1h"),
|
|
291
|
+
'is_dmv2': False
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
def _process_input_data(self, name: str, input_data: Any) -> tuple[Any, str]:
|
|
295
|
+
"""Process input data and generate workflow ID."""
|
|
296
|
+
workflow_id = name
|
|
297
|
+
if input_data:
|
|
298
|
+
try:
|
|
299
|
+
# Handle Pydantic model input for DMv2
|
|
300
|
+
if isinstance(input_data, BaseModel):
|
|
301
|
+
input_data = input_data.model_dump()
|
|
302
|
+
elif isinstance(input_data, str):
|
|
303
|
+
input_data = json.loads(input_data)
|
|
304
|
+
|
|
305
|
+
# Encode with custom encoder
|
|
306
|
+
input_data = json.loads(
|
|
307
|
+
json.dumps({"data": input_data}, cls=EnhancedJSONEncoder)
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
params_str = json.dumps(input_data, sort_keys=True)
|
|
311
|
+
params_hash = hashlib.sha256(params_str.encode()).hexdigest()[:16]
|
|
312
|
+
workflow_id = f"{name}-{params_hash}"
|
|
313
|
+
except Exception as e:
|
|
314
|
+
raise ValueError(f"Invalid input data: {e}")
|
|
315
|
+
|
|
316
|
+
return input_data, workflow_id
|
|
317
|
+
|
|
318
|
+
def _build_workflow_args(self, name: str, input_data: Any, is_dmv2: bool) -> list:
|
|
319
|
+
"""Build workflow arguments based on workflow type."""
|
|
320
|
+
if is_dmv2:
|
|
321
|
+
return [f"{name}", input_data]
|
|
322
|
+
else:
|
|
323
|
+
return [f"{os.getcwd()}/app/scripts/{name}", input_data]
|
|
324
|
+
|
|
325
|
+
def load_consolidated_configs(self):
|
|
326
|
+
try:
|
|
327
|
+
file_path = os.path.join(os.getcwd(), ".moose", "workflow_configs.json")
|
|
328
|
+
with open(file_path, 'r') as file:
|
|
329
|
+
data = json.load(file)
|
|
330
|
+
config_map = {config['name']: config for config in data}
|
|
331
|
+
return config_map
|
|
332
|
+
except Exception as e:
|
|
333
|
+
raise ValueError(f"Error loading file {file_path}: {e}")
|
|
334
|
+
|
|
335
|
+
def parse_timeout_to_timedelta(self, timeout_str: str) -> timedelta:
|
|
336
|
+
if timeout_str.endswith('h'):
|
|
337
|
+
return timedelta(hours=int(timeout_str[:-1]))
|
|
338
|
+
elif timeout_str.endswith('m'):
|
|
339
|
+
return timedelta(minutes=int(timeout_str[:-1]))
|
|
340
|
+
elif timeout_str.endswith('s'):
|
|
341
|
+
return timedelta(seconds=int(timeout_str[:-1]))
|
|
342
|
+
else:
|
|
343
|
+
raise ValueError(f"Unsupported timeout format: {timeout_str}")
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class MooseClient:
|
|
347
|
+
"""Unified client for interacting with Moose services (Query, Workflow).
|
|
348
|
+
|
|
349
|
+
Provides access points for executing database queries and managing workflows.
|
|
350
|
+
|
|
351
|
+
Args:
|
|
352
|
+
ch_client: An instance of the ClickHouse client.
|
|
353
|
+
temporal_client: An optional instance of the Temporal client.
|
|
354
|
+
If provided, workflow functionalities are enabled.
|
|
355
|
+
|
|
356
|
+
Attributes:
|
|
357
|
+
query (QueryClient): Client for executing queries.
|
|
358
|
+
workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
|
|
359
|
+
"""
|
|
360
|
+
|
|
361
|
+
def __init__(self, ch_client: ClickhouseClient, temporal_client: Optional[TemporalClient] = None):
|
|
362
|
+
self.query = QueryClient(ch_client)
|
|
363
|
+
self.temporal_client = temporal_client
|
|
364
|
+
if temporal_client:
|
|
365
|
+
self.workflow = WorkflowClient(temporal_client)
|
|
366
|
+
else:
|
|
367
|
+
self.workflow = None
|
|
368
|
+
|
|
369
|
+
async def cleanup(self):
|
|
370
|
+
"""Cleanup resources before shutdown"""
|
|
371
|
+
if self.query:
|
|
372
|
+
try:
|
|
373
|
+
self.query.close()
|
|
374
|
+
except Exception as e:
|
|
375
|
+
print(f"Error closing Clickhouse client: {e}")
|
|
376
|
+
|
|
377
|
+
if self.temporal_client:
|
|
378
|
+
try:
|
|
379
|
+
await self.temporal_client.close()
|
|
380
|
+
except Exception as e:
|
|
381
|
+
print(f"Error closing Temporal client: {e}")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class Sql:
|
|
385
|
+
"""Represents a SQL query template with embedded values.
|
|
386
|
+
|
|
387
|
+
Allows constructing SQL queries safely by separating the query string parts
|
|
388
|
+
from the values to be interpolated, similar to tagged template literals
|
|
389
|
+
in other languages. Supports nesting `Sql` objects.
|
|
390
|
+
|
|
391
|
+
Args:
|
|
392
|
+
raw_strings: List of string fragments forming the SQL template.
|
|
393
|
+
raw_values: List of values to be interpolated between the string fragments.
|
|
394
|
+
Values can be basic types or other `Sql` instances.
|
|
395
|
+
|
|
396
|
+
Raises:
|
|
397
|
+
TypeError: If the number of strings and values doesn't match the expected
|
|
398
|
+
pattern (len(strings) == len(values) + 1).
|
|
399
|
+
|
|
400
|
+
Attributes:
|
|
401
|
+
strings (list[str]): The flattened list of string fragments.
|
|
402
|
+
values (list[Any]): The flattened list of values corresponding to the gaps
|
|
403
|
+
between the strings.
|
|
404
|
+
"""
|
|
405
|
+
|
|
406
|
+
def __init__(self, raw_strings: list[str], raw_values: list['RawValue']):
|
|
407
|
+
if len(raw_strings) - 1 != len(raw_values):
|
|
408
|
+
if len(raw_strings) == 0:
|
|
409
|
+
raise TypeError("Expected at least 1 string")
|
|
410
|
+
raise TypeError(f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values")
|
|
411
|
+
|
|
412
|
+
values_length = sum(1 if not isinstance(value, Sql) else len(value.values) for value in raw_values)
|
|
413
|
+
|
|
414
|
+
self.values: list['Value'] = [None] * values_length
|
|
415
|
+
self.strings: list[str] = [None] * (values_length + 1)
|
|
416
|
+
|
|
417
|
+
self.strings[0] = raw_strings[0]
|
|
418
|
+
|
|
419
|
+
i = 0
|
|
420
|
+
pos = 0
|
|
421
|
+
while i < len(raw_values):
|
|
422
|
+
child = raw_values[i]
|
|
423
|
+
raw_string = raw_strings[i + 1]
|
|
424
|
+
|
|
425
|
+
if isinstance(child, Sql):
|
|
426
|
+
self.strings[pos] += child.strings[0]
|
|
427
|
+
|
|
428
|
+
for child_index in range(len(child.values)):
|
|
429
|
+
self.values[pos] = child.values[child_index]
|
|
430
|
+
pos += 1
|
|
431
|
+
self.strings[pos] = child.strings[child_index + 1]
|
|
432
|
+
|
|
433
|
+
self.strings[pos] += raw_string
|
|
434
|
+
else:
|
|
435
|
+
self.values[pos] = child
|
|
436
|
+
pos += 1
|
|
437
|
+
self.strings[pos] = raw_string
|
|
438
|
+
|
|
439
|
+
i += 1
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def sigterm_handler():
|
|
443
|
+
"""Handles SIGTERM signals by printing a message and exiting gracefully."""
|
|
444
|
+
print("SIGTERM received")
|
|
445
|
+
sys.exit(0)
|
|
@@ -30,6 +30,7 @@ from typing import Optional, Callable, Tuple, Any
|
|
|
30
30
|
|
|
31
31
|
from moose_lib.dmv2 import get_streams, DeadLetterModel
|
|
32
32
|
from moose_lib import cli_log, CliLogData, DeadLetterQueue
|
|
33
|
+
from moose_lib.commons import EnhancedJSONEncoder
|
|
33
34
|
|
|
34
35
|
# Force stdout to be unbuffered
|
|
35
36
|
sys.stdout = io.TextIOWrapper(
|
|
@@ -82,34 +83,6 @@ class KafkaTopicConfig:
|
|
|
82
83
|
return name
|
|
83
84
|
|
|
84
85
|
|
|
85
|
-
class EnhancedJSONEncoder(json.JSONEncoder):
|
|
86
|
-
"""
|
|
87
|
-
Custom JSON encoder that handles:
|
|
88
|
-
- datetime objects (converts to ISO format with timezone)
|
|
89
|
-
- dataclass instances (converts to dict)
|
|
90
|
-
- Pydantic models (converts to dict)
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
def default(self, o):
|
|
94
|
-
if isinstance(o, datetime):
|
|
95
|
-
if o.tzinfo is None:
|
|
96
|
-
o = o.replace(tzinfo=timezone.utc)
|
|
97
|
-
return o.isoformat()
|
|
98
|
-
if hasattr(o, "model_dump"): # Handle Pydantic v2 models
|
|
99
|
-
# Convert to dict and handle datetime fields
|
|
100
|
-
data = o.model_dump()
|
|
101
|
-
# Handle any datetime fields that might be present
|
|
102
|
-
for key, value in data.items():
|
|
103
|
-
if isinstance(value, datetime):
|
|
104
|
-
if value.tzinfo is None:
|
|
105
|
-
value = value.replace(tzinfo=timezone.utc)
|
|
106
|
-
data[key] = value.isoformat()
|
|
107
|
-
return data
|
|
108
|
-
if dataclasses.is_dataclass(o):
|
|
109
|
-
return dataclasses.asdict(o)
|
|
110
|
-
return super().default(o)
|
|
111
|
-
|
|
112
|
-
|
|
113
86
|
def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str) -> Tuple[type, Callable]:
|
|
114
87
|
"""
|
|
115
88
|
Load a DMV1 (legacy) streaming function from a Python module.
|
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
"""Core Moose Python library definitions.
|
|
2
|
-
|
|
3
|
-
This module provides foundational classes, enums, and functions used across the Moose ecosystem,
|
|
4
|
-
including configuration objects, clients for interacting with services (ClickHouse, Temporal),
|
|
5
|
-
and utilities for defining data models and SQL queries.
|
|
6
|
-
"""
|
|
7
|
-
from clickhouse_connect.driver.client import Client as ClickhouseClient
|
|
8
|
-
from temporalio.client import Client as TemporalClient
|
|
9
|
-
from dataclasses import dataclass, asdict
|
|
10
|
-
from enum import Enum
|
|
11
|
-
from typing import Any, Callable, Dict, Optional, TypeVar, overload
|
|
12
|
-
import sys
|
|
13
|
-
import os
|
|
14
|
-
import json
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@dataclass
|
|
18
|
-
class StreamingFunction:
|
|
19
|
-
"""Represents a function intended for stream processing.
|
|
20
|
-
|
|
21
|
-
Attributes:
|
|
22
|
-
run: The callable function that performs the streaming logic.
|
|
23
|
-
"""
|
|
24
|
-
run: Callable
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
|
|
29
|
-
class StorageConfig:
|
|
30
|
-
"""Configuration related to data storage, typically in an OLAP table.
|
|
31
|
-
|
|
32
|
-
Attributes:
|
|
33
|
-
enabled: Whether storage is enabled for this data model.
|
|
34
|
-
order_by_fields: List of fields to use for ordering in the storage layer.
|
|
35
|
-
deduplicate: Whether to enable deduplication based on the order_by_fields.
|
|
36
|
-
"""
|
|
37
|
-
enabled: Optional[bool] = None
|
|
38
|
-
order_by_fields: Optional[list[str]] = None
|
|
39
|
-
deduplicate: Optional[bool] = None
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@dataclass
|
|
43
|
-
class DataModelConfig:
|
|
44
|
-
"""Top-level configuration for a Moose data model.
|
|
45
|
-
|
|
46
|
-
Combines ingestion and storage settings.
|
|
47
|
-
|
|
48
|
-
Attributes:
|
|
49
|
-
storage: Configuration for how data is stored.
|
|
50
|
-
"""
|
|
51
|
-
storage: Optional[StorageConfig] = None
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class CustomEncoder(json.JSONEncoder):
|
|
55
|
-
"""Custom JSON encoder that handles Enum types by encoding their values."""
|
|
56
|
-
def default(self, obj):
|
|
57
|
-
if isinstance(obj, Enum):
|
|
58
|
-
return obj.value
|
|
59
|
-
return super().default(obj)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
_DC = TypeVar("_DC", bound=type)
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@overload
|
|
66
|
-
def moose_data_model(arg: Optional[DataModelConfig]) -> Callable[[_DC], _DC]:
|
|
67
|
-
"""Decorator overload: Applies configuration to a data model class."""
|
|
68
|
-
...
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
@overload
|
|
72
|
-
def moose_data_model(arg: _DC) -> _DC:
|
|
73
|
-
"""Decorator overload: Decorates a data model class without explicit configuration."""
|
|
74
|
-
...
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def moose_data_model(arg: Any = None) -> Any:
|
|
78
|
-
"""Decorator for Moose data model classes.
|
|
79
|
-
|
|
80
|
-
This decorator can be used with or without arguments:
|
|
81
|
-
- `@moose_data_model`: Decorates a class as a Moose data model with default settings.
|
|
82
|
-
- `@moose_data_model(DataModelConfig(...))`: Decorates a class and applies the specified
|
|
83
|
-
ingestion and storage configurations.
|
|
84
|
-
|
|
85
|
-
During infrastructure processing (when `MOOSE_PYTHON_DM_DUMP` environment variable
|
|
86
|
-
matches the decorated class's file path), it prints the class name and configuration
|
|
87
|
-
as JSON, separated by a specific delimiter (`___DATAMODELCONFIG___`).
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
arg: Either a `DataModelConfig` instance or the class being decorated.
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
A decorator function or the decorated class.
|
|
94
|
-
"""
|
|
95
|
-
def get_file(t: type) -> Optional[str]:
|
|
96
|
-
"""Helper to get the file path of a type's definition."""
|
|
97
|
-
module = sys.modules.get(t.__module__)
|
|
98
|
-
if module and hasattr(module, '__file__'):
|
|
99
|
-
return module.__file__
|
|
100
|
-
return None
|
|
101
|
-
|
|
102
|
-
def remove_null(d: dict) -> dict:
|
|
103
|
-
"""Recursively removes keys with None values from a dictionary."""
|
|
104
|
-
return {key: remove_null(value) if isinstance(value, dict) else value for key, value in d.items() if
|
|
105
|
-
not (value is None)}
|
|
106
|
-
|
|
107
|
-
def decorator(data_class: type) -> type:
|
|
108
|
-
expected_file_name = os.environ.get("MOOSE_PYTHON_DM_DUMP")
|
|
109
|
-
if expected_file_name and expected_file_name == get_file(data_class):
|
|
110
|
-
output: dict[str, str | dict] = {
|
|
111
|
-
'class_name': data_class.__name__
|
|
112
|
-
}
|
|
113
|
-
if arg:
|
|
114
|
-
output["config"] = remove_null(asdict(arg))
|
|
115
|
-
output_json = json.dumps(output, cls=CustomEncoder, indent=4)
|
|
116
|
-
print(output_json, "___DATAMODELCONFIG___", sep="")
|
|
117
|
-
return data_class
|
|
118
|
-
|
|
119
|
-
if isinstance(arg, type):
|
|
120
|
-
return moose_data_model(None)(arg)
|
|
121
|
-
return decorator
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
JWTPayload = Dict[str, Any]
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
@dataclass
class ConsumptionApiResult:
    """Standard structure for returning results from a Consumption API handler.

    Attributes:
        status: The HTTP status code for the response.
        body: The response body, which should be JSON serializable.
    """
    status: int  # HTTP status code (e.g. 200, 404)
    body: Any  # JSON-serializable response payload
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
class QueryClient:
    """Client for executing queries, typically against ClickHouse.

    (Note: Current implementation is a placeholder.)

    Args:
        ch_client: An instance of the ClickHouse client.
    """
    def __init__(self, ch_client: ClickhouseClient):
        # Underlying ClickHouse client used for query execution.
        self.ch_client = ch_client

    def execute(self, input: Any, variables: Any) -> Any:
        """Execute a query against the backing store.

        Placeholder: the interface is defined here but not implemented,
        so this currently returns None.

        Args:
            input: The query to execute.
            variables: Values to bind into the query.
        """
        # No impl for the interface
        pass
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
class WorkflowClient:
    """Client for interacting with Temporal workflows.

    (Note: Current implementation is a placeholder.)

    Args:
        temporal_client: An instance of the Temporal client.
    """
    def __init__(self, temporal_client: TemporalClient):
        # Underlying Temporal client used for workflow operations.
        self.temporal_client = temporal_client

    def execute(self, name: str, input_data: Any) -> Dict[str, Any]:
        """Execute the named workflow with the given input.

        Placeholder: the interface is defined here but not implemented,
        so this currently returns None.

        Args:
            name: The workflow name to execute.
            input_data: Input payload passed to the workflow.
        """
        # No impl for the interface
        pass
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
class MooseClient:
    """Unified client for interacting with Moose services (Query, Workflow).

    Provides access points for executing database queries and managing workflows.

    Args:
        ch_client: An instance of the ClickHouse client.
        temporal_client: An optional instance of the Temporal client.
            If provided, workflow functionalities are enabled.

    Attributes:
        query (QueryClient): Client for executing queries.
        workflow (Optional[WorkflowClient]): Client for workflow operations (if configured).
    """
    def __init__(self, ch_client: ClickhouseClient, temporal_client: Optional[TemporalClient] = None):
        self.query = QueryClient(ch_client)
        # Workflow support is opt-in: only enabled when a Temporal client is supplied.
        self.workflow = WorkflowClient(temporal_client) if temporal_client else None
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
class Sql:
    """Represents a SQL query template with embedded values.

    Allows constructing SQL queries safely by separating the query string parts
    from the values to be interpolated, similar to tagged template literals
    in other languages. Supports nesting `Sql` objects.

    Args:
        raw_strings: List of string fragments forming the SQL template.
        raw_values: List of values to be interpolated between the string fragments.
            Values can be basic types or other `Sql` instances.

    Raises:
        TypeError: If the number of strings and values doesn't match the expected
            pattern (len(strings) == len(values) + 1).

    Attributes:
        strings (list[str]): The flattened list of string fragments.
        values (list[Any]): The flattened list of values corresponding to the gaps
            between the strings.
    """
    def __init__(self, raw_strings: list[str], raw_values: list['RawValue']):
        # Template invariant: there must be exactly one more string fragment
        # than there are values (a value sits in each "gap" between strings).
        if len(raw_strings) - 1 != len(raw_values):
            if len(raw_strings) == 0:
                raise TypeError("Expected at least 1 string")
            raise TypeError(f"Expected {len(raw_strings)} strings to have {len(raw_strings) - 1} values")

        # Total leaf-value count once nested Sql fragments are flattened:
        # a nested Sql contributes all of its own values, a plain value one.
        values_length = sum(1 if not isinstance(value, Sql) else len(value.values) for value in raw_values)

        # Pre-size the flattened arrays; strings always has one more slot
        # than values, mirroring the invariant checked above.
        self.values: list['Value'] = [None] * values_length
        self.strings: list[str] = [None] * (values_length + 1)

        self.strings[0] = raw_strings[0]

        # i walks the raw inputs; pos is the write cursor into the
        # flattened self.values / self.strings arrays.
        i = 0
        pos = 0
        while i < len(raw_values):
            child = raw_values[i]
            raw_string = raw_strings[i + 1]

            if isinstance(child, Sql):
                # Splice the nested fragment in-place: glue its leading string
                # onto the current tail, copy its values and interior strings,
                # then append our own following raw fragment to the new tail.
                self.strings[pos] += child.strings[0]

                for child_index in range(len(child.values)):
                    self.values[pos] = child.values[child_index]
                    pos += 1
                    self.strings[pos] = child.strings[child_index + 1]

                self.strings[pos] += raw_string
            else:
                # Plain value: record it and start the next string fragment.
                self.values[pos] = child
                pos += 1
                self.strings[pos] = raw_string

            i += 1
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
def sigterm_handler(*_args) -> None:
    """Handle SIGTERM by logging a message and exiting with status 0.

    Accepts (and ignores) any positional arguments so it can be registered
    either directly with ``signal.signal`` — which invokes the handler as
    ``handler(signum, frame)`` — or called with no arguments (e.g. via an
    asyncio ``loop.add_signal_handler`` callback). The original zero-argument
    signature would raise TypeError under ``signal.signal``.

    Raises:
        SystemExit: Always, with exit code 0.
    """
    print("SIGTERM received")
    sys.exit(0)
|
|
254
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|