moose-lib 0.4.217__py3-none-any.whl → 0.4.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/dmv2/__init__.py +142 -0
- moose_lib/dmv2/_registry.py +15 -0
- moose_lib/dmv2/consumption.py +101 -0
- moose_lib/dmv2/ingest_api.py +64 -0
- moose_lib/dmv2/ingest_pipeline.py +156 -0
- moose_lib/dmv2/materialized_view.py +94 -0
- moose_lib/dmv2/olap_table.py +57 -0
- moose_lib/dmv2/registry.py +62 -0
- moose_lib/dmv2/sql_resource.py +49 -0
- moose_lib/dmv2/stream.py +258 -0
- moose_lib/dmv2/types.py +95 -0
- moose_lib/dmv2/view.py +36 -0
- moose_lib/dmv2/workflow.py +156 -0
- moose_lib/internal.py +18 -8
- moose_lib/streaming/streaming_function_runner.py +2 -2
- {moose_lib-0.4.217.dist-info → moose_lib-0.4.219.dist-info}/METADATA +1 -1
- moose_lib-0.4.219.dist-info/RECORD +34 -0
- moose_lib/dmv2.py +0 -981
- moose_lib-0.4.217.dist-info/RECORD +0 -22
- {moose_lib-0.4.217.dist-info → moose_lib-0.4.219.dist-info}/WHEEL +0 -0
- {moose_lib-0.4.217.dist-info → moose_lib-0.4.219.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
"""
|
2
|
+
Base SQL resource definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides the base class for SQL resources like Views and Materialized Views,
|
5
|
+
handling common functionality like setup/teardown SQL commands and dependency tracking.
|
6
|
+
"""
|
7
|
+
from typing import Any, Optional, Union, List
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
from .olap_table import OlapTable
|
11
|
+
from ._registry import _sql_resources
|
12
|
+
|
13
|
+
class SqlResource:
    """Base class for SQL resources like Views and Materialized Views.

    Holds the setup (CREATE) and teardown (DROP) SQL commands for a resource,
    records which tables/resources it reads from and writes to, and stores the
    new instance in the `_sql_resources` registry under its name.

    Attributes:
        name: The name of the SQL resource (e.g., view name).
        setup: SQL commands to create the resource.
        teardown: SQL commands to drop the resource.
        pulls_data_from: Tables / SQL resources this resource reads from.
        pushes_data_to: Tables / SQL resources this resource writes to.
        kind: The kind of the SQL resource (defaults to "SqlResource").
        metadata: Optional metadata passed by the caller; None when omitted.
    """
    setup: list[str]
    teardown: list[str]
    name: str
    kind: str = "SqlResource"
    pulls_data_from: list[Union[OlapTable, "SqlResource"]]
    pushes_data_to: list[Union[OlapTable, "SqlResource"]]
    metadata: Optional[dict]

    def __init__(
            self,
            name: str,
            setup: list[str],
            teardown: list[str],
            pulls_data_from: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
            pushes_data_to: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
            metadata: Optional[dict] = None
    ):
        """Create the resource and register it under `name`.

        Args:
            name: The name of the SQL resource; also the registry key.
            setup: SQL commands to create the resource.
            teardown: SQL commands to drop the resource.
            pulls_data_from: Sources this resource reads from (defaults to an empty list).
            pushes_data_to: Targets this resource writes to (defaults to an empty list).
            metadata: Optional metadata stored as-is on the instance.
        """
        self.name = name
        self.setup = setup
        self.teardown = teardown
        # A missing (or empty) dependency list is normalised to a fresh empty list.
        self.pulls_data_from = pulls_data_from or []
        self.pushes_data_to = pushes_data_to or []
        self.metadata = metadata
        _sql_resources[name] = self
|
moose_lib/dmv2/stream.py
ADDED
@@ -0,0 +1,258 @@
|
|
1
|
+
"""
|
2
|
+
Stream definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring data streams,
|
5
|
+
including stream transformations, consumers, and dead letter queues.
|
6
|
+
"""
|
7
|
+
import dataclasses
|
8
|
+
import datetime
|
9
|
+
from typing import Any, Optional, Callable, Union, Literal, Generic
|
10
|
+
from pydantic import BaseModel, ConfigDict, AliasGenerator
|
11
|
+
from pydantic.alias_generators import to_camel
|
12
|
+
|
13
|
+
from .types import TypedMooseResource, ZeroOrMany, T, U
|
14
|
+
from .olap_table import OlapTable
|
15
|
+
from ._registry import _streams
|
16
|
+
|
17
|
+
class StreamConfig(BaseModel):
    """Settings describing a data stream (e.g., a Redpanda topic).

    Attributes:
        parallelism: Partition count for the stream (defaults to 1).
        retention_period: How long data is retained, in seconds (defaults to 7 days).
        destination: `OlapTable` that stream messages should be ingested into, if any.
        version: Version string for tracking configuration changes, if any.
        metadata: Metadata attached to the stream, if any.
    """
    parallelism: int = 1
    retention_period: int = 7 * 24 * 60 * 60  # one week, expressed in seconds
    destination: Optional[OlapTable[Any]] = None
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
32
|
+
|
33
|
+
class TransformConfig(BaseModel):
    """Options attached to a single stream transformation.

    Attributes:
        version: Version string identifying a specific transformation. Several
            transformations may target the same destination when their versions differ.
        dead_letter_queue: Optional `DeadLetterQueue` associated with the transformation.
        metadata: Optional metadata for the transformation.
    """
    # `DeadLetterQueue` is not a Pydantic model, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    version: Optional[str] = None
    dead_letter_queue: "Optional[DeadLetterQueue]" = None
    metadata: Optional[dict] = None
|
44
|
+
|
45
|
+
class ConsumerConfig(BaseModel):
    """Options attached to a single stream consumer.

    Attributes:
        version: Version string identifying a specific consumer. Several consumers
            may be attached to one stream when their versions differ.
        dead_letter_queue: Optional `DeadLetterQueue` associated with the consumer.
    """
    # `DeadLetterQueue` is not a Pydantic model, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    version: Optional[str] = None
    dead_letter_queue: "Optional[DeadLetterQueue]" = None
|
55
|
+
|
56
|
+
@dataclasses.dataclass
class _RoutedMessage:
    """Internal pairing of outgoing value(s) with the stream they are routed to."""
    # Stream that should receive `values`.
    destination: "Stream[Any]"
    # A single record, a list of records, or None.
    values: ZeroOrMany[Any]
|
61
|
+
|
62
|
+
@dataclasses.dataclass
class ConsumerEntry(Generic[T]):
    """Internal record binding a consumer callable to its configuration."""
    # Callable invoked with a record of type `T`.
    consumer: Callable[[T], None]
    # Configuration the consumer was registered with.
    config: ConsumerConfig
|
67
|
+
|
68
|
+
@dataclasses.dataclass
class TransformEntry(Generic[T]):
    """Internal record binding a transformation to its destination and configuration."""
    # Stream that receives the transformed records.
    destination: "Stream[Any]"
    # Callable turning a record of type `T` into zero, one or many output records.
    transformation: Callable[[T], ZeroOrMany[Any]]
    # Configuration the transformation was registered with.
    config: TransformConfig
|
74
|
+
|
75
|
+
class Stream(TypedMooseResource, Generic[T]):
    """Represents a data stream (e.g., a Redpanda topic) typed with a Pydantic model.

    Allows defining transformations to other streams and adding consumers.
    Each new instance is stored in the `_streams` registry under its name.

    Args:
        name: The name of the stream.
        config: Configuration options for the stream (parallelism, retention, destination).
            When omitted, a fresh default `StreamConfig` is created for this stream.
        t: The Pydantic model defining the stream message schema (passed via `Stream[MyModel](...)`).

    Attributes:
        config (StreamConfig): Configuration settings for this stream.
        metadata: The `metadata` value taken from `config`.
        transformations (dict[str, list[TransformEntry[T]]]): Dictionary mapping destination stream names
                                                       to lists of transformation entries.
        consumers (list[ConsumerEntry[T]]): List of consumers attached to this stream.
        columns (Columns[T]): Helper for accessing message field names safely.
        name (str): The name of the stream.
        model_type (type[T]): The Pydantic model associated with this stream.
    """
    config: StreamConfig
    transformations: dict[str, list[TransformEntry[T]]]
    consumers: list[ConsumerEntry[T]]
    _multipleTransformations: Optional[Callable[[T], list[_RoutedMessage]]] = None

    def __init__(self, name: str, config: Optional[StreamConfig] = None, **kwargs):
        super().__init__()
        self._set_type(name, self._get_type(kwargs))
        # Build the default per instance: a `StreamConfig()` default argument would be
        # evaluated once and then shared (and mutated) across every stream.
        self.config = config if config is not None else StreamConfig()
        self.metadata = self.config.metadata
        self.consumers = []
        self.transformations = {}
        _streams[name] = self

    def add_transform(self, destination: "Stream[U]", transformation: Callable[[T], ZeroOrMany[U]],
                      config: Optional[TransformConfig] = None):
        """Adds a transformation step from this stream to a destination stream.

        The transformation function receives a record of type `T` and should return
        a record of type `U`, a list of `U` records, or `None` to filter.

        If a transformation to the same destination with the same version is already
        registered, the call leaves the existing registration untouched.

        Args:
            destination: The target `Stream` for the transformed records.
            transformation: A callable that performs the transformation.
            config: Optional configuration, primarily for setting a version.
        """
        config = config if config is not None else TransformConfig()
        registered = self.transformations.setdefault(destination.name, [])
        # Only one transform per (destination, version) pair is kept.
        if not any(entry.config.version == config.version for entry in registered):
            registered.append(
                TransformEntry(destination=destination, transformation=transformation, config=config))

    def add_consumer(self, consumer: Callable[[T], None], config: Optional[ConsumerConfig] = None):
        """Adds a consumer function to be executed for each record in the stream.

        Consumers are typically used for side effects like logging or triggering external actions.
        If a consumer with the same version is already registered, the call is a no-op.

        Args:
            consumer: A callable that accepts a record of type `T`.
            config: Optional configuration, primarily for setting a version.
        """
        config = config if config is not None else ConsumerConfig()
        # Only one consumer per version is kept.
        if not any(entry.config.version == config.version for entry in self.consumers):
            self.consumers.append(ConsumerEntry(consumer=consumer, config=config))

    def has_consumers(self) -> bool:
        """Checks if any consumers have been added to this stream.

        Returns:
            True if the stream has one or more consumers, False otherwise.
        """
        return len(self.consumers) > 0

    def routed(self, values: ZeroOrMany[T]) -> _RoutedMessage:
        """Creates a `_RoutedMessage` for use in multi-transform functions.

        Wraps the value(s) to be sent with this stream as the destination.

        Args:
            values: A single record, a list of records, or None.

        Returns:
            A `_RoutedMessage` object.
        """
        return _RoutedMessage(destination=self, values=values)

    def set_multi_transform(self, transformation: Callable[[T], list[_RoutedMessage]]):
        """Sets a transformation function capable of routing records to multiple streams.

        The provided function takes a single input record (`T`) and must return a list
        of `_RoutedMessage` objects, created using the `.routed()` method of the
        target streams.

        Example:
            def my_multi_transform(record: InputModel) -> list[_RoutedMessage]:
                output1 = transform_for_stream1(record)
                output2 = transform_for_stream2(record)
                return [
                    stream1.routed(output1),
                    stream2.routed(output2)
                ]
            input_stream.set_multi_transform(my_multi_transform)

        Note: Only one multi-transform function can be set per stream; setting a new
        one replaces the previous one.

        Args:
            transformation: The multi-routing transformation function.
        """
        self._multipleTransformations = transformation
|
190
|
+
|
191
|
+
class DeadLetterModel(BaseModel, Generic[T]):
    """Envelope for a record that ended up in a dead letter queue.

    Field names are serialized in camelCase via the configured alias generator.

    Attributes:
        original_record: The original record that failed processing.
        error_message: Description of the error that occurred.
        error_type: Type of error (e.g., "ValidationError").
        failed_at: Timestamp when the error occurred.
        source: Source of the error ("api", "transform", or "table").
    """
    model_config = ConfigDict(
        alias_generator=AliasGenerator(serialization_alias=to_camel),
    )

    original_record: Any
    error_message: str
    error_type: str
    failed_at: datetime.datetime
    source: Literal["api", "transform", "table"]

    def as_typed(self) -> T:
        """Validate `original_record` against the model stored in `_t` and return the result.

        `_t` is not declared on this class; it must have been assigned on the
        instance beforehand (the `DeadLetterQueue` wrappers do this before
        handing the record to user callbacks).
        """
        record_model = self._t
        return record_model.model_validate(self.original_record)
|
212
|
+
|
213
|
+
class DeadLetterQueue(Stream, Generic[T]):
    """A specialized Stream for handling failed records.

    Dead letter queues store records that failed during processing, along with
    error information to help diagnose and potentially recover from failures.

    The stream itself is typed with `DeadLetterModel[T]`; every callback registered
    through this class is wrapped so that the record's `_t` is set to the original
    model type before the callback runs (which is what `DeadLetterModel.as_typed` reads).

    Attributes:
        All attributes inherited from Stream.
    """

    _model_type: type[T]

    def __init__(self, name: str, config: Optional[StreamConfig] = None, **kwargs):
        """Initialize a new DeadLetterQueue.

        Args:
            name: The name of the dead letter queue stream.
            config: Configuration for the stream. When omitted, a fresh default
                `StreamConfig` is created for this queue.
        """
        self._model_type = self._get_type(kwargs)
        # The underlying stream carries the dead-letter envelope, not the raw model.
        kwargs["t"] = DeadLetterModel[self._model_type]
        # Resolve the default here rather than in the signature so that queues
        # never share one mutable `StreamConfig` instance.
        resolved_config = config if config is not None else StreamConfig()
        super().__init__(name, resolved_config, **kwargs)

    def add_transform(self, destination: Stream[U], transformation: Callable[[DeadLetterModel[T]], ZeroOrMany[U]],
                      config: Optional[TransformConfig] = None):
        """Register a transformation whose input is a `DeadLetterModel[T]` record.

        Args:
            destination: The target `Stream` for the transformed records.
            transformation: A callable that receives the dead-letter record.
            config: Optional configuration; a default `TransformConfig` is used when omitted.
        """
        def wrapped_transform(record: DeadLetterModel[T]):
            # Attach the original model type so `record.as_typed()` works.
            record._t = self._model_type
            return transformation(record)

        config = config or TransformConfig()
        super().add_transform(destination, wrapped_transform, config)

    def add_consumer(self, consumer: Callable[[DeadLetterModel[T]], None], config: Optional[ConsumerConfig] = None):
        """Register a consumer whose input is a `DeadLetterModel[T]` record.

        Args:
            consumer: A callable that receives the dead-letter record.
            config: Optional configuration; a default `ConsumerConfig` is used when omitted.
        """
        def wrapped_consumer(record: DeadLetterModel[T]):
            # Attach the original model type so `record.as_typed()` works.
            record._t = self._model_type
            return consumer(record)

        config = config or ConsumerConfig()
        super().add_consumer(wrapped_consumer, config)

    def set_multi_transform(self, transformation: Callable[[DeadLetterModel[T]], list[_RoutedMessage]]):
        """Set a multi-routing transformation whose input is a `DeadLetterModel[T]` record.

        Args:
            transformation: The multi-routing transformation function.
        """
        def wrapped_transform(record: DeadLetterModel[T]):
            # Attach the original model type so `record.as_typed()` works.
            record._t = self._model_type
            return transformation(record)

        super().set_multi_transform(wrapped_transform)
|
moose_lib/dmv2/types.py
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
"""
|
2
|
+
Shared types and base classes for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides the core type definitions and base classes used across
|
5
|
+
the dmv2 package, including generic type parameters, type aliases, and base
|
6
|
+
resource classes.
|
7
|
+
"""
|
8
|
+
from typing import Any, Generic, TypeVar, Union
|
9
|
+
from pydantic import BaseModel
|
10
|
+
from pydantic.fields import FieldInfo
|
11
|
+
|
12
|
+
T = TypeVar('T', bound=BaseModel)
|
13
|
+
U = TypeVar('U', bound=BaseModel)
|
14
|
+
T_none = TypeVar('T_none', bound=Union[BaseModel, None])
|
15
|
+
U_none = TypeVar('U_none', bound=Union[BaseModel, None])
|
16
|
+
type ZeroOrMany[T] = Union[T, list[T], None]
|
17
|
+
|
18
|
+
class Columns(Generic[T]):
    """Provides runtime checked column name access for Moose resources.

    Instead of using string literals for column names, you can use attribute access
    on this object, which will verify the name against the Pydantic model's fields.

    Example:
        >>> class MyModel(BaseModel):
        ...     user_id: int
        ...     event_name: str
        >>> cols = Columns(MyModel)
        >>> print(cols.user_id)  # Output: user_id
        >>> print(cols.non_existent)  # Raises AttributeError

    Args:
        model: The Pydantic model type whose fields represent the columns.
    """
    _fields: dict[str, FieldInfo]

    def __init__(self, model: type[T]):
        self._fields = model.model_fields

    def __getattr__(self, item: str) -> str:
        """Return `item` when it names a model field.

        Raises:
            AttributeError: If `item` is not a field of the model.
        """
        # `__getattr__` only runs after normal lookup has failed. Reading
        # `self._fields` here while it is still unset (e.g. on an instance made
        # by copy/pickle before its state is restored) would re-enter this
        # method without end, so look in the instance dict directly.
        fields = self.__dict__.get("_fields", {})
        if item in fields:
            return item  # or some Column representation
        raise AttributeError(f"{item} is not a valid column name")
|
44
|
+
|
45
|
+
class BaseTypedResource(Generic[T]):
    """Base class for Moose resources that are typed with a Pydantic model.

    Handles the association of a Pydantic model `T` with a Moose resource,
    providing type validation and access to the model type.

    Subscripting the class (`Resource[MyModel]`) does not produce a generic alias;
    it returns a constructor that forwards `MyModel` as the `t` keyword argument.

    Attributes:
        name (str): The name of the Moose resource.
    """
    _t: type[T]
    name: str

    @classmethod
    def _get_type(cls, keyword_args: dict):
        """Extract and validate the model type passed as the `t` keyword argument.

        Args:
            keyword_args: The keyword arguments received by the constructor.

        Returns:
            The Pydantic model class stored under `t`.

        Raises:
            ValueError: If `t` is missing or is not a `BaseModel` subclass.
        """
        t = keyword_args.get('t')
        if t is None:
            raise ValueError(f"Use `{cls.__name__}[T](name='...')` to supply the Pydantic model type")
        if not isinstance(t, type) or not issubclass(t, BaseModel):
            raise ValueError(f"{t} is not a Pydantic model")
        return t

    @property
    def model_type(self) -> type[T]:
        """Get the Pydantic model type associated with this resource."""
        return self._t

    def _set_type(self, name: str, t: type[T]):
        """Internal method to set the resource name and associated Pydantic type."""
        self._t = t
        self.name = name

    def __class_getitem__(cls, item: type[BaseModel]):
        """Return a constructor for `cls` that supplies `item` as the `t` keyword."""
        def curried_constructor(*args, **kwargs):
            # Positional arguments first, then the injected model type.
            return cls(*args, t=item, **kwargs)

        return curried_constructor
|
81
|
+
|
82
|
+
class TypedMooseResource(BaseTypedResource, Generic[T]):
    """Typed Moose resource that also exposes its model's field names as columns.

    Builds on `BaseTypedResource` by attaching a `Columns` helper whenever the
    resource type is set.

    Attributes:
        columns (Columns[T]): An object providing attribute access to column names.
    """
    columns: Columns[T]

    def _set_type(self, name: str, t: type[T]):
        """Record the name and model type, then build the `columns` helper from that type."""
        super()._set_type(name, t)
        column_helper = Columns[T](self._t)
        self.columns = column_helper
|
moose_lib/dmv2/view.py
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
"""
|
2
|
+
View definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining standard SQL Views,
|
5
|
+
including their SQL statements and dependencies.
|
6
|
+
"""
|
7
|
+
from typing import Union, List, Optional
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
from .sql_resource import SqlResource
|
11
|
+
from .olap_table import OlapTable
|
12
|
+
|
13
|
+
class View(SqlResource):
    """Represents a standard SQL database View.

    Args:
        name: The name of the view to be created.
        select_statement: The SQL SELECT statement defining the view.
        base_tables: A list of `OlapTable`, `View`, or `MaterializedView` objects
                     that this view depends on.
        metadata: Optional metadata for the view.

    Attributes:
        name (str): The name of the view.
        setup (list[str]): Single-element list holding the CREATE VIEW statement.
        teardown (list[str]): Single-element list holding the DROP VIEW statement.
        pulls_data_from (list): Source tables/views (the `base_tables` argument).
    """

    def __init__(self, name: str, select_statement: str, base_tables: list[Union[OlapTable, SqlResource]],
                 metadata: Optional[dict] = None):
        # `name` and `select_statement` are interpolated verbatim into the SQL text;
        # no quoting or escaping is applied here.
        setup = [
            f"CREATE VIEW IF NOT EXISTS {name} AS {select_statement}".strip()
        ]
        teardown = [f"DROP VIEW IF EXISTS {name}"]
        super().__init__(name, setup, teardown, pulls_data_from=base_tables, metadata=metadata)
|
@@ -0,0 +1,156 @@
|
|
1
|
+
"""
|
2
|
+
Workflow definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring workflows composed of tasks,
|
5
|
+
including task dependencies, configurations, and execution functions.
|
6
|
+
"""
|
7
|
+
import dataclasses
|
8
|
+
from typing import Any, Optional, Dict, List, Callable, Union, Awaitable, Generic
|
9
|
+
from pydantic import BaseModel
|
10
|
+
|
11
|
+
from .types import TypedMooseResource, T_none, U_none
|
12
|
+
from ._registry import _workflows
|
13
|
+
|
14
|
+
type TaskRunFunc[T_none, U_none] = Union[
|
15
|
+
# Case 1: No input, no output
|
16
|
+
Callable[[], None],
|
17
|
+
# Case 2: No input, with output
|
18
|
+
Callable[[], Union[U_none, Awaitable[U_none]]],
|
19
|
+
# Case 3: With input, no output
|
20
|
+
Callable[[T_none], None],
|
21
|
+
# Case 4: With input, with output
|
22
|
+
Callable[[T_none], Union[U_none, Awaitable[U_none]]]
|
23
|
+
]
|
24
|
+
|
25
|
+
@dataclasses.dataclass
class TaskConfig(Generic[T_none, U_none]):
    """Settings that define how a Task behaves.

    Attributes:
        run: The handler function that executes the task logic.
        on_complete: Tasks to run after this task completes, if any.
        timeout: Timeout string (e.g. "5m", "1h"), if any.
        retries: Number of retry attempts, if any.
    """
    run: TaskRunFunc[T_none, U_none]
    on_complete: Optional[list["Task[U_none, Any]"]] = None
    timeout: Optional[str] = None
    retries: Optional[int] = None
|
39
|
+
|
40
|
+
class Task(TypedMooseResource, Generic[T_none, U_none]):
    """A unit of work that can be executed as part of a workflow.

    A task has a typed input and a typed output, and tasks can be chained
    through the `on_complete` list of their configuration.

    Args:
        name: The name of the task.
        config: Configuration specifying the task's behavior.
        t: A 2-tuple `(InputModel, OutputModel)`, normally supplied via
           `Task[InputModel, OutputModel](...)`. Either element may be None.

    Attributes:
        config (TaskConfig[T, U]): The configuration for this task.
        name (str): The name of the task.
        model_type (type[T]): The Pydantic model associated with this task's input.

    Note:
        The `_set_type` override below stores only the input type, output type and
        name; it does not build the `columns` helper that the parent class sets up.
    """
    config: TaskConfig[T_none, U_none]

    def __init__(self, name: str, config: TaskConfig[T_none, U_none], **kwargs):
        super().__init__()
        type_pair = self._get_type(kwargs)
        self._set_type(name, type_pair)
        self.config = config

    @classmethod
    def _get_type(cls, keyword_args: dict):
        """Validate and return the `(input, output)` type pair passed as `t`.

        Raises:
            ValueError: If `t` is missing, is not a 2-tuple, or contains an element
                that is neither None nor a `BaseModel` subclass.
        """
        def is_model(candidate) -> bool:
            return isinstance(candidate, type) and issubclass(candidate, BaseModel)

        t = keyword_args.get('t')
        if t is None or not isinstance(t, tuple) or len(t) != 2:
            raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")

        input_type, output_type = t
        if input_type is not None and not is_model(input_type):
            raise ValueError(f"Input type {input_type} is not a Pydantic model or None")
        if output_type is not None and not is_model(output_type):
            raise ValueError(f"Output type {output_type} is not a Pydantic model or None")
        return t

    def _set_type(self, name: str, t: tuple[type[T_none], type[U_none]]):
        """Store the input type as `_t`, the output type as `_u`, and the task name."""
        self._t, self._u = t
        self.name = name
|
86
|
+
|
87
|
+
@dataclasses.dataclass
class WorkflowConfig:
    """Settings that define a workflow.

    Attributes:
        starting_task: The first task to execute in the workflow.
        retries: Number of retry attempts for the entire workflow, if any.
        timeout: Timeout string for the entire workflow, if any.
        schedule: Cron-like schedule string for recurring execution, if any.
    """
    starting_task: Task[Any, Any]
    retries: Optional[int] = None
    timeout: Optional[str] = None
    schedule: Optional[str] = None
|
101
|
+
|
102
|
+
class Workflow:
    """A named workflow made up of one or more tasks.

    The tasks form a tree rooted at `config.starting_task`, with children given by
    each task's `on_complete` list. Scheduling, retries and timeouts for the whole
    workflow live on the configuration object.

    Args:
        name: The name of the workflow.
        config: Configuration specifying the workflow's behavior.

    Attributes:
        name (str): The name of the workflow.
        config (WorkflowConfig): The configuration for this workflow.
    """
    def __init__(self, name: str, config: WorkflowConfig):
        self.name, self.config = name, config
        # Make the workflow discoverable through the internal registry.
        _workflows[name] = self

    @staticmethod
    def _walk(task: Task):
        """Yield `task`, then every task reachable via `on_complete`, depth-first in list order."""
        yield task
        for child in task.config.on_complete or ():
            yield from Workflow._walk(child)

    def get_task_names(self) -> list[str]:
        """Collect the names of every task in this workflow.

        Returns:
            list[str]: Task names in depth-first order, starting task first,
                       including all child tasks
        """
        return [task.name for task in self._walk(self.config.starting_task)]

    def get_task(self, task_name: str) -> Optional[Task]:
        """Look up a task in this workflow by name.

        Args:
            task_name: The name of the task to find

        Returns:
            Optional[Task]: The first matching task in depth-first order, None otherwise
        """
        for task in self._walk(self.config.starting_task):
            if task.name == task_name:
                return task
        return None
|
moose_lib/internal.py
CHANGED
@@ -11,8 +11,18 @@ from typing import Literal, Optional, List, Any
|
|
11
11
|
from pydantic import BaseModel, ConfigDict, AliasGenerator
|
12
12
|
import json
|
13
13
|
from .data_models import Column, _to_columns
|
14
|
-
from moose_lib.dmv2 import
|
15
|
-
|
14
|
+
from moose_lib.dmv2 import (
|
15
|
+
get_tables,
|
16
|
+
get_streams,
|
17
|
+
get_ingest_apis,
|
18
|
+
get_consumption_apis,
|
19
|
+
get_sql_resources,
|
20
|
+
get_workflows,
|
21
|
+
OlapTable,
|
22
|
+
View,
|
23
|
+
MaterializedView,
|
24
|
+
SqlResource
|
25
|
+
)
|
16
26
|
from pydantic.alias_generators import to_camel
|
17
27
|
from pydantic.json_schema import JsonSchemaValue
|
18
28
|
|
@@ -254,7 +264,7 @@ def to_infra_map() -> dict:
|
|
254
264
|
sql_resources = {}
|
255
265
|
workflows = {}
|
256
266
|
|
257
|
-
for name, table in
|
267
|
+
for name, table in get_tables().items():
|
258
268
|
engine = table.config.engine
|
259
269
|
tables[name] = TableConfig(
|
260
270
|
name=name,
|
@@ -266,7 +276,7 @@ def to_infra_map() -> dict:
|
|
266
276
|
metadata=getattr(table, "metadata", None),
|
267
277
|
)
|
268
278
|
|
269
|
-
for name, stream in
|
279
|
+
for name, stream in get_streams().items():
|
270
280
|
transformation_targets = [
|
271
281
|
Target(
|
272
282
|
kind="stream",
|
@@ -297,7 +307,7 @@ def to_infra_map() -> dict:
|
|
297
307
|
metadata=getattr(stream, "metadata", None),
|
298
308
|
)
|
299
309
|
|
300
|
-
for name, api in
|
310
|
+
for name, api in get_ingest_apis().items():
|
301
311
|
ingest_apis[name] = IngestApiConfig(
|
302
312
|
name=name,
|
303
313
|
columns=_to_columns(api._t),
|
@@ -310,7 +320,7 @@ def to_infra_map() -> dict:
|
|
310
320
|
dead_letter_queue=api.config.dead_letter_queue.name
|
311
321
|
)
|
312
322
|
|
313
|
-
for name, api in
|
323
|
+
for name, api in get_consumption_apis().items():
|
314
324
|
egress_apis[name] = EgressApiConfig(
|
315
325
|
name=name,
|
316
326
|
query_params=_to_columns(api.model_type),
|
@@ -319,7 +329,7 @@ def to_infra_map() -> dict:
|
|
319
329
|
metadata=getattr(api, "metadata", None),
|
320
330
|
)
|
321
331
|
|
322
|
-
for name, resource in
|
332
|
+
for name, resource in get_sql_resources().items():
|
323
333
|
sql_resources[name] = SqlResourceConfig(
|
324
334
|
name=resource.name,
|
325
335
|
setup=resource.setup,
|
@@ -329,7 +339,7 @@ def to_infra_map() -> dict:
|
|
329
339
|
metadata=getattr(resource, "metadata", None),
|
330
340
|
)
|
331
341
|
|
332
|
-
for name, workflow in
|
342
|
+
for name, workflow in get_workflows().items():
|
333
343
|
workflows[name] = WorkflowJson(
|
334
344
|
name=workflow.name,
|
335
345
|
retries=workflow.config.retries,
|
@@ -28,7 +28,7 @@ import threading
|
|
28
28
|
import time
|
29
29
|
from typing import Optional, Callable, Tuple, Any
|
30
30
|
|
31
|
-
from moose_lib.dmv2 import
|
31
|
+
from moose_lib.dmv2 import get_streams, DeadLetterModel
|
32
32
|
from moose_lib import cli_log, CliLogData, DeadLetterQueue
|
33
33
|
|
34
34
|
# Force stdout to be unbuffered
|
@@ -186,7 +186,7 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
|
|
186
186
|
sys.exit(1)
|
187
187
|
|
188
188
|
# Find the stream that has a transformation matching our source/destination
|
189
|
-
for source_py_stream_name, stream in
|
189
|
+
for source_py_stream_name, stream in get_streams().items():
|
190
190
|
if source_py_stream_name != source_topic.topic_name_to_stream_name():
|
191
191
|
continue
|
192
192
|
|