moose-lib 0.4.218__py3-none-any.whl → 0.4.220__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/data_models.py +15 -1
- moose_lib/dmv2/__init__.py +142 -0
- moose_lib/dmv2/_registry.py +15 -0
- moose_lib/dmv2/consumption.py +101 -0
- moose_lib/dmv2/ingest_api.py +64 -0
- moose_lib/dmv2/ingest_pipeline.py +156 -0
- moose_lib/dmv2/materialized_view.py +94 -0
- moose_lib/dmv2/olap_table.py +57 -0
- moose_lib/dmv2/registry.py +62 -0
- moose_lib/dmv2/sql_resource.py +49 -0
- moose_lib/dmv2/stream.py +258 -0
- moose_lib/dmv2/types.py +95 -0
- moose_lib/dmv2/view.py +36 -0
- moose_lib/dmv2/workflow.py +156 -0
- moose_lib/internal.py +18 -8
- moose_lib/streaming/streaming_function_runner.py +2 -2
- {moose_lib-0.4.218.dist-info → moose_lib-0.4.220.dist-info}/METADATA +1 -1
- moose_lib-0.4.220.dist-info/RECORD +34 -0
- moose_lib/dmv2.py +0 -994
- moose_lib-0.4.218.dist-info/RECORD +0 -22
- {moose_lib-0.4.218.dist-info → moose_lib-0.4.220.dist-info}/WHEEL +0 -0
- {moose_lib-0.4.218.dist-info → moose_lib-0.4.220.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py
CHANGED
@@ -87,7 +87,12 @@ class NamedTupleType(BaseModel):
|
|
87
87
|
fields: list[tuple[str, "DataType"]]
|
88
88
|
|
89
89
|
|
90
|
-
|
90
|
+
class MapType(BaseModel):
    """Column type describing a map from keys of one data type to values of another.

    Produced by `py_type_to_column_type` for `dict[K, V]` annotations.
    """
    # Data type of the map's keys.
    key_type: "DataType"
    # Data type of the map's values.
    value_type: "DataType"
|
93
|
+
|
94
|
+
|
95
|
+
# Every representation a column's data type can take: a plain type-name string
# (e.g. "UUID") or one of the structured type models in this union.
type DataType = str | DataEnum | ArrayType | Nested | NamedTupleType | MapType
|
91
96
|
|
92
97
|
|
93
98
|
def handle_jwt(field_type: type) -> Tuple[bool, type]:
|
@@ -185,6 +190,15 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
|
|
185
190
|
elif get_origin(t) is list:
|
186
191
|
inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], [])
|
187
192
|
data_type = ArrayType(element_type=inner_type, element_nullable=inner_optional)
|
193
|
+
elif get_origin(t) is dict:
|
194
|
+
args = get_args(t)
|
195
|
+
if len(args) == 2:
|
196
|
+
key_optional, _, key_type = py_type_to_column_type(args[0], [])
|
197
|
+
value_optional, _, value_type = py_type_to_column_type(args[1], [])
|
198
|
+
# For dict types, we assume keys are required and values match their type
|
199
|
+
data_type = MapType(key_type=key_type, value_type=value_type)
|
200
|
+
else:
|
201
|
+
raise ValueError(f"Dict type must have exactly 2 type arguments, got {len(args)}")
|
188
202
|
elif t is UUID:
|
189
203
|
data_type = "UUID"
|
190
204
|
elif t is Any:
|
@@ -0,0 +1,142 @@
|
|
1
|
+
"""
|
2
|
+
Moose Data Model v2 (dmv2)
|
3
|
+
|
4
|
+
This package provides the Python classes for defining Moose v2 data model resources.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from .types import (
|
8
|
+
BaseTypedResource,
|
9
|
+
TypedMooseResource,
|
10
|
+
Columns,
|
11
|
+
T,
|
12
|
+
U,
|
13
|
+
T_none,
|
14
|
+
U_none,
|
15
|
+
ZeroOrMany,
|
16
|
+
)
|
17
|
+
|
18
|
+
from .olap_table import (
|
19
|
+
OlapConfig,
|
20
|
+
OlapTable,
|
21
|
+
)
|
22
|
+
|
23
|
+
from .stream import (
|
24
|
+
StreamConfig,
|
25
|
+
TransformConfig,
|
26
|
+
ConsumerConfig,
|
27
|
+
Stream,
|
28
|
+
DeadLetterModel,
|
29
|
+
DeadLetterQueue,
|
30
|
+
)
|
31
|
+
|
32
|
+
from .ingest_api import (
|
33
|
+
IngestConfig,
|
34
|
+
IngestConfigWithDestination,
|
35
|
+
IngestApi,
|
36
|
+
)
|
37
|
+
|
38
|
+
from .ingest_pipeline import (
|
39
|
+
IngestPipelineConfig,
|
40
|
+
IngestPipeline,
|
41
|
+
)
|
42
|
+
|
43
|
+
from .consumption import (
|
44
|
+
EgressConfig,
|
45
|
+
ConsumptionApi,
|
46
|
+
)
|
47
|
+
|
48
|
+
from .sql_resource import (
|
49
|
+
SqlResource,
|
50
|
+
)
|
51
|
+
|
52
|
+
from .view import (
|
53
|
+
View,
|
54
|
+
)
|
55
|
+
|
56
|
+
from .materialized_view import (
|
57
|
+
MaterializedViewOptions,
|
58
|
+
MaterializedView,
|
59
|
+
)
|
60
|
+
|
61
|
+
from .workflow import (
|
62
|
+
TaskConfig,
|
63
|
+
Task,
|
64
|
+
WorkflowConfig,
|
65
|
+
Workflow,
|
66
|
+
)
|
67
|
+
|
68
|
+
from .registry import (
|
69
|
+
get_tables,
|
70
|
+
get_table,
|
71
|
+
get_streams,
|
72
|
+
get_stream,
|
73
|
+
get_ingest_apis,
|
74
|
+
get_ingest_api,
|
75
|
+
get_consumption_apis,
|
76
|
+
get_consumption_api,
|
77
|
+
get_sql_resources,
|
78
|
+
get_sql_resource,
|
79
|
+
get_workflows,
|
80
|
+
get_workflow,
|
81
|
+
)
|
82
|
+
|
83
|
+
__all__ = [
|
84
|
+
# Types
|
85
|
+
'BaseTypedResource',
|
86
|
+
'TypedMooseResource',
|
87
|
+
'Columns',
|
88
|
+
'T',
|
89
|
+
'U',
|
90
|
+
'T_none',
|
91
|
+
'U_none',
|
92
|
+
'ZeroOrMany',
|
93
|
+
|
94
|
+
# OLAP Tables
|
95
|
+
'OlapConfig',
|
96
|
+
'OlapTable',
|
97
|
+
|
98
|
+
# Streams
|
99
|
+
'StreamConfig',
|
100
|
+
'TransformConfig',
|
101
|
+
'ConsumerConfig',
|
102
|
+
'Stream',
|
103
|
+
'DeadLetterModel',
|
104
|
+
'DeadLetterQueue',
|
105
|
+
|
106
|
+
# Ingestion
|
107
|
+
'IngestConfig',
|
108
|
+
'IngestConfigWithDestination',
|
109
|
+
'IngestPipelineConfig',
|
110
|
+
'IngestApi',
|
111
|
+
'IngestPipeline',
|
112
|
+
|
113
|
+
# Consumption
|
114
|
+
'EgressConfig',
|
115
|
+
'ConsumptionApi',
|
116
|
+
|
117
|
+
# SQL
|
118
|
+
'SqlResource',
|
119
|
+
'View',
|
120
|
+
'MaterializedViewOptions',
|
121
|
+
'MaterializedView',
|
122
|
+
|
123
|
+
# Workflow
|
124
|
+
'TaskConfig',
|
125
|
+
'Task',
|
126
|
+
'WorkflowConfig',
|
127
|
+
'Workflow',
|
128
|
+
|
129
|
+
# Registry
|
130
|
+
'get_tables',
|
131
|
+
'get_table',
|
132
|
+
'get_streams',
|
133
|
+
'get_stream',
|
134
|
+
'get_ingest_apis',
|
135
|
+
'get_ingest_api',
|
136
|
+
'get_consumption_apis',
|
137
|
+
'get_consumption_api',
|
138
|
+
'get_sql_resources',
|
139
|
+
'get_sql_resource',
|
140
|
+
'get_workflows',
|
141
|
+
'get_workflow',
|
142
|
+
]
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""
|
2
|
+
Internal registry dictionaries for Moose Data Model v2 (dmv2) resources.
|
3
|
+
|
4
|
+
This module maintains the raw dictionaries that store all registered resources.
|
5
|
+
It has no imports from other dmv2 modules to avoid circular dependencies.
|
6
|
+
"""
|
7
|
+
from typing import Dict, Any
|
8
|
+
|
9
|
+
# Global registries for all resource types.
# Each dict maps a resource name to the registered resource instance; values are
# typed `Any` because this module imports nothing from the other dmv2 modules.

# Populated by `OlapTable.__init__` (keyed by table name).
_tables: Dict[str, Any] = {}
# Presumably populated by `Stream` construction — confirm in dmv2/stream.py.
_streams: Dict[str, Any] = {}
# Populated by `IngestApi.__init__` (keyed by API name).
_ingest_apis: Dict[str, Any] = {}
# Populated by `ConsumptionApi.__init__` (keyed by API name).
_egress_apis: Dict[str, Any] = {}
# Presumably populated by `SqlResource` construction — confirm in dmv2/sql_resource.py.
_sql_resources: Dict[str, Any] = {}
# Presumably populated by `Workflow` construction — confirm in dmv2/workflow.py.
_workflows: Dict[str, Any] = {}
|
@@ -0,0 +1,101 @@
|
|
1
|
+
"""
|
2
|
+
Consumption (Egress) API definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring consumption APIs
|
5
|
+
that allow querying data through user-defined functions.
|
6
|
+
"""
|
7
|
+
from typing import Any, Callable, Optional, Tuple
|
8
|
+
from pydantic import BaseModel
|
9
|
+
from pydantic.json_schema import JsonSchemaValue
|
10
|
+
|
11
|
+
from .types import BaseTypedResource, T, U
|
12
|
+
from ._registry import _egress_apis
|
13
|
+
|
14
|
+
class EgressConfig(BaseModel):
    """Configuration for Consumption (Egress) APIs.

    Both fields are optional and default to `None`.

    Attributes:
        version: Optional version string.
        metadata: Optional metadata for the consumption API. `ConsumptionApi.__init__`
            assigns this same object to the API instance's `metadata` attribute.
    """
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
23
|
+
|
24
|
+
class ConsumptionApi(BaseTypedResource):
    """Represents a Consumption (Egress) API endpoint.

    Allows querying data, typically powered by a user-defined function.
    Requires two Pydantic models: `T` for query parameters and `U` for the response body.

    Note that `ConsumptionApi[InputModel, OutputModel]` does not return a class: it
    returns a constructor function that forwards to `ConsumptionApi(...)` with the
    two models supplied as the `t` keyword argument.

    Args:
        name: The name of the consumption API endpoint.
        query_function: The callable that executes the query logic.
                        It receives parameters matching model `T` (and potentially
                        other runtime utilities) and should return data matching model `U`.
        config: Optional configuration (`version` and `metadata`). When omitted, a
                fresh `EgressConfig()` is created for this instance.
        t: A tuple containing the input (`T`) and output (`U`) Pydantic models
           (passed via `ConsumptionApi[InputModel, OutputModel](...)`).

    Raises:
        ValueError: If the two model types are missing or are not Pydantic models.

    Attributes:
        config (EgressConfig): Configuration for the API.
        query_function (Callable[..., U]): The handler function for the API.
        name (str): The name of the API.
        metadata (Optional[dict]): Taken from `config.metadata`.
        model_type (type[T]): The Pydantic model for the input/query parameters.
    """
    config: EgressConfig
    query_function: Callable[..., U]
    _u: type[U]

    def __class_getitem__(cls, items):
        """Return a constructor bound to the `(input, output)` model pair in `items`."""
        # Handle two type parameters
        if not isinstance(items, tuple) or len(items) != 2:
            raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")
        input_type, output_type = items

        def curried_constructor(*args, **kwargs):
            return cls(*args, t=(input_type, output_type), **kwargs)

        return curried_constructor

    def __init__(self, name: str, query_function: Callable[..., U], config: Optional[EgressConfig] = None, **kwargs):
        super().__init__()
        self._set_type(name, self._get_type(kwargs))
        # Build the default per instance: a default evaluated in the signature
        # would be one EgressConfig object shared by every API created without a config.
        self.config = EgressConfig() if config is None else config
        self.query_function = query_function
        self.metadata = self.config.metadata
        # Register globally; a later API with the same name replaces this entry.
        _egress_apis[name] = self

    @classmethod
    def _get_type(cls, keyword_args: dict):
        """Extract and validate the `(input, output)` model tuple from `keyword_args['t']`.

        Raises:
            ValueError: If `t` is not a 2-tuple or either element is not a
                `BaseModel` subclass.
        """
        t = keyword_args.get('t')
        if not isinstance(t, tuple) or len(t) != 2:
            raise ValueError(f"Use `{cls.__name__}[T, U](name='...')` to supply both input and output types")

        input_type, output_type = t
        if not isinstance(input_type, type) or not issubclass(input_type, BaseModel):
            raise ValueError(f"Input type {input_type} is not a Pydantic model")
        if not isinstance(output_type, type) or not issubclass(output_type, BaseModel):
            raise ValueError(f"Output type {output_type} is not a Pydantic model")
        return t

    def _set_type(self, name: str, t: Tuple[type[T], type[U]]):
        """Store the API name plus the input (`_t`) and output (`_u`) model types."""
        input_type, output_type = t
        self._t = input_type
        self._u = output_type
        self.name = name

    def return_type(self) -> type[U]:
        """Get the Pydantic model type for the API's response body."""
        return self._u

    def get_response_schema(self) -> JsonSchemaValue:
        """Generates the JSON schema for the API's response body model (`U`).

        Returns:
            A dictionary representing the JSON schema.
        """
        from pydantic.type_adapter import TypeAdapter
        # `return_type` is a method, so it must be called: the adapter needs the
        # model class itself, not the bound method object.
        return TypeAdapter(self.return_type()).json_schema(
            ref_template='#/components/schemas/{model}'
        )
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""
|
2
|
+
Ingestion API definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring ingestion APIs
|
5
|
+
that receive data and send it to streams.
|
6
|
+
"""
|
7
|
+
import dataclasses
|
8
|
+
from typing import Any, Optional, Generic
|
9
|
+
from pydantic import BaseModel
|
10
|
+
|
11
|
+
from .types import TypedMooseResource, T
|
12
|
+
from .stream import Stream, DeadLetterQueue
|
13
|
+
from ._registry import _ingest_apis
|
14
|
+
|
15
|
+
class IngestConfig(BaseModel):
    """Basic configuration for an ingestion point.

    Carries no destination; `IngestConfigWithDestination` is the variant that
    also names the target stream.

    Attributes:
        version: Optional version string.
        metadata: Optional metadata for the ingestion point.
    """
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
24
|
+
|
25
|
+
@dataclasses.dataclass
class IngestConfigWithDestination[T: BaseModel]:
    """Ingestion configuration that includes the mandatory destination stream.

    This is a plain dataclass (not a Pydantic model), generic over the record model `T`.

    Attributes:
        destination: The `Stream` where ingested data will be sent.
        dead_letter_queue: Optional `DeadLetterQueue` attached to this ingestion point
            (presumably receives records that fail ingestion — confirm against the runtime).
        version: Optional version string.
        metadata: Optional metadata for the ingestion configuration.
    """
    destination: Stream[T]
    dead_letter_queue: Optional[DeadLetterQueue[T]] = None
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
38
|
+
|
39
|
+
class IngestApi(TypedMooseResource, Generic[T]):
    """An ingestion endpoint whose payload schema is a Pydantic model.

    Records posted to the endpoint are expected to match schema `T` and are
    forwarded to the destination stream named in the configuration.

    Args:
        name: The name of the ingestion API endpoint.
        config: Configuration carrying the destination stream (and optional
                dead letter queue, version and metadata).
        t: The Pydantic model defining the expected input data schema
           (passed via `IngestApi[MyModel](...)`).

    Attributes:
        config (IngestConfigWithDestination[T]): The configuration for this API.
        columns (Columns[T]): Helper for accessing input field names safely.
        name (str): The name of the API.
        metadata: The config's `metadata` attribute, or `None` if it has none.
        model_type (type[T]): The Pydantic model associated with this API's input.
    """
    config: IngestConfigWithDestination[T]

    def __init__(self, name: str, config: IngestConfigWithDestination[T], **kwargs):
        super().__init__()
        resolved_model = self._get_type(kwargs)
        self._set_type(name, resolved_model)
        self.config = config
        # Tolerate config objects that do not define a `metadata` attribute.
        self.metadata = getattr(config, 'metadata', None)
        # Register globally under the endpoint name.
        _ingest_apis[name] = self
|
@@ -0,0 +1,156 @@
|
|
1
|
+
"""
|
2
|
+
Ingestion Pipeline definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring complete ingestion pipelines,
|
5
|
+
which combine tables, streams, and ingestion APIs into a single cohesive unit.
|
6
|
+
"""
|
7
|
+
from typing import Any, Optional, Generic, TypeVar
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
from .types import TypedMooseResource, T
|
11
|
+
from .olap_table import OlapTable, OlapConfig
|
12
|
+
from .stream import Stream, StreamConfig, DeadLetterQueue
|
13
|
+
from .ingest_api import IngestApi, IngestConfig, IngestConfigWithDestination
|
14
|
+
|
15
|
+
class IngestPipelineConfig(BaseModel):
    """Configuration for creating a complete ingestion pipeline.

    Defines which components (table, stream, ingest API, dead letter queue) should be created.
    Set a component to `True` for default settings, `False` to disable, or provide
    a specific config object (`OlapConfig`, `StreamConfig`, `IngestConfig`).
    Every component defaults to `True`.

    Attributes:
        table: Configuration for the OLAP table component.
        stream: Configuration for the stream component.
        ingest: Configuration for the ingest API component.
        dead_letter_queue: Configuration for the dead letter queue component
            (a `StreamConfig` when customised).
        version: Optional version string applied to all created components.
        metadata: Optional metadata for the ingestion pipeline.
    """
    table: bool | OlapConfig = True
    stream: bool | StreamConfig = True
    ingest: bool | IngestConfig = True
    dead_letter_queue: bool | StreamConfig = True
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
35
|
+
|
36
|
+
class IngestPipeline(TypedMooseResource, Generic[T]):
    """Creates and configures a linked Table, Stream, and Ingest API pipeline.

    Simplifies the common pattern of ingesting data through an API, processing it
    in a stream, and storing it in a table.

    Components are built in a fixed order (table, stream, dead letter queue,
    ingest API) because each later component references the earlier ones.

    Args:
        name: The base name used for all created components (table, stream, API).
              The dead letter queue is named `f"{name}DeadLetterQueue"`.
        config: Specifies which components to create and their configurations.
                Config objects supplied here are mutated in place (version,
                metadata and stream destination are written onto them).
        t: The Pydantic model defining the data schema for all components
           (passed via `IngestPipeline[MyModel](...)`).

    Raises:
        ValueError: If a stream config with a preset destination is combined with
            an enabled table, or if the ingest API is enabled without a stream.

    Attributes:
        table: The created `OlapTable` instance, if configured.
        stream: The created `Stream` instance, if configured.
        ingest_api: The created `IngestApi` instance, if configured.
        dead_letter_queue: The created `DeadLetterQueue` instance, if configured.
        metadata: The pipeline-level metadata taken from the config.
        columns (Columns[T]): Helper for accessing data field names safely.
        name (str): The base name of the pipeline.
        model_type (type[T]): The Pydantic model associated with this pipeline.
    """
    # Class-level defaults: a component stays `None` unless `__init__` creates it.
    table: Optional[OlapTable[T]] = None
    stream: Optional[Stream[T]] = None
    ingest_api: Optional[IngestApi[T]] = None
    dead_letter_queue: Optional[DeadLetterQueue[T]] = None
    metadata: Optional[dict] = None

    def get_table(self) -> OlapTable[T]:
        """Retrieves the pipeline's OLAP table component.

        Raises:
            ValueError: If the table was not configured for this pipeline.

        Returns:
            The `OlapTable` instance.
        """
        if self.table is None:
            raise ValueError("Table was not configured for this pipeline")
        return self.table

    def get_stream(self) -> Stream[T]:
        """Retrieves the pipeline's stream component.

        Raises:
            ValueError: If the stream was not configured for this pipeline.

        Returns:
            The `Stream` instance.
        """
        if self.stream is None:
            raise ValueError("Stream was not configured for this pipeline")
        return self.stream

    def get_dead_letter_queue(self) -> Stream[T]:
        """Retrieves the pipeline's dead letter queue.

        Raises:
            ValueError: If the dead letter queue was not configured for this pipeline.

        Returns:
            The `Stream` instance.
        """
        if self.dead_letter_queue is None:
            raise ValueError("DLQ was not configured for this pipeline")
        return self.dead_letter_queue

    def get_ingest_api(self) -> IngestApi[T]:
        """Retrieves the pipeline's Ingestion API component.

        Raises:
            ValueError: If the Ingest API was not configured for this pipeline.

        Returns:
            The `IngestApi` instance.
        """
        if self.ingest_api is None:
            raise ValueError("Ingest API was not configured for this pipeline")
        return self.ingest_api

    def __init__(self, name: str, config: IngestPipelineConfig, **kwargs):
        super().__init__()
        self._set_type(name, self._get_type(kwargs))
        self.metadata = config.metadata
        # All three names refer to the same pipeline-level metadata object.
        table_metadata = config.metadata
        stream_metadata = config.metadata
        ingest_metadata = config.metadata
        if config.table:
            # `True` means "use defaults"; otherwise the supplied OlapConfig is used (and mutated).
            table_config = OlapConfig() if config.table is True else config.table
            if config.version:
                # The pipeline version overrides any version on the component config.
                table_config.version = config.version
            # Written unconditionally, so metadata set on a supplied OlapConfig is replaced.
            table_config.metadata = table_metadata
            self.table = OlapTable(name, table_config, t=self._t)
        if config.stream:
            stream_config = StreamConfig() if config.stream is True else config.stream
            if config.table and stream_config.destination is not None:
                raise ValueError("The destination of the stream should be the table created in the IngestPipeline")
            # NOTE(review): this assignment is unconditional. When the table is
            # disabled `self.table` is None, so a destination preset on a supplied
            # StreamConfig is replaced by None — confirm this is intended.
            stream_config.destination = self.table
            if config.version:
                stream_config.version = config.version
            stream_config.metadata = stream_metadata
            self.stream = Stream(name, stream_config, t=self._t)
        if config.dead_letter_queue:
            # Reuses the `stream_config` name for the dead letter queue's own StreamConfig.
            stream_config = StreamConfig() if config.dead_letter_queue is True else config.dead_letter_queue
            if config.version:
                stream_config.version = config.version
            stream_config.metadata = stream_metadata
            self.dead_letter_queue = DeadLetterQueue(f"{name}DeadLetterQueue", stream_config, t=self._t)
        if config.ingest:
            if self.stream is None:
                raise ValueError("Ingest API needs a stream to write to.")
            # Start from the IngestConfig fields as a dict, then add the
            # destination-specific entries needed by IngestConfigWithDestination.
            ingest_config_dict = (
                IngestConfig() if config.ingest is True else config.ingest
            ).model_dump()
            ingest_config_dict["destination"] = self.stream
            if config.version:
                ingest_config_dict["version"] = config.version
            if self.dead_letter_queue:
                ingest_config_dict["dead_letter_queue"] = self.dead_letter_queue
            ingest_config_dict["metadata"] = ingest_metadata
            ingest_config = IngestConfigWithDestination(**ingest_config_dict)
            self.ingest_api = IngestApi(name, ingest_config, t=self._t)
|
@@ -0,0 +1,94 @@
|
|
1
|
+
"""
|
2
|
+
Materialized View definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining Materialized Views,
|
5
|
+
including their SQL statements, target tables, and dependencies.
|
6
|
+
"""
|
7
|
+
from typing import Any, Optional, Union, Generic, TypeVar
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
9
|
+
|
10
|
+
from moose_lib import ClickHouseEngines
|
11
|
+
from .types import BaseTypedResource, T
|
12
|
+
from .olap_table import OlapTable, OlapConfig
|
13
|
+
from .sql_resource import SqlResource
|
14
|
+
|
15
|
+
class MaterializedViewOptions(BaseModel):
    """Configuration options for creating a Materialized View.

    Attributes:
        select_statement: The SQL SELECT statement defining the view's data.
        select_tables: List of source tables/views the select statement reads from.
        table_name: The name of the underlying target table storing the materialized data.
        materialized_view_name: The name of the MATERIALIZED VIEW object itself.
        engine: Optional ClickHouse engine for the target table.
        order_by_fields: Optional ordering key for the target table (required for
                         engines like ReplacingMergeTree).
        metadata: Optional metadata, passed through to the SQL resource by `MaterializedView`.
        model_config: ConfigDict for Pydantic validation
    """
    select_statement: str
    select_tables: list[Union[OlapTable, SqlResource]]
    table_name: str
    materialized_view_name: str
    engine: Optional[ClickHouseEngines] = None
    order_by_fields: Optional[list[str]] = None
    metadata: Optional[dict] = None
    # Ensure arbitrary types are allowed for Pydantic validation
    # (`select_tables` holds OlapTable / SqlResource instances).
    model_config = ConfigDict(arbitrary_types_allowed=True)
|
37
|
+
|
38
|
+
class MaterializedView(SqlResource, BaseTypedResource, Generic[T]):
    """A ClickHouse Materialized View together with its backing table.

    Constructing an instance builds the SQL that creates, backfills and drops
    the view, creates the target `OlapTable` that stores the rows, and hands
    both to the `SqlResource` initializer.

    Args:
        options: Configuration defining the select statement, names, and dependencies.
        t: The Pydantic model defining the schema of the target table
           (passed via `MaterializedView[MyModel](...)`).

    Attributes:
        target_table (OlapTable[T]): The `OlapTable` instance storing the materialized data.
        config (MaterializedViewOptions): The configuration options used to create the view.
        name (str): The name of the MATERIALIZED VIEW object.
        model_type (type[T]): The Pydantic model associated with the target table.
        setup (list[str]): SQL commands to create the view and populate the target table.
        teardown (list[str]): SQL command to drop the view.
        pulls_data_from (list[SqlObject]): Source tables/views.
        pushes_data_to (list[SqlObject]): The target table.
    """
    target_table: OlapTable[T]
    config: MaterializedViewOptions

    def __init__(
            self,
            options: MaterializedViewOptions,
            **kwargs
    ):
        # The model type must be resolved first: the backing table below needs `self._t`.
        self._set_type(options.materialized_view_name, self._get_type(kwargs))

        # Statement 1 creates the view; statement 2 backfills the target table
        # by running the same SELECT once.
        create_view_sql = f"CREATE MATERIALIZED VIEW IF NOT EXISTS {options.materialized_view_name} TO {options.table_name} AS {options.select_statement}"
        backfill_sql = f"INSERT INTO {options.table_name} {options.select_statement}"
        drop_view_sql = f"DROP VIEW IF EXISTS {options.materialized_view_name}"

        # An unset ordering key becomes an empty list for the table config.
        backing_config = OlapConfig(
            order_by_fields=options.order_by_fields or [],
            engine=options.engine,
        )
        backing_table = OlapTable(
            name=options.table_name,
            config=backing_config,
            t=self._t,
        )

        super().__init__(
            options.materialized_view_name,
            [create_view_sql, backfill_sql],
            [drop_view_sql],
            pulls_data_from=options.select_tables,
            pushes_data_to=[backing_table],
            metadata=options.metadata,
        )

        self.target_table = backing_table
        self.config = options
|
@@ -0,0 +1,57 @@
|
|
1
|
+
"""
|
2
|
+
OLAP table definitions for Moose Data Model v2 (dmv2).
|
3
|
+
|
4
|
+
This module provides classes for defining and configuring OLAP tables,
|
5
|
+
particularly for ClickHouse.
|
6
|
+
"""
|
7
|
+
from typing import Optional, Dict, Any, Generic
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
from moose_lib import ClickHouseEngines
|
11
|
+
from .types import TypedMooseResource, T
|
12
|
+
from ._registry import _tables
|
13
|
+
|
14
|
+
class OlapConfig(BaseModel):
    """Configuration for OLAP tables (e.g., ClickHouse tables).

    Attributes:
        order_by_fields: List of column names to use for the ORDER BY clause.
                         Crucial for `ReplacingMergeTree` and performance.
        deduplicate: If True, uses the ReplacingMergeTree engine for automatic
                     deduplication based on `order_by_fields`. Equivalent to
                     setting `engine=ClickHouseEngines.ReplacingMergeTree`.
        engine: The ClickHouse table engine to use (e.g., MergeTree, ReplacingMergeTree).
        version: Optional version string for tracking configuration changes.
        metadata: Optional metadata for the table.
    """
    # A list literal is safe here: Pydantic copies mutable field defaults for
    # each model instance, so instances do not share this list.
    order_by_fields: list[str] = []
    # equivalent to setting `engine=ClickHouseEngines.ReplacingMergeTree`
    deduplicate: bool = False
    engine: Optional[ClickHouseEngines] = None
    version: Optional[str] = None
    metadata: Optional[dict] = None
|
33
|
+
|
34
|
+
class OlapTable(TypedMooseResource, Generic[T]):
    """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.

    Args:
        name: The name of the OLAP table.
        config: Configuration options for the table engine, ordering, etc.
                When omitted, a fresh `OlapConfig()` is created for this table.
        t: The Pydantic model defining the table schema (passed via `OlapTable[MyModel](...)`).

    Attributes:
        config (OlapConfig): The configuration settings for this table.
        columns (Columns[T]): Helper for accessing column names safely.
        name (str): The name of the table.
        metadata (Optional[dict]): Taken from `config.metadata`.
        model_type (type[T]): The Pydantic model associated with this table.
        kind: The kind of the table (e.g., "OlapTable").
    """
    config: OlapConfig
    kind: str = "OlapTable"

    def __init__(self, name: str, config: Optional[OlapConfig] = None, **kwargs):
        super().__init__()
        self._set_type(name, self._get_type(kwargs))
        # Build the default per instance: a default evaluated in the signature
        # would be one OlapConfig object shared (and mutable) across every table
        # created without an explicit config.
        self.config = OlapConfig() if config is None else config
        self.metadata = self.config.metadata
        # Register globally; a later table with the same name replaces this entry.
        _tables[name] = self
|