moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +38 -3
- moose_lib/blocks.py +497 -37
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +94 -5
- moose_lib/config/config_file.py +44 -2
- moose_lib/config/runtime.py +137 -5
- moose_lib/data_models.py +451 -46
- moose_lib/dmv2/__init__.py +88 -60
- moose_lib/dmv2/_registry.py +3 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +56 -13
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +304 -119
- moose_lib/dmv2/registry.py +28 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +241 -21
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +175 -0
- moose_lib/dmv2/web_app_helpers.py +96 -0
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +537 -68
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +266 -156
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +38 -1
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +210 -0
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +114 -0
- tests/test_web_app.py +227 -0
- moose_lib-0.6.90.dist-info/RECORD +0 -42
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/dmv2/registry.py
CHANGED
@@ -4,6 +4,7 @@ Global registries for Moose Data Model v2 (dmv2) resources.
 This module provides functions to access the registered resources.
 The actual registry dictionaries are maintained in _registry.py to avoid circular dependencies.
 """
+
 from typing import Optional, Dict
 from .olap_table import OlapTable
 from .stream import Stream
@@ -11,6 +12,7 @@ from .ingest_api import IngestApi
 from .consumption import Api
 from .sql_resource import SqlResource
 from .workflow import Workflow
+from .web_app import WebApp
 from ._registry import (
     _tables,
     _streams,
@@ -20,36 +22,45 @@ from ._registry import (
     _workflows,
     _api_name_aliases,
     _api_path_map,
+    _web_apps,
 )

+
 def get_tables() -> Dict[str, OlapTable]:
     """Get all registered OLAP tables."""
     return _tables

+
 def get_table(name: str) -> Optional[OlapTable]:
     """Get a registered OLAP table by name."""
     return _tables.get(name)

+
 def get_streams() -> Dict[str, Stream]:
     """Get all registered streams."""
     return _streams

+
 def get_stream(name: str) -> Optional[Stream]:
     """Get a registered stream by name."""
     return _streams.get(name)

+
 def get_ingest_apis() -> Dict[str, IngestApi]:
     """Get all registered ingestion APIs."""
     return _ingest_apis

+
 def get_ingest_api(name: str) -> Optional[IngestApi]:
     """Get a registered ingestion API by name."""
     return _ingest_apis.get(name)

+
 def get_apis() -> Dict[str, Api]:
     """Get all registered APIs."""
     return _apis

+
 def get_api(name: str) -> Optional[Api]:
     """Get a registered API by name or path.

@@ -62,35 +73,49 @@ def get_api(name: str) -> Optional[Api]:
     api = _apis.get(name)
     if api:
         return api
-
+
     # Try alias lookup
     api = _api_name_aliases.get(name)
     if api:
         return api
-
+
     # Try path-based lookup
     return _api_path_map.get(name)

+
 def get_sql_resources() -> Dict[str, SqlResource]:
     """Get all registered SQL resources."""
     return _sql_resources

+
 def get_sql_resource(name: str) -> Optional[SqlResource]:
     """Get a registered SQL resource by name."""
     return _sql_resources.get(name)

+
 def get_workflows() -> Dict[str, Workflow]:
     """Get all registered workflows."""
     return _workflows

+
 def get_workflow(name: str) -> Optional[Workflow]:
     """Get a registered workflow by name."""
     return _workflows.get(name)


+def get_web_apps() -> Dict[str, WebApp]:
+    """Get all registered WebApps."""
+    return _web_apps
+
+
+def get_web_app(name: str) -> Optional[WebApp]:
+    """Get a registered WebApp by name."""
+    return _web_apps.get(name)
+
+
 # Backward compatibility aliases (deprecated)
 get_consumption_apis = get_apis
 """@deprecated: Use get_apis instead of get_consumption_apis"""

 get_consumption_api = get_api
-"""@deprecated: Use get_api instead of get_consumption_api"""
+"""@deprecated: Use get_api instead of get_consumption_api"""
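The net effect for registry.py: `WebApp` resources now flow through the same global lookup functions as tables, streams, and workflows. A minimal usage sketch based only on the accessors added above; the WebApp name is hypothetical:

    from moose_lib.dmv2.registry import get_web_app, get_web_apps

    # Look up a WebApp registered elsewhere in the project. Registration
    # happens as a side effect of constructing the resource, mirroring how
    # OlapTable and Stream populate their registries.
    app = get_web_app("admin-dashboard")  # hypothetical name
    if app is None:
        print("no WebApp registered under that name")

    # Enumerate everything registered so far.
    for name, web_app in get_web_apps().items():
        print(name, web_app)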
moose_lib/dmv2/sql_resource.py
CHANGED
@@ -4,11 +4,14 @@ Base SQL resource definitions for Moose Data Model v2 (dmv2).
 This module provides the base class for SQL resources like Views and Materialized Views,
 handling common functionality like setup/teardown SQL commands and dependency tracking.
 """
+
 from typing import Any, Optional, Union, List
 from pydantic import BaseModel

 from .olap_table import OlapTable
 from ._registry import _sql_resources
+from ._source_capture import get_source_file_from_stack
+

 class SqlResource:
     """Base class for SQL resources like Views and Materialized Views.
@@ -23,22 +26,25 @@ class SqlResource:
     pulls_data_from (list[SqlObject]): List of tables/views this resource reads from.
     pushes_data_to (list[SqlObject]): List of tables/views this resource writes to.
     kind: The kind of the SQL resource (e.g., "SqlResource").
+    source_file: Optional path to the source file where this resource was defined.
     """
+
     setup: list[str]
     teardown: list[str]
     name: str
     kind: str = "SqlResource"
     pulls_data_from: list[Union[OlapTable, "SqlResource"]]
     pushes_data_to: list[Union[OlapTable, "SqlResource"]]
+    source_file: Optional[str]

     def __init__(
-
-
-
-
-
-
-
+        self,
+        name: str,
+        setup: list[str],
+        teardown: list[str],
+        pulls_data_from: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
+        pushes_data_to: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
+        metadata: dict = None,
     ):
         self.name = name
         self.setup = setup
@@ -46,4 +52,6 @@ class SqlResource:
         self.pulls_data_from = pulls_data_from or []
         self.pushes_data_to = pushes_data_to or []
         self.metadata = metadata
-
+        # Capture source file from call stack
+        self.source_file = get_source_file_from_stack()
+        _sql_resources[name] = self
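Beyond the Black-style reformatting, `SqlResource` now records where it was defined via `get_source_file_from_stack()`. A minimal sketch of the new constructor behavior, assuming the signature shown above; the resource name and SQL are illustrative:

    from moose_lib.dmv2.sql_resource import SqlResource

    # Constructing a resource registers it globally and captures the
    # defining module's path from the call stack.
    res = SqlResource(
        name="daily_rollup",  # hypothetical name
        setup=["CREATE VIEW daily_rollup AS SELECT 1"],
        teardown=["DROP VIEW IF EXISTS daily_rollup"],
    )
    print(res.source_file)  # path of the file that constructed the resource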
moose_lib/dmv2/stream.py
CHANGED
@@ -4,16 +4,39 @@ Stream definitions for Moose Data Model v2 (dmv2).
 This module provides classes for defining and configuring data streams,
 including stream transformations, consumers, and dead letter queues.
 """
+
 import dataclasses
 import datetime
+import json
 from typing import Any, Optional, Callable, Union, Literal, Generic
-from pydantic import BaseModel, ConfigDict, AliasGenerator
+from pydantic import BaseModel, ConfigDict, AliasGenerator, Field
 from pydantic.alias_generators import to_camel
+from kafka import KafkaProducer

 from .types import TypedMooseResource, ZeroOrMany, T, U
 from .olap_table import OlapTable
 from ._registry import _streams
 from .life_cycle import LifeCycle
+from ..config.runtime import config_registry, RuntimeKafkaConfig
+from ..commons import get_kafka_producer
+
+
+class SubjectLatest(BaseModel):
+    name: str = Field(serialization_alias="subjectLatest")
+
+
+class SubjectVersion(BaseModel):
+    subject: str
+    version: int
+
+
+class SchemaById(BaseModel):
+    id: int
+
+
+class KafkaSchemaConfig(BaseModel):
+    kind: Literal["JSON", "AVRO", "PROTOBUF"]
+    reference: Union[SubjectLatest, SubjectVersion, SchemaById]


 class StreamConfig(BaseModel):
@@ -28,6 +51,7 @@ class StreamConfig(BaseModel):
     life_cycle: Determines how changes in code will propagate to the resources.
     default_dead_letter_queue: default dead letter queue used by transforms/consumers
     """
+
     parallelism: int = 1
     retention_period: int = 60 * 60 * 24 * 7  # 7 days
     destination: Optional[OlapTable] = None
@@ -37,6 +61,7 @@ class StreamConfig(BaseModel):
     default_dead_letter_queue: "Optional[DeadLetterQueue]" = None
     # allow DeadLetterQueue
     model_config = ConfigDict(arbitrary_types_allowed=True)
+    schema_config: Optional[KafkaSchemaConfig] = None


 class TransformConfig(BaseModel):
@@ -46,6 +71,7 @@ class TransformConfig(BaseModel):
     version: Optional version string to identify a specific transformation.
     Allows multiple transformations to the same destination if versions differ.
     """
+
     version: Optional[str] = None
     dead_letter_queue: "Optional[DeadLetterQueue]" = None
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -59,6 +85,7 @@ class ConsumerConfig(BaseModel):
     version: Optional version string to identify a specific consumer.
     Allows multiple consumers if versions differ.
     """
+
     version: Optional[str] = None
     dead_letter_queue: "Optional[DeadLetterQueue]" = None
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -67,6 +94,7 @@
 @dataclasses.dataclass
 class _RoutedMessage:
     """Internal class representing a message routed to a specific stream."""
+
     destination: "Stream[Any]"
     values: ZeroOrMany[Any]
@@ -74,6 +102,7 @@ class _RoutedMessage:
 @dataclasses.dataclass
 class ConsumerEntry(Generic[T]):
     """Internal class representing a consumer with its configuration."""
+
     consumer: Callable[[T], None]
     config: ConsumerConfig
@@ -81,6 +110,7 @@ class ConsumerEntry(Generic[T]):
 @dataclasses.dataclass
 class TransformEntry(Generic[T]):
     """Internal class representing a transformation with its configuration."""
+
     destination: "Stream[Any]"
     transformation: Callable[[T], ZeroOrMany[Any]]
     config: TransformConfig
@@ -105,11 +135,14 @@ class Stream(TypedMooseResource, Generic[T]):
     name (str): The name of the stream.
     model_type (type[T]): The Pydantic model associated with this stream.
     """
+
     config: StreamConfig
     transformations: dict[str, list[TransformEntry[T]]]
     consumers: list[ConsumerEntry[T]]
     _multipleTransformations: Optional[Callable[[T], list[_RoutedMessage]]] = None
     default_dead_letter_queue: "Optional[DeadLetterQueue[T]]" = None
+    _memoized_producer: Optional[KafkaProducer] = None
+    _kafka_config_hash: Optional[str] = None

     def __init__(self, name: str, config: "StreamConfig" = None, **kwargs):
         super().__init__()
@@ -121,8 +154,12 @@ class Stream(TypedMooseResource, Generic[T]):
         self.default_dead_letter_queue = self.config.default_dead_letter_queue
         _streams[name] = self

-    def add_transform(
-
+    def add_transform(
+        self,
+        destination: "Stream[U]",
+        transformation: Callable[[T], ZeroOrMany[U]],
+        config: TransformConfig = None,
+    ):
         """Adds a transformation step from this stream to a destination stream.

         The transformation function receives a record of type `T` and should return
@@ -135,23 +172,37 @@ class Stream(TypedMooseResource, Generic[T]):
         """
         config = config or TransformConfig()
         if (
-
-
+            self.default_dead_letter_queue is not None
+            and config.dead_letter_queue is None
         ):
             config = config.model_copy()
             config.dead_letter_queue = self.default_dead_letter_queue
         if destination.name in self.transformations:
             existing_transforms = self.transformations[destination.name]
             # Check if a transform with this version already exists
-            has_version = any(
+            has_version = any(
+                t.config.version == config.version for t in existing_transforms
+            )
             if not has_version:
                 existing_transforms.append(
-                    TransformEntry(
+                    TransformEntry(
+                        destination=destination,
+                        transformation=transformation,
+                        config=config,
+                    )
+                )
         else:
             self.transformations[destination.name] = [
-                TransformEntry(
-
-
+                TransformEntry(
+                    destination=destination,
+                    transformation=transformation,
+                    config=config,
+                )
+            ]
+
+    def add_consumer(
+        self, consumer: Callable[[T], None], config: ConsumerConfig = None
+    ):
         """Adds a consumer function to be executed for each record in the stream.

         Consumers are typically used for side effects like logging or triggering external actions.
@@ -162,8 +213,8 @@ class Stream(TypedMooseResource, Generic[T]):
         """
         config = config or ConsumerConfig()
         if (
-
-
+            self.default_dead_letter_queue is not None
+            and config.dead_letter_queue is None
         ):
             config = config.model_copy()
             config.dead_letter_queue = self.default_dead_letter_queue
@@ -216,6 +267,160 @@ class Stream(TypedMooseResource, Generic[T]):
         """
         self._multipleTransformations = transformation

+    def _build_full_topic_name(self, namespace: Optional[str]) -> str:
+        """Build full topic name with optional namespace and version suffix."""
+        version_suffix = (
+            f"_{self.config.version.replace('.', '_')}" if self.config.version else ""
+        )
+        base = f"{self.name}{version_suffix}"
+        return f"{namespace}.{base}" if namespace else base
+
+    def _create_kafka_config_hash(self, cfg: RuntimeKafkaConfig) -> str:
+        import hashlib
+
+        config_string = ":".join(
+            str(x)
+            for x in (
+                cfg.broker,
+                cfg.message_timeout_ms,
+                cfg.sasl_username,
+                cfg.sasl_password,
+                cfg.sasl_mechanism,
+                cfg.security_protocol,
+                cfg.namespace,
+            )
+        )
+        return hashlib.sha256(config_string.encode()).hexdigest()[:16]
+
+    def _parse_brokers(self, broker_string: str) -> list[str]:
+        if not broker_string:
+            return []
+        return [b.strip() for b in broker_string.split(",") if b.strip()]
+
+    def _get_memoized_producer(self) -> tuple[KafkaProducer, RuntimeKafkaConfig]:
+        """Create or reuse a KafkaProducer using runtime configuration."""
+        cfg: RuntimeKafkaConfig = config_registry.get_kafka_config()
+        current_hash = self._create_kafka_config_hash(cfg)
+
+        if (
+            self._memoized_producer is not None
+            and self._kafka_config_hash == current_hash
+        ):
+            return self._memoized_producer, cfg
+
+        # Close previous producer if config changed
+        if (
+            self._memoized_producer is not None
+            and self._kafka_config_hash != current_hash
+        ):
+            try:
+                self._memoized_producer.flush()
+                self._memoized_producer.close()
+            except Exception:
+                pass
+            finally:
+                self._memoized_producer = None
+
+        brokers = self._parse_brokers(cfg.broker)
+        if not brokers:
+            raise RuntimeError(f"No valid broker addresses found in: '{cfg.broker}'")
+
+        producer = get_kafka_producer(
+            broker=brokers,
+            sasl_username=cfg.sasl_username,
+            sasl_password=cfg.sasl_password,
+            sasl_mechanism=cfg.sasl_mechanism,
+            security_protocol=cfg.security_protocol,
+            value_serializer=lambda v: v.model_dump_json().encode("utf-8"),
+            acks="all",
+        )
+
+        self._memoized_producer = producer
+        self._kafka_config_hash = current_hash
+        return producer, cfg
+
+    def close_producer(self) -> None:
+        """Closes the memoized Kafka producer if it exists."""
+        if self._memoized_producer is not None:
+            try:
+                self._memoized_producer.flush()
+                self._memoized_producer.close()
+            except Exception:
+                pass
+            finally:
+                self._memoized_producer = None
+                self._kafka_config_hash = None
+
+    def send(self, values: ZeroOrMany[T]) -> None:
+        """Send one or more records to this stream's Kafka topic.
+
+        If `schema_registry` (JSON) is configured, resolve schema id and
+        send using Confluent wire format (0x00 + 4-byte schema id + JSON bytes).
+        Otherwise, values are JSON-serialized.
+        """
+        # Normalize inputs to a flat list of records
+        filtered: list[T] = []
+        if isinstance(values, list):
+            for v in values:
+                if v is None:
+                    continue
+                else:
+                    filtered.append(v)
+        elif values is not None:
+            filtered.append(values)  # type: ignore[arg-type]
+
+        if len(filtered) == 0:
+            return
+
+        # ensure all records are instances of the stream's model type
+        model_type = self._t
+        for rec in filtered:
+            if not isinstance(rec, model_type):
+                raise TypeError(
+                    f"Stream '{self.name}' expects instances of {model_type.__name__}, "
+                    f"got {type(rec).__name__}"
+                )
+
+        producer, cfg = self._get_memoized_producer()
+        topic = self._build_full_topic_name(cfg.namespace)
+
+        sr = self.config.schema_config
+        if sr is not None:
+            if sr.kind != "JSON":
+                raise NotImplementedError("Currently JSON Schema is supported.")
+            try:
+                from confluent_kafka.schema_registry import SchemaRegistryClient
+                from confluent_kafka.schema_registry.json_schema import JSONSerializer
+            except Exception as e:
+                raise RuntimeError(
+                    "confluent-kafka[json,schemaregistry] is required for Schema Registry JSON"
+                ) from e
+
+            sr_url = cfg.schema_registry_url
+            if not sr_url:
+                raise RuntimeError("Schema Registry URL not configured")
+            client = SchemaRegistryClient({"url": sr_url})
+
+            if isinstance(sr.reference, SchemaById):
+                schema = client.get_schema(sr.reference.id)
+            elif isinstance(sr.reference, SubjectLatest):
+                schema = client.get_latest_version(sr.reference.name).schema
+            else:
+                schema = client.get_version(
+                    sr.reference.subject, sr.reference.version
+                ).schema
+
+            serializer = JSONSerializer(schema, client)
+
+            for rec in filtered:
+                value_bytes = serializer(rec.model_dump())
+                producer.send(topic, value=value_bytes)
+            producer.flush()
+        else:
+            for rec in filtered:
+                producer.send(topic, value=rec)
+            producer.flush()
+

 class DeadLetterModel(BaseModel, Generic[T]):
     """Model for dead letter queue messages.
@@ -227,9 +432,12 @@ class DeadLetterModel(BaseModel, Generic[T]):
     failed_at: Timestamp when the error occurred.
     source: Source of the error ("api", "transform", or "table").
     """
-
-
-
+
+    model_config = ConfigDict(
+        alias_generator=AliasGenerator(
+            serialization_alias=to_camel,
+        )
+    )
     original_record: Any
     error_message: str
     error_type: str
@@ -261,10 +469,16 @@ class DeadLetterQueue(Stream, Generic[T]):
         """
         self._model_type = self._get_type(kwargs)
         kwargs["t"] = DeadLetterModel[self._model_type]
-        super().__init__(
-
-
-
+        super().__init__(
+            name, config if config is not None else StreamConfig(), **kwargs
+        )
+
+    def add_transform(
+        self,
+        destination: Stream[U],
+        transformation: Callable[[DeadLetterModel[T]], ZeroOrMany[U]],
+        config: TransformConfig = None,
+    ):
         def wrapped_transform(record: DeadLetterModel[T]):
             record._t = self._model_type
             return transformation(record)
@@ -272,7 +486,11 @@ class DeadLetterQueue(Stream, Generic[T]):
         config = config or TransformConfig()
         super().add_transform(destination, wrapped_transform, config)

-    def add_consumer(
+    def add_consumer(
+        self,
+        consumer: Callable[[DeadLetterModel[T]], None],
+        config: ConsumerConfig = None,
+    ):
         def wrapped_consumer(record: DeadLetterModel[T]):
             record._t = self._model_type
             return consumer(record)
@@ -280,7 +498,9 @@ class DeadLetterQueue(Stream, Generic[T]):
         config = config or ConsumerConfig()
         super().add_consumer(wrapped_consumer, config)

-    def set_multi_transform(
+    def set_multi_transform(
+        self, transformation: Callable[[DeadLetterModel[T]], list[_RoutedMessage]]
+    ):
         def wrapped_transform(record: DeadLetterModel[T]):
             record._t = self._model_type
             return transformation(record)
moose_lib/dmv2/types.py
CHANGED
@@ -5,6 +5,7 @@ This module provides the core type definitions and base classes used across
 the dmv2 package, including generic type parameters, type aliases, and base
 resource classes.
 """
+
 from typing import Any, Generic, TypeVar, Union

 import typing_extensions
@@ -12,10 +13,10 @@ from pydantic import BaseModel
 from pydantic.fields import FieldInfo
 from ..data_models import Column

-T = TypeVar(
-U = TypeVar(
-T_none = TypeVar(
-U_none = TypeVar(
+T = TypeVar("T", bound=BaseModel)
+U = TypeVar("U", bound=BaseModel)
+T_none = TypeVar("T_none", bound=Union[BaseModel, None])
+U_none = TypeVar("U_none", bound=Union[BaseModel, None])
 type ZeroOrMany[T] = Union[T, list[T], None]


@@ -33,6 +34,7 @@ class Cols:
     >>> print(table.cols.user_id)  # Output: a column object
     >>> print(table.cols.non_existent)  # Raises AttributeError
     """
+
     _columns: dict[str, Column]

     def __init__(self, columns: list[Column]):
@@ -52,7 +54,7 @@ class Cols:
         return self.__getattr__(item)


-@typing_extensions.deprecated(
+@typing_extensions.deprecated("use cols in OlapTable instead")
 class Columns(Generic[T]):
     """Provides runtime checked column name access for Moose resources.

@@ -70,6 +72,7 @@ class Columns(Generic[T]):
     Args:
         model: The Pydantic model type whose fields represent the columns.
     """
+
     _fields: dict[str, FieldInfo]

     def __init__(self, model: type[T]):
@@ -90,14 +93,17 @@ class BaseTypedResource(Generic[T]):
     Attributes:
         name (str): The name of the Moose resource.
     """
+
     _t: type[T]
     name: str

     @classmethod
     def _get_type(cls, keyword_args: dict):
-        t = keyword_args.get(
+        t = keyword_args.get("t")
         if t is None:
-            raise ValueError(
+            raise ValueError(
+                f"Use `{cls.__name__}[T](name='...')` to supply the Pydantic model type`"
+            )
         if not isinstance(t, type) or not issubclass(t, BaseModel):
             raise ValueError(f"{t} is not a Pydantic model")
         return t
@@ -130,7 +136,7 @@ class TypedMooseResource(BaseTypedResource, Generic[T]):
     """

     @property
-    @typing_extensions.deprecated(
+    @typing_extensions.deprecated("use cols in OlapTable instead", category=None)
     def columns(self):
         return Columns[T](self._t)

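The types.py changes are mostly formatting, but `_get_type` now spells out the expected construction form in its error message. A small sketch of that behavior, assuming resource constructors route their kwargs through `_get_type` the way `DeadLetterQueue.__init__` does above:

    from pydantic import BaseModel
    from moose_lib.dmv2.stream import Stream

    class Event(BaseModel):  # hypothetical model
        id: int

    stream = Stream[Event]("events")  # subscripting supplies the model type

    try:
        Stream("untyped")  # no model supplied; _get_type finds no "t" kwarg
    except ValueError as err:
        print(err)  # Use `Stream[T](name='...')` to supply the Pydantic model type`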
moose_lib/dmv2/view.py
CHANGED
@@ -4,12 +4,14 @@ View definitions for Moose Data Model v2 (dmv2).
 This module provides classes for defining standard SQL Views,
 including their SQL statements and dependencies.
 """
+
 from typing import Union, List, Optional
 from pydantic import BaseModel

 from .sql_resource import SqlResource
 from .olap_table import OlapTable

+
 class View(SqlResource):
     """Represents a standard SQL database View.

@@ -27,10 +29,15 @@ class View(SqlResource):
     pulls_data_from (list[SqlObject]): Source tables/views.
     """

-    def __init__(
-
-
-
-    ]
+    def __init__(
+        self,
+        name: str,
+        select_statement: str,
+        base_tables: list[Union[OlapTable, SqlResource]],
+        metadata: dict = None,
+    ):
+        setup = [f"CREATE VIEW IF NOT EXISTS {name} AS {select_statement}".strip()]
         teardown = [f"DROP VIEW IF EXISTS {name}"]
-        super().__init__(
+        super().__init__(
+            name, setup, teardown, pulls_data_from=base_tables, metadata=metadata
+        )