moose-lib 0.4.160-py3-none-any.whl → 0.4.162-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/data_models.py +11 -5
- moose_lib/dmv2.py +86 -13
- moose_lib/streaming/streaming_function_runner.py +92 -45
- {moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/METADATA +1 -1
- {moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/RECORD +7 -7
- {moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/WHEEL +0 -0
- {moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/top_level.txt +0 -0
moose_lib/data_models.py
CHANGED
@@ -2,6 +2,7 @@ import dataclasses
 from decimal import Decimal
 import re
 from enum import Enum
+from inspect import isclass
 from uuid import UUID
 from datetime import datetime, date

@@ -162,6 +163,11 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da
         data_type = "UUID"
     elif t is Any:
         data_type = "Json"
+    elif get_origin(t) is Literal and all(isinstance(arg, str) for arg in get_args(t)):
+        data_type = "String"
+        mds.append("LowCardinality")
+    elif not isclass(t):
+        raise ValueError(f"Unknown type {t}")
     elif issubclass(t, BaseModel):
         data_type = Nested(
             name=t.__name__,

@@ -186,15 +192,15 @@ def _to_columns(model: type[BaseModel]) -> list[Column]:
         primary_key, field_type = handle_key(field_type)
         is_jwt, field_type = handle_jwt(field_type)

-        optional, …
+        optional, mds, data_type = py_type_to_column_type(field_type, field_info.metadata)

         annotations = []
-        for …
-        if isinstance(…
+        for md in mds:
+            if isinstance(md, AggregateFunction):
                 annotations.append(
-                    ("aggregationFunction", …
+                    ("aggregationFunction", md.to_dict())
                 )
-        if …
+            if md == "LowCardinality":
                 annotations.append(
                     ("LowCardinality", True)
                 )
moose_lib/dmv2.py
CHANGED
@@ -9,8 +9,10 @@ It mirrors the functionality of the TypeScript `dmv2` module, enabling the defin
 of data infrastructure using Python and Pydantic models.
 """
 import dataclasses
-…
-from …
+import datetime
+from typing import Any, Generic, Optional, TypeVar, Callable, Union, Literal
+from pydantic import BaseModel, ConfigDict, AliasGenerator
+from pydantic.alias_generators import to_camel
 from pydantic.fields import FieldInfo
 from pydantic.json_schema import JsonSchemaValue

@@ -176,6 +178,8 @@ class TransformConfig(BaseModel):
     Allows multiple transformations to the same destination if versions differ.
     """
     version: Optional[str] = None
+    dead_letter_queue: "Optional[DeadLetterQueue]" = None
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 class ConsumerConfig(BaseModel):

@@ -186,6 +190,8 @@ class ConsumerConfig(BaseModel):
     Allows multiple consumers if versions differ.
     """
     version: Optional[str] = None
+    dead_letter_queue: "Optional[DeadLetterQueue]" = None
+    model_config = ConfigDict(arbitrary_types_allowed=True)


 @dataclasses.dataclass

@@ -242,7 +248,8 @@ class Stream(TypedMooseResource, Generic[T]):
         self.transformations = {}
         _streams[name] = self

-    def add_transform(self, destination: "Stream[U]", transformation: Callable[[T], ZeroOrMany[U]], …
+    def add_transform(self, destination: "Stream[U]", transformation: Callable[[T], ZeroOrMany[U]],
+                      config: TransformConfig = None):
         """Adds a transformation step from this stream to a destination stream.

         The transformation function receives a record of type `T` and should return

@@ -253,16 +260,19 @@ class Stream(TypedMooseResource, Generic[T]):
             transformation: A callable that performs the transformation.
             config: Optional configuration, primarily for setting a version.
         """
+        config = config or TransformConfig()
         if destination.name in self.transformations:
             existing_transforms = self.transformations[destination.name]
             # Check if a transform with this version already exists
             has_version = any(t.config.version == config.version for t in existing_transforms)
             if not has_version:
-                existing_transforms.append(…
+                existing_transforms.append(
+                    TransformEntry(destination=destination, transformation=transformation, config=config))
         else:
-            self.transformations[destination.name] = […
+            self.transformations[destination.name] = [
+                TransformEntry(destination=destination, transformation=transformation, config=config)]

-    def add_consumer(self, consumer: Callable[[T], None], config: ConsumerConfig = …
+    def add_consumer(self, consumer: Callable[[T], None], config: ConsumerConfig = None):
         """Adds a consumer function to be executed for each record in the stream.

         Consumers are typically used for side effects like logging or triggering external actions.

@@ -271,6 +281,7 @@ class Stream(TypedMooseResource, Generic[T]):
             consumer: A callable that accepts a record of type `T`.
             config: Optional configuration, primarily for setting a version.
         """
+        config = config or ConsumerConfig()
         has_version = any(c.config.version == config.version for c in self.consumers)
         if not has_version:
             self.consumers.append(ConsumerEntry(consumer=consumer, config=config))

@@ -321,6 +332,68 @@ class Stream(TypedMooseResource, Generic[T]):
         self._multipleTransformations = transformation


+class DeadLetterModel(BaseModel, Generic[T]):
+    model_config = ConfigDict(alias_generator=AliasGenerator(
+        serialization_alias=to_camel,
+    ))
+    original_record: Any
+    error_message: str
+    error_type: str
+    failed_at: datetime.datetime
+    source: Literal["api", "transform", "table"]
+
+    def as_t(self) -> T:
+        return self._t.model_validate(self.original_record)
+
+
+class DeadLetterQueue(Stream, Generic[T]):
+    """A specialized Stream for handling failed records.
+
+    Dead letter queues store records that failed during processing, along with
+    error information to help diagnose and potentially recover from failures.
+
+    Attributes:
+        All attributes inherited from Stream.
+    """
+
+    _model_type: type[T]
+
+    def __init__(self, name: str, config: StreamConfig = StreamConfig(), **kwargs):
+        """Initialize a new DeadLetterQueue.
+
+        Args:
+            name: The name of the dead letter queue stream.
+            config: Configuration for the stream.
+        """
+        self._model_type = self._get_type(kwargs)
+        kwargs["t"] = DeadLetterModel[self._model_type]
+        super().__init__(name, config, **kwargs)
+
+    def add_transform(self, destination: Stream[U], transformation: Callable[[DeadLetterModel[T]], ZeroOrMany[U]],
+                      config: TransformConfig = None):
+        def wrapped_transform(record: DeadLetterModel[T]):
+            record._t = self._model_type
+            return transformation(record)
+
+        config = config or TransformConfig()
+        super().add_transform(destination, wrapped_transform, config)
+
+    def add_consumer(self, consumer: Callable[[DeadLetterModel[T]], None], config: ConsumerConfig = None):
+        def wrapped_consumer(record: DeadLetterModel[T]):
+            record._t = self._model_type
+            return consumer(record)
+
+        config = config or ConsumerConfig()
+        super().add_consumer(wrapped_consumer, config)
+
+    def set_multi_transform(self, transformation: Callable[[DeadLetterModel[T]], list[_RoutedMessage]]):
+        def wrapped_transform(record: DeadLetterModel[T]):
+            record._t = self._model_type
+            return transformation(record)
+
+        super().set_multi_transform(wrapped_transform)
+
+
 class IngestConfig(BaseModel):
     """Basic configuration for an ingestion point.


@@ -602,10 +675,10 @@ class SqlResource:
     pushes_data_to: list[Union[OlapTable, "SqlResource"]]

     def __init__(
-        self,
-        name: str,
-        setup: list[str],
-        teardown: list[str],
+            self,
+            name: str,
+            setup: list[str],
+            teardown: list[str],
             pulls_data_from: Optional[list[Union[OlapTable, "SqlResource"]]] = None,
             pushes_data_to: Optional[list[Union[OlapTable, "SqlResource"]]] = None
     ):

@@ -705,12 +778,12 @@ class MaterializedView(SqlResource, BaseTypedResource, Generic[T]):
         )

         super().__init__(
-            options.materialized_view_name,
-            setup,
+            options.materialized_view_name,
+            setup,
             teardown,
             pulls_data_from=options.select_tables,
             pushes_data_to=[target_table]
         )
-
+
         self.target_table = target_table
         self.config = options
moose_lib/streaming/streaming_function_runner.py
CHANGED
@@ -15,6 +15,7 @@ The runner handles:

 import argparse
 import dataclasses
+import traceback
 from datetime import datetime, timezone
 from importlib import import_module
 import io

@@ -27,8 +28,8 @@ import threading
 import time
 from typing import Optional, Callable, Tuple, Any

-from moose_lib.dmv2 import _streams
-from moose_lib import cli_log, CliLogData
+from moose_lib.dmv2 import _streams, DeadLetterModel
+from moose_lib import cli_log, CliLogData, DeadLetterQueue

 # Force stdout to be unbuffered
 sys.stdout = io.TextIOWrapper(

@@ -37,6 +38,7 @@ sys.stdout = io.TextIOWrapper(
     line_buffering=True
 )

+
 @dataclasses.dataclass
 class KafkaTopicConfig:
     """

@@ -76,9 +78,10 @@ class KafkaTopicConfig:
             name = name.removeprefix(prefix)
         else:
             raise Exception(f"Namespace prefix {prefix} not found in topic name {name}")
-
+
         return name

+
 class EnhancedJSONEncoder(json.JSONEncoder):
     """
     Custom JSON encoder that handles:

@@ -86,6 +89,7 @@ class EnhancedJSONEncoder(json.JSONEncoder):
     - dataclass instances (converts to dict)
     - Pydantic models (converts to dict)
     """
+
     def default(self, o):
         if isinstance(o, datetime):
             if o.tzinfo is None:

@@ -105,6 +109,7 @@ class EnhancedJSONEncoder(json.JSONEncoder):
             return dataclasses.asdict(o)
         return super().default(o)

+
 def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str) -> Tuple[type, Callable]:
     """
     Load a DMV1 (legacy) streaming function from a Python module.

@@ -136,7 +141,9 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str

     # Make sure that there is only one flow in the file
     if len(streaming_functions) != 1:
-        cli_log(CliLogData(action="Function", …
+        cli_log(CliLogData(action="Function",
+                           message=f"Expected one streaming function in the file, but got {len(streaming_functions)}",
+                           message_type="Error"))
         sys.exit(1)

     # get the flow definition

@@ -150,7 +157,9 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str

     return run_input_type, streaming_function_run

-…
+
+def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str) -> tuple[
+    type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
     """
     Load a DMV2 streaming function by finding the stream transformation that matches
     the source and target topics.

@@ -162,7 +171,7 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
     Returns:
         Tuple of (input_type, transformation_functions) where:
         - input_type is the Pydantic model type of the source stream
-        - transformation_functions is a list of functions that transform source to target data
+        - transformation_functions is a list of functions that transform source to target data and their dead letter queues

     Raises:
         SystemExit: If module import fails or if no matching transformation is found

@@ -182,7 +191,7 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
             continue

         if stream.has_consumers() and target_topic is None:
-            consumers = [entry.consumer for entry in stream.consumers]
+            consumers = [(entry.consumer, entry.config.dead_letter_queue) for entry in stream.consumers]
             if not consumers:
                 continue
             return stream.model_type, consumers

@@ -193,15 +202,16 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
         # The destination topic name should match the destination stream name
         if source_py_stream_name == source_topic.topic_name_to_stream_name() and dest_stream_py_name == target_topic.topic_name_to_stream_name():
             # Found the matching transformation
-            transformations = [entry.transformation for entry in …
+            transformations = [(entry.transformation, entry.config.dead_letter_queue) for entry in
+                               transform_entries]
             if not transformations:
                 continue
             return stream.model_type, transformations

     # If we get here, no matching transformation was found
     cli_log(CliLogData(
-        action="Function",
-        message=f"No transformation found from {source_topic.name} to {target_topic.name}",
+        action="Function",
+        message=f"No transformation found from {source_topic.name} to {target_topic.name}",
         message_type="Error"
     ))
     sys.exit(1)

@@ -215,14 +225,16 @@ parser.add_argument('source_topic_json', type=str, help='The source topic for th
 # In DMV1 the dir is the dir of the streaming function file
 # and the function_file_name is the file name of the streaming function without the .py extension
 parser.add_argument('function_file_dir', type=str, help='The dir of the streaming function file')
-parser.add_argument('function_file_name', type=str, …
+parser.add_argument('function_file_name', type=str,
+                    help='The file name of the streaming function without the .py extension')
 parser.add_argument('broker', type=str, help='The broker to use for the streaming function')
 parser.add_argument('--target_topic_json', type=str, help='The target topic for the streaming function')
 parser.add_argument('--sasl_username', type=str, help='The SASL username to use for the streaming function')
 parser.add_argument('--sasl_password', type=str, help='The SASL password to use for the streaming function')
 parser.add_argument('--sasl_mechanism', type=str, help='The SASL mechanism to use for the streaming function')
 parser.add_argument('--security_protocol', type=str, help='The security protocol to use for the streaming function')
-parser.add_argument('--dmv2', action=argparse.BooleanOptionalAction, type=bool, …
+parser.add_argument('--dmv2', action=argparse.BooleanOptionalAction, type=bool,
+                    help='Whether to use the DMV2 format for the streaming function')

 args = parser.parse_args()

@@ -255,10 +267,12 @@ sasl_config = {
 streaming_function_id = f'flow-{source_topic.name}-{target_topic.name}' if target_topic else f'flow-{source_topic.name}'
 log_prefix = f"{source_topic.name} -> {target_topic.name}" if target_topic else f"{source_topic.name} -> None"

+
 def log(msg: str) -> None:
     """Log a message with the source->target topic prefix."""
     print(f"{log_prefix}: {msg}")

+
 def error(msg: str) -> None:
     """Raise an exception with the source->target topic prefix."""
     raise Exception(f"{log_prefix}: {msg}")

@@ -278,6 +292,7 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
     Returns:
         An instance of run_input_type populated with the JSON data
     """
+
     def deserialize(data, cls):
         if hasattr(cls, "model_validate"):  # Check if it's a Pydantic model
             return cls.model_validate(data)

@@ -292,7 +307,7 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
     return deserialize(json_input, run_input_type)


-def create_consumer():
+def create_consumer() -> KafkaConsumer:
     """
     Create a Kafka consumer configured for the source topic.

@@ -304,7 +319,7 @@ def create_consumer():
     if sasl_config['mechanism'] is not None:
         return KafkaConsumer(
             source_topic.name,
-            client_id=…
+            client_id="python_streaming_function_consumer",
             group_id=streaming_function_id,
             bootstrap_servers=broker,
             sasl_plain_username=sasl_config['username'],

@@ -318,13 +333,14 @@ def create_consumer():
     log("No sasl mechanism specified. Using default consumer.")
     return KafkaConsumer(
         source_topic.name,
-        client_id=…
+        client_id="python_streaming_function_consumer",
         group_id=streaming_function_id,
         bootstrap_servers=broker,
         # consumer_timeout_ms=10000,
         value_deserializer=lambda m: json.loads(m.decode('utf-8'))
     )

+
 def create_producer() -> Optional[KafkaProducer]:
     """
     Create a Kafka producer configured for the target topic.

@@ -334,7 +350,7 @@ def create_producer() -> Optional[KafkaProducer]:
     Returns:
         Configured KafkaProducer instance
     """
-    if sasl_config['mechanism'] is not None …
+    if sasl_config['mechanism'] is not None:
         return KafkaProducer(
             bootstrap_servers=broker,
             sasl_plain_username=sasl_config['username'],

@@ -343,16 +359,12 @@ def create_producer() -> Optional[KafkaProducer]:
             security_protocol=args.security_protocol,
             max_request_size=target_topic.max_message_bytes
         )
-        …
-        …
-        …
-        …
-        …
-        …
-        )
-    else:
-        log("No target topic specified. Not creating producer.")
-        return None
+    log("No sasl mechanism specified. Using default producer.")
+    return KafkaProducer(
+        bootstrap_servers=broker,
+        max_in_flight_requests_per_connection=1,
+        max_request_size=target_topic.max_message_bytes
+    )


 def main():

@@ -387,7 +399,7 @@ def main():
             time.sleep(1)
             with metrics_lock:
                 requests.post(
-                    "http://localhost:5001/metrics-logs",
+                    "http://localhost:5001/metrics-logs",
                     json={
                         'timestamp': datetime.now(timezone.utc).isoformat(),
                         'count_in': metrics['count_in'],

@@ -405,30 +417,35 @@ def main():
     streaming_function_input_type = None
     streaming_function_callables = None
     if args.dmv2:
-        streaming_function_input_type, streaming_function_callables = load_streaming_function_dmv2(…
+        streaming_function_input_type, streaming_function_callables = load_streaming_function_dmv2(
+            function_file_dir, function_file_name)
     else:
-        streaming_function_input_type, streaming_function_callable = load_streaming_function_dmv1(…
+        streaming_function_input_type, streaming_function_callable = load_streaming_function_dmv1(
+            function_file_dir, function_file_name)

-        streaming_function_callables = [streaming_function_callable]
+        streaming_function_callables = [(streaming_function_callable, None)]
+
+    needs_producer = target_topic is not None or any(
+        pair[1] is not None for pair in streaming_function_callables)

     # Initialize Kafka connections in the processing thread
     consumer = create_consumer()
-    producer = create_producer()
-
+    producer = create_producer() if needs_producer else None
+
     # Store references for cleanup
     kafka_refs['consumer'] = consumer
     kafka_refs['producer'] = producer

     # Subscribe to topic
     consumer.subscribe([source_topic.name])
-
+
     log("Kafka consumer and producer initialized in processing thread")

     while running.is_set():
         try:
             # Poll with timeout to allow checking running state
             messages = consumer.poll(timeout_ms=1000)
-
+
             if not messages:
                 continue

@@ -443,8 +460,36 @@ def main():

             # Run the flow
             all_outputs = []
-            for streaming_function_callable in streaming_function_callables:
-                …
+            for (streaming_function_callable, dlq) in streaming_function_callables:
+                try:
+                    output_data = streaming_function_callable(input_data)
+                except Exception as e:
+                    traceback.print_exc()
+                    if dlq is not None:
+                        dead_letter = DeadLetterModel(
+                            original_record=message.value,
+                            error_message=str(e),
+                            error_type=e.__class__.__name__,
+                            failed_at=datetime.now(timezone.utc),
+                            source="transform"
+                        )
+                        record = dead_letter.model_dump_json().encode('utf-8')
+                        producer.send(dlq.name, record).get()
+                        cli_log(CliLogData(
+                            action="DeadLetter",
+                            message=f"Sent message to DLQ {dlq.name}: {str(e)}",
+                            message_type=CliLogData.ERROR
+                        ))
+                    else:
+                        cli_log(CliLogData(
+                            action="Function",
+                            message=f"Error processing message (no DLQ configured): {str(e)}",
+                            message_type=CliLogData.ERROR
+                        ))
+                    # Skip to the next transformation or message
+                    continue
+
+                # For consumers, output_data will be None
                 if output_data is None:
                     continue

@@ -455,16 +500,17 @@ def main():
             with metrics_lock:
                 metrics['count_in'] += len(output_data_list)

-            cli_log(CliLogData(action="Received", …
+            cli_log(CliLogData(action="Received",
+                               message=f'{log_prefix} {len(output_data_list)} message(s)'))

             if producer is not None:
                 for item in all_outputs:
                     # Ignore flow function returning null
                     if item is not None:
                         record = json.dumps(item, cls=EnhancedJSONEncoder).encode('utf-8')
-
+
                         producer.send(target_topic.name, record)
-
+
                         with metrics_lock:
                             metrics['bytes_count'] += len(record)
                             metrics['count_out'] += 1

@@ -518,32 +564,33 @@ def main():
         # Ensure cleanup happens even if main thread gets interrupted
         running.clear()
         log("Shutting down threads...")
-
+
         # Give threads a chance to exit gracefully with timeout
         metrics_thread.join(timeout=5)
         processing_thread.join(timeout=5)
-
+
         if metrics_thread.is_alive():
             log("Metrics thread did not exit cleanly")
         if processing_thread.is_alive():
             log("Processing thread did not exit cleanly")
-
+
         # Clean up Kafka resources regardless of thread state
         if kafka_refs['consumer']:
             try:
                 kafka_refs['consumer'].close()
             except Exception as e:
                 log(f"Error closing consumer: {e}")
-
+
         if kafka_refs['producer'] and kafka_refs['producer'] is not None:
             try:
                 kafka_refs['producer'].flush()
                 kafka_refs['producer'].close()
             except Exception as e:
                 log(f"Error closing producer: {e}")
-
+
         log("Shutdown complete")
         sys.exit(0)

+
 if __name__ == "__main__":
-    main()
+    main()
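When a transform raises, the runner wraps the original Kafka message into a DeadLetterModel and publishes it to the configured DLQ topic. A small sketch of that payload shape (the field values are illustrative; camelCase keys shown here come from dumping with by_alias=True, which applies the serialization alias generator defined on the model):

from datetime import datetime, timezone
from moose_lib.dmv2 import DeadLetterModel

dead = DeadLetterModel(
    original_record={"id": "abc", "value": -1},   # the raw message value that failed
    error_message="negative value",
    error_type="ValueError",
    failed_at=datetime.now(timezone.utc),
    source="transform",
)
print(dead.model_dump_json(by_alias=True))
# -> {"originalRecord": {"id": "abc", "value": -1}, "errorMessage": "negative value",
#     "errorType": "ValueError", "failedAt": "...", "source": "transform"}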
{moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/RECORD
CHANGED
@@ -1,19 +1,19 @@
 moose_lib/__init__.py,sha256=VYCFk9Nlakd24JPEj7h1cMIeqjp0eofnmE-3jCtPwXM,138
 moose_lib/blocks.py,sha256=_wdvC2NC_Y3MMEnB71WTgWbeQ--zPNHk19xjToJW0C0,3185
 moose_lib/commons.py,sha256=BV5X78MuOWHiZV9bsWSN69JIvzTNWUi-gnuMiAtaO8A,2489
-moose_lib/data_models.py,sha256=…
+moose_lib/data_models.py,sha256=1U8IFQaO1hR8pNGb1UTtdcGQehGgKIvLNtUFFFXC1Ko,7695
 moose_lib/dmv2-serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
-moose_lib/dmv2.py,sha256=…
+moose_lib/dmv2.py,sha256=ahkOdAhEfi7-UN-UHOxFoAZRupO_-6zG7DfxUq9o60c,30530
 moose_lib/internal.py,sha256=URimCqQUW2aUnTmhsXWW2IHt6Gy_xuDih5fVH8SPHDI,12159
 moose_lib/main.py,sha256=In-u7yA1FsLDeP_2bhIgBtHY_BkXaZqDwf7BxwyC21c,8471
 moose_lib/query_param.py,sha256=AB5BKu610Ji-h1iYGMBZKfnEFqt85rS94kzhDwhWJnc,6288
 moose_lib/tasks.py,sha256=6MXA0j7nhvQILAJVTQHCAsquwrSOi2zAevghAc_7kXs,1554
 moose_lib/streaming/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-moose_lib/streaming/streaming_function_runner.py,sha256=…
+moose_lib/streaming/streaming_function_runner.py,sha256=K53lyzGLawAgKgrK3jreJrB7dQfh-Cd0lcJ4je4hGJE,24362
 tests/__init__.py,sha256=0Gh4yzPkkC3TzBGKhenpMIxJcRhyrrCfxLSfpTZnPMQ,53
 tests/conftest.py,sha256=ZVJNbnr4DwbcqkTmePW6U01zAzE6QD0kNAEZjPG1f4s,169
 tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
-moose_lib-0.4.…
-moose_lib-0.4.…
-moose_lib-0.4.…
-moose_lib-0.4.…
+moose_lib-0.4.162.dist-info/METADATA,sha256=xj7rnL729QJ5jhzALdA-zgyaiTroQudch37PwFTGfXA,575
+moose_lib-0.4.162.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+moose_lib-0.4.162.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
+moose_lib-0.4.162.dist-info/RECORD,,
{moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/WHEEL
File without changes
{moose_lib-0.4.160.dist-info → moose_lib-0.4.162.dist-info}/top_level.txt
File without changes