moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +38 -3
- moose_lib/blocks.py +497 -37
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +94 -5
- moose_lib/config/config_file.py +44 -2
- moose_lib/config/runtime.py +137 -5
- moose_lib/data_models.py +451 -46
- moose_lib/dmv2/__init__.py +88 -60
- moose_lib/dmv2/_registry.py +3 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +56 -13
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +304 -119
- moose_lib/dmv2/registry.py +28 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +241 -21
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +175 -0
- moose_lib/dmv2/web_app_helpers.py +96 -0
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +537 -68
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +266 -156
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +38 -1
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +210 -0
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +114 -0
- tests/test_web_app.py +227 -0
- moose_lib-0.6.90.dist-info/RECORD +0 -42
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/streaming/streaming_function_runner.py

@@ -30,13 +30,16 @@ from typing import Optional, Callable, Tuple, Any
 
 from moose_lib.dmv2 import get_streams, DeadLetterModel
 from moose_lib import cli_log, CliLogData, DeadLetterQueue
-from moose_lib.commons import
+from moose_lib.commons import (
+    EnhancedJSONEncoder,
+    moose_management_port,
+    get_kafka_consumer,
+    get_kafka_producer,
+)
 
 # Force stdout to be unbuffered
 sys.stdout = io.TextIOWrapper(
-    open(sys.stdout.fileno(),
-    write_through=True,
-    line_buffering=True
+    open(sys.stdout.fileno(), "wb", 0), write_through=True, line_buffering=True
 )
 
 
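The rewritten stdout setup above reopens the stdout file descriptor as an unbuffered binary stream and wraps it so each line is flushed immediately. A minimal standalone sketch of the same pattern (standard library only):

```python
# Minimal sketch of the unbuffered-stdout pattern used above: reopen the stdout
# file descriptor as a raw, unbuffered binary stream (buffering=0 requires binary
# mode), then wrap it so writes pass straight through and flush on every newline.
import io
import sys

sys.stdout = io.TextIOWrapper(
    open(sys.stdout.fileno(), "wb", 0),
    write_through=True,
    line_buffering=True,
)
print("flushed immediately, even when stdout is piped")
```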
@@ -44,7 +47,7 @@ sys.stdout = io.TextIOWrapper(
 class KafkaTopicConfig:
     """
     Configuration for a Kafka topic including namespace support.
-
+
     Attributes:
         streaming_engine_type: The type of topic (source or target)
         name: Full topic name including namespace if present
@@ -54,6 +57,7 @@ class KafkaTopicConfig:
         namespace: Optional namespace prefix for the topic
         version: Optional version string for the topic
     """
+
     streaming_engine_type: str
     name: str
     partitions: int
@@ -71,31 +75,37 @@ class KafkaTopicConfig:
         if name.endswith(version_suffix):
             name = name.removesuffix(version_suffix)
         else:
-            raise Exception(
+            raise Exception(
+                f"Version suffix {version_suffix} not found in topic name {name}"
+            )
 
         if self.namespace is not None and self.namespace != "":
             prefix = self.namespace + "."
             if name.startswith(prefix):
                 name = name.removeprefix(prefix)
             else:
-                raise Exception(
+                raise Exception(
+                    f"Namespace prefix {prefix} not found in topic name {name}"
+                )
 
         return name
 
 
-def load_streaming_function_dmv1(
+def load_streaming_function_dmv1(
+    function_file_dir: str, function_file_name: str
+) -> Tuple[type, Callable]:
     """
     Load a DMV1 (legacy) streaming function from a Python module.
-
+
     Args:
         function_file_dir: Directory containing the streaming function module
         function_file_name: Name of the module file without .py extension
-
+
     Returns:
         Tuple of (input_type, run_function) where:
         - input_type is the type annotation of the run function's input parameter
         - run_function is the actual transformation function
-
+
     Raises:
         SystemExit: If module import fails or if multiple/no streaming functions found
     """
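The hunk above reformats the topic-name normalization: an optional version suffix and namespace prefix are stripped from the physical Kafka topic name to recover the logical stream name, and an exception is raised when an expected part is missing. An illustrative standalone sketch of that logic (the "0.0" to "_0_0" suffix format and the function name are assumptions, not the library's exact code):

```python
# Illustrative sketch only; not the KafkaTopicConfig method itself.
from typing import Optional


def topic_to_stream_name(name: str, namespace: Optional[str], version: Optional[str]) -> str:
    if version:
        version_suffix = "_" + version.replace(".", "_")
        if not name.endswith(version_suffix):
            raise Exception(f"Version suffix {version_suffix} not found in topic name {name}")
        name = name.removesuffix(version_suffix)
    if namespace:
        prefix = namespace + "."
        if not name.startswith(prefix):
            raise Exception(f"Namespace prefix {prefix} not found in topic name {name}")
        name = name.removeprefix(prefix)
    return name


assert topic_to_stream_name("acme.user_events_0_0", "acme", "0.0") == "user_events"
```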
@@ -110,13 +120,19 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
         sys.exit(1)
 
     # Get all the named flows in the flow file and make sure the flow is of type StreamingFunction
-    streaming_functions = [
+    streaming_functions = [
+        f for f in dir(module) if isinstance(getattr(module, f), streaming_function_def)
+    ]
 
     # Make sure that there is only one flow in the file
     if len(streaming_functions) != 1:
-        cli_log(
-
-
+        cli_log(
+            CliLogData(
+                action="Function",
+                message=f"Expected one streaming function in the file, but got {len(streaming_functions)}",
+                message_type="Error",
+            )
+        )
         sys.exit(1)
 
     # get the flow definition
@@ -126,26 +142,29 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
     streaming_function_run = streaming_function_def.run
 
     # get run input type that doesn't rely on the name of the input parameter
-    run_input_type = streaming_function_run.__annotations__[
+    run_input_type = streaming_function_run.__annotations__[
+        list(streaming_function_run.__annotations__.keys())[0]
+    ]
 
     return run_input_type, streaming_function_run
 
 
-def load_streaming_function_dmv2(
-
+def load_streaming_function_dmv2(
+    function_file_dir: str, function_file_name: str
+) -> tuple[type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
     """
     Load a DMV2 streaming function by finding the stream transformation that matches
     the source and target topics.
-
+
     Args:
         function_file_dir: Directory containing the main.py file
         function_file_name: Name of the main.py file (without extension)
-
+
     Returns:
         Tuple of (input_type, transformation_functions) where:
         - input_type is the Pydantic model type of the source stream
         - transformation_functions is a list of functions that transform source to target data and their dead letter queues
-
+
     Raises:
         SystemExit: If module import fails or if no matching transformation is found
     """
@@ -164,7 +183,10 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
             continue
 
         if stream.has_consumers() and target_topic is None:
-            consumers = [
+            consumers = [
+                (entry.consumer, entry.config.dead_letter_queue)
+                for entry in stream.consumers
+            ]
             if not consumers:
                 continue
             return stream.model_type, consumers
@@ -173,52 +195,94 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
         for dest_stream_py_name, transform_entries in stream.transformations.items():
             # The source topic name should match the stream name
             # The destination topic name should match the destination stream name
-            if
+            if (
+                source_py_stream_name == source_topic.topic_name_to_stream_name()
+                and dest_stream_py_name == target_topic.topic_name_to_stream_name()
+            ):
                 # Found the matching transformation
-                transformations = [
-
+                transformations = [
+                    (entry.transformation, entry.config.dead_letter_queue)
+                    for entry in transform_entries
+                ]
                 if not transformations:
                     continue
                 return stream.model_type, transformations
 
     # If we get here, no matching transformation was found
-    cli_log(
-
-
-
-
+    cli_log(
+        CliLogData(
+            action="Function",
+            message=f"No transformation found from {source_topic.name} to {target_topic.name}",
+            message_type="Error",
+        )
+    )
     sys.exit(1)
 
 
-parser = argparse.ArgumentParser(description=
+parser = argparse.ArgumentParser(description="Run a streaming function")
 
-parser.add_argument(
-
-
+parser.add_argument(
+    "source_topic_json", type=str, help="The source topic for the streaming function"
+)
 # In DMV2 is the dir is the dir of the main.py or index.ts file
 # and the function_file_name is the file name of main.py or index.ts
 # In DMV1 the dir is the dir of the streaming function file
 # and the function_file_name is the file name of the streaming function without the .py extension
-parser.add_argument(
-
-
-parser.add_argument(
-
-
-
-
-parser.add_argument(
-
-
+parser.add_argument(
+    "function_file_dir", type=str, help="The dir of the streaming function file"
+)
+parser.add_argument(
+    "function_file_name",
+    type=str,
+    help="The file name of the streaming function without the .py extension",
+)
+parser.add_argument(
+    "broker", type=str, help="The broker to use for the streaming function"
+)
+parser.add_argument(
+    "--target_topic_json", type=str, help="The target topic for the streaming function"
+)
+parser.add_argument(
+    "--sasl_username",
+    type=str,
+    help="The SASL username to use for the streaming function",
+)
+parser.add_argument(
+    "--sasl_password",
+    type=str,
+    help="The SASL password to use for the streaming function",
+)
+parser.add_argument(
+    "--sasl_mechanism",
+    type=str,
+    help="The SASL mechanism to use for the streaming function",
+)
+parser.add_argument(
+    "--security_protocol",
+    type=str,
+    help="The security protocol to use for the streaming function",
+)
+parser.add_argument(
+    "--dmv2",
+    action=argparse.BooleanOptionalAction,
+    type=bool,
+    help="Whether to use the DMV2 format for the streaming function",
+)
 
 args: argparse.Namespace = parser.parse_args()
 
 for arg in vars(args):
     value = getattr(args, arg)
-    if
-        value =
+    if "password" in arg and value is not None:
+        value = "******"
     print(arg, value)
 
 source_topic = KafkaTopicConfig(**json.loads(args.source_topic_json))
-target_topic =
+target_topic = (
+    KafkaTopicConfig(**json.loads(args.target_topic_json))
+    if args.target_topic_json
+    else None
+)
 function_file_dir = args.function_file_dir
 function_file_name = args.function_file_name
 broker = args.broker
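The argument parser above defines the runner's CLI: four positional arguments (source topic JSON, function directory, function file name, broker) plus optional SASL settings and a `--dmv2` flag. A hypothetical sketch of how an equivalent parser could be fed those arguments (all values below are made up, and the JSON payload is not the full KafkaTopicConfig schema):

```python
# Hypothetical invocation of an equivalent parser; values are illustrative only.
import argparse
import json

parser = argparse.ArgumentParser(description="Run a streaming function")
parser.add_argument("source_topic_json")
parser.add_argument("function_file_dir")
parser.add_argument("function_file_name")
parser.add_argument("broker")
parser.add_argument("--target_topic_json")
parser.add_argument("--sasl_mechanism")
parser.add_argument("--dmv2", action=argparse.BooleanOptionalAction)

args = parser.parse_args([
    json.dumps({"name": "acme.user_events_0_0", "namespace": "acme", "version": "0.0"}),
    "/app",             # function_file_dir
    "main",             # function_file_name, without the .py extension
    "localhost:19092",  # broker
    "--dmv2",
])
print(args.broker, args.dmv2)  # localhost:19092 True
```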
@@ -226,24 +290,36 @@ sasl_mechanism = args.sasl_mechanism
 
 # Setup SASL config w/ supported mechanisms
 if args.sasl_mechanism is not None:
-    if args.sasl_mechanism not in [
+    if args.sasl_mechanism not in ["PLAIN", "SCRAM-SHA-256", "SCRAM-SHA-512"]:
         raise Exception(f"Unsupported SASL mechanism: {args.sasl_mechanism}")
     if args.sasl_username is None or args.sasl_password is None:
-        raise Exception(
+        raise Exception(
+            "SASL username and password must be provided if a SASL mechanism is specified"
+        )
     if args.security_protocol is None:
-        raise Exception(
+        raise Exception(
+            "Security protocol must be provided if a SASL mechanism is specified"
+        )
 
 sasl_config = {
-
-
-
+    "username": args.sasl_username,
+    "password": args.sasl_password,
+    "mechanism": args.sasl_mechanism,
 }
 
 # We use flow- instead of function- because that's what the ACLs in boreal are linked with
-# When migrating - make sure the ACLs are updated to use the new prefix.
+# When migrating - make sure the ACLs are updated to use the new prefix.
 # And make sure the prefixes are the same in the ts-moose-lib and py-moose-lib
-streaming_function_id =
-
+streaming_function_id = (
+    f"flow-{source_topic.name}-{target_topic.name}"
+    if target_topic
+    else f"flow-{source_topic.name}"
+)
+log_prefix = (
+    f"{source_topic.name} -> {target_topic.name}"
+    if target_topic
+    else f"{source_topic.name} (consumer)"
+)
 
 
 def log(msg: str) -> None:
@@ -260,13 +336,13 @@ def error(msg: str) -> None:
 def parse_input(run_input_type: type, json_input: dict) -> Any:
     """
     Parse JSON input data into the appropriate input type for the streaming function.
-
+
     Handles Pydantic models, nested dataclass structures and lists of dataclasses.
-
+
     Args:
         run_input_type: The type to parse the JSON into
         json_input: The JSON data as a Python dict
-
+
     Returns:
         An instance of run_input_type populated with the JSON data
     """
@@ -276,7 +352,12 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
             return cls.model_validate(data)
         elif dataclasses.is_dataclass(cls):
             field_types = {f.name: f.type for f in dataclasses.fields(cls)}
-            return cls(
+            return cls(
+                **{
+                    name: deserialize(data.get(name), field_types[name])
+                    for name in field_types
+                }
+            )
         elif isinstance(data, list):
             return [deserialize(item, cls.__args__[0]) for item in data]
         else:
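The reformatted `parse_input` branch above rebuilds dataclasses field by field and recurses into lists. A self-contained sketch of that recursive idea, simplified relative to the real helper (which also routes Pydantic models through `model_validate` and handles more edge cases):

```python
# Self-contained sketch of the recursive deserialization idea; illustrative only.
import dataclasses
from typing import get_args, get_origin


@dataclasses.dataclass
class Point:
    x: int
    y: int


@dataclasses.dataclass
class Path:
    name: str
    points: list[Point]


def deserialize(data, cls):
    if dataclasses.is_dataclass(cls):
        field_types = {f.name: f.type for f in dataclasses.fields(cls)}
        return cls(**{name: deserialize(data.get(name), t) for name, t in field_types.items()})
    if get_origin(cls) is list and isinstance(data, list):
        (elem_type,) = get_args(cls)
        return [deserialize(item, elem_type) for item in data]
    return data


path = deserialize({"name": "route", "points": [{"x": 1, "y": 2}]}, Path)
assert path.points[0].y == 2
```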
@@ -288,69 +369,66 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
 def create_consumer() -> KafkaConsumer:
     """
     Create a Kafka consumer configured for the source topic.
-
+
     Handles SASL authentication if configured.
-
+    Disables auto-commit to ensure at-least-once processing semantics.
+
     Returns:
         Configured KafkaConsumer instance
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    def _sr_json_deserializer(m: bytes):
+        if m is None:
+            return None
+        # Schema Registry JSON envelope: 0x00 + 4-byte schema ID (big-endian) + JSON
+        if len(m) >= 5 and m[0] == 0x00:
+            m = m[5:]
+        return json.loads(m.decode("utf-8"))
+
+    kwargs = dict(
+        broker=broker,
+        client_id="python_streaming_function_consumer",
+        group_id=streaming_function_id,
+        value_deserializer=_sr_json_deserializer,
+        sasl_username=sasl_config.get("username"),
+        sasl_password=sasl_config.get("password"),
+        sasl_mechanism=sasl_config.get("mechanism"),
+        security_protocol=args.security_protocol,
+        enable_auto_commit=False,  # Disable auto-commit for at-least-once semantics
+        auto_offset_reset="earliest",
+    )
+    consumer = get_kafka_consumer(**kwargs)
+    return consumer
 
 
 def create_producer() -> Optional[KafkaProducer]:
     """
     Create a Kafka producer configured for the target topic.
-
+
     Handles SASL authentication if configured and sets appropriate message size limits.
-
+
     Returns:
         Configured KafkaProducer instance
     """
-    max_request_size =
+    max_request_size = (
+        KafkaProducer.DEFAULT_CONFIG["max_request_size"]
+        if target_topic is None
         else target_topic.max_message_bytes
-
-
-
-
-
-
-
-
-    )
-    log("No sasl mechanism specified. Using default producer.")
-    return KafkaProducer(
-        bootstrap_servers=broker,
-        max_in_flight_requests_per_connection=1,
-        max_request_size=max_request_size
+    )
+    return get_kafka_producer(
+        broker=broker,
+        sasl_username=sasl_config.get("username"),
+        sasl_password=sasl_config.get("password"),
+        sasl_mechanism=sasl_config.get("mechanism"),
+        security_protocol=args.security_protocol,
+        max_request_size=max_request_size,
     )
 
 
 def main():
     """
     Main entry point for the streaming function runner.
-
+
     This function:
     1. Loads the appropriate streaming function (DMV1 or DMV2)
     2. Sets up metrics reporting thread and message processing thread
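The new `_sr_json_deserializer` strips the Confluent Schema Registry wire-format envelope (magic byte `0x00` plus a 4-byte big-endian schema ID) before decoding JSON, and passes plain JSON through untouched. A small sketch of both directions; `encode_sr_json` is a hypothetical helper used here only to build a test message:

```python
# Sketch of the Schema Registry JSON wire format handled by the deserializer above.
import json
import struct
from typing import Optional


def encode_sr_json(payload: dict, schema_id: int) -> bytes:
    return b"\x00" + struct.pack(">I", schema_id) + json.dumps(payload).encode("utf-8")


def decode_sr_json(m: Optional[bytes]):
    if m is None:
        return None
    if len(m) >= 5 and m[0] == 0x00:
        m = m[5:]  # strip the envelope; plain JSON messages skip this branch
    return json.loads(m.decode("utf-8"))


assert decode_sr_json(encode_sr_json({"id": 1}, schema_id=42)) == {"id": 1}
assert decode_sr_json(b'{"id": 1}') == {"id": 1}
```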
@@ -361,18 +439,11 @@ def main():
     # Shared state for metrics and control
     running = threading.Event()
     running.set()  # Start in running state
-    metrics = {
-        'count_in': 0,
-        'count_out': 0,
-        'bytes_count': 0
-    }
+    metrics = {"count_in": 0, "count_out": 0, "bytes_count": 0}
     metrics_lock = threading.Lock()
 
     # Shared references for cleanup
-    kafka_refs = {
-        'consumer': None,
-        'producer': None
-    }
+    kafka_refs = {"consumer": None, "producer": None}
 
     def send_message_metrics():
         while running.is_set():
@@ -381,42 +452,44 @@ def main():
                 requests.post(
                     f"http://localhost:{moose_management_port}/metrics-logs",
                     json={
-
-
-
-
-
-                    }
+                        "timestamp": datetime.now(timezone.utc).isoformat(),
+                        "count_in": metrics["count_in"],
+                        "count_out": metrics["count_out"],
+                        "bytes": metrics["bytes_count"],
+                        "function_name": log_prefix,
+                    },
                 )
-                metrics[
-                metrics[
-                metrics[
+                metrics["count_in"] = 0
+                metrics["count_out"] = 0
+                metrics["bytes_count"] = 0
 
     def process_messages():
         try:
             streaming_function_input_type = None
             streaming_function_callables = None
             if args.dmv2:
-                streaming_function_input_type, streaming_function_callables =
-                    function_file_dir, function_file_name)
+                streaming_function_input_type, streaming_function_callables = (
+                    load_streaming_function_dmv2(function_file_dir, function_file_name)
+                )
             else:
-                streaming_function_input_type, streaming_function_callable =
-                    function_file_dir, function_file_name)
+                streaming_function_input_type, streaming_function_callable = (
+                    load_streaming_function_dmv1(function_file_dir, function_file_name)
+                )
 
                 streaming_function_callables = [(streaming_function_callable, None)]
 
             needs_producer = target_topic is not None or any(
-                pair[1] is not None for pair in streaming_function_callables
+                pair[1] is not None for pair in streaming_function_callables
+            )
 
             # Initialize Kafka connections in the processing thread
             consumer = create_consumer()
             producer = create_producer() if needs_producer else None
 
             # Store references for cleanup
-            kafka_refs[
-            kafka_refs[
+            kafka_refs["consumer"] = consumer
+            kafka_refs["producer"] = producer
 
-            # Subscribe to topic
             consumer.subscribe([source_topic.name])
 
             log("Kafka consumer and producer initialized in processing thread")
@@ -432,17 +505,27 @@ def main():
                 # Process each partition's messages
                 for partition_messages in messages.values():
                     for message in partition_messages:
+                        log(
+                            f"Message partition={message.partition} offset={message.offset}"
+                        )
                         if not running.is_set():
                             return
 
                         # Parse the message into the input type
-                        input_data = parse_input(
+                        input_data = parse_input(
+                            streaming_function_input_type, message.value
+                        )
 
                         # Run the flow
                         all_outputs = []
-                        for (
+                        for (
+                            streaming_function_callable,
+                            dlq,
+                        ) in streaming_function_callables:
                             try:
-                                output_data = streaming_function_callable(
+                                output_data = streaming_function_callable(
+                                    input_data
+                                )
                             except Exception as e:
                                 traceback.print_exc()
                                 if dlq is not None:
@@ -451,21 +534,27 @@ def main():
                                         error_message=str(e),
                                         error_type=e.__class__.__name__,
                                         failed_at=datetime.now(timezone.utc),
-                                        source="transform"
+                                        source="transform",
+                                    )
+                                    record = dead_letter.model_dump_json().encode(
+                                        "utf-8"
                                     )
-                                    record = dead_letter.model_dump_json().encode('utf-8')
                                     producer.send(dlq.name, record).get()
-                                    cli_log(
-
-
-
+                                    cli_log(
+                                        CliLogData(
+                                            action="DeadLetter",
+                                            message=f"Sent message to DLQ {dlq.name}: {str(e)}",
+                                            message_type=CliLogData.ERROR,
+                                        )
+                                    )
                                 else:
-                                    cli_log(
-
-
-
+                                    cli_log(
+                                        CliLogData(
+                                            action="Function",
+                                            message=f"Error processing message (no DLQ configured): {str(e)}",
+                                            message_type=CliLogData.ERROR,
+                                        )
+                                    )
                                 # Skip to the next transformation or message
                                 continue
 
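The dead-letter path above serializes the failure details and produces them to the DLQ topic instead of crashing the runner. A hedged sketch using a stand-in Pydantic model with the fields visible in this hunk; the `original_record` field and the helper name are assumptions, and the real `DeadLetterModel` in `moose_lib` may differ:

```python
# Hedged sketch of the dead-letter payload; DeadLetterSketch is a stand-in model.
from datetime import datetime, timezone
from typing import Any

from pydantic import BaseModel


class DeadLetterSketch(BaseModel):
    original_record: Any
    error_message: str
    error_type: str
    failed_at: datetime
    source: str


def to_dlq_record(original: Any, exc: Exception) -> bytes:
    dead_letter = DeadLetterSketch(
        original_record=original,
        error_message=str(exc),
        error_type=exc.__class__.__name__,
        failed_at=datetime.now(timezone.utc),
        source="transform",
    )
    return dead_letter.model_dump_json().encode("utf-8")


# Usage in a runner-like loop (hypothetical):
# producer.send(dlq.name, to_dlq_record(message.value, err)).get()
```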
@@ -474,29 +563,50 @@ def main():
                                 continue
 
                             # Handle streaming function returning an array or a single object
-                            output_data_list =
+                            output_data_list = (
+                                output_data
+                                if isinstance(output_data, list)
+                                else [output_data]
+                            )
                             all_outputs.extend(output_data_list)
 
                             with metrics_lock:
-                                metrics[
+                                metrics["count_in"] += len(output_data_list)
 
-                            cli_log(
-
+                            cli_log(
+                                CliLogData(
+                                    action="Received",
+                                    message=f"{log_prefix} {len(output_data_list)} message(s)",
+                                )
+                            )
 
                         if producer is not None:
                             for item in all_outputs:
                                 # Ignore flow function returning null
                                 if item is not None:
-                                    record = json.dumps(
+                                    record = json.dumps(
+                                        item, cls=EnhancedJSONEncoder
+                                    ).encode("utf-8")
 
                                     producer.send(target_topic.name, record)
 
                                     with metrics_lock:
-                                        metrics[
-                                        metrics[
+                                        metrics["bytes_count"] += len(record)
+                                        metrics["count_out"] += 1
+
+                        # Flush producer to ensure messages are sent before committing
+                        producer.flush()
+
+                        # Commit offset only after successful processing and flushing
+                        # This ensures at-least-once delivery semantics
+                        consumer.commit()
 
         except Exception as e:
-            cli_log(
+            cli_log(
+                CliLogData(
+                    action="Function", message=str(e), message_type="Error"
+                )
+            )
             if not running.is_set():
                 break
             # Add a small delay before retrying on error
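The flush-then-commit ordering added above is what gives the runner at-least-once semantics: outputs are durably sent before the input offsets are acknowledged, so a crash in between replays messages rather than losing them. A minimal sketch of the same loop with kafka-python (broker address and topic names are illustrative):

```python
# Minimal at-least-once consume/produce loop; auto-commit is off, outputs are
# flushed, and only then are offsets committed, so a crash mid-batch causes
# replay (possible duplicates) but no message loss.
from kafka import KafkaConsumer, KafkaProducer

consumer = KafkaConsumer(
    "source-topic",
    bootstrap_servers="localhost:9092",
    group_id="example-group",
    enable_auto_commit=False,
    auto_offset_reset="earliest",
)
producer = KafkaProducer(bootstrap_servers="localhost:9092")

while True:
    batch = consumer.poll(timeout_ms=1000)
    if not batch:
        continue
    for records in batch.values():
        for record in records:
            producer.send("target-topic", record.value)  # transform step omitted
    producer.flush()   # make sure outputs reached the broker...
    consumer.commit()  # ...before acknowledging the consumed offsets
```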
@@ -555,16 +665,16 @@ def main():
         log("Processing thread did not exit cleanly")
 
     # Clean up Kafka resources regardless of thread state
-    if kafka_refs[
+    if kafka_refs["consumer"]:
         try:
-            kafka_refs[
+            kafka_refs["consumer"].close()
         except Exception as e:
             log(f"Error closing consumer: {e}")
 
-    if kafka_refs[
+    if kafka_refs["producer"] and kafka_refs["producer"] is not None:
         try:
-            kafka_refs[
-            kafka_refs[
+            kafka_refs["producer"].flush()
+            kafka_refs["producer"].close()
         except Exception as e:
             log(f"Error closing producer: {e}")
 