moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +34 -3
- moose_lib/blocks.py +416 -52
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +37 -30
- moose_lib/config/config_file.py +5 -1
- moose_lib/config/runtime.py +73 -34
- moose_lib/data_models.py +331 -61
- moose_lib/dmv2/__init__.py +69 -73
- moose_lib/dmv2/_registry.py +2 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +35 -16
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +299 -151
- moose_lib/dmv2/registry.py +18 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +75 -23
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +11 -6
- moose_lib/dmv2/web_app_helpers.py +5 -1
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +340 -56
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +233 -117
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +6 -5
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +52 -58
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +42 -40
- tests/test_web_app.py +11 -5
- moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/streaming/streaming_function_runner.py
CHANGED
@@ -39,9 +39,7 @@ from moose_lib.commons import (
 
 # Force stdout to be unbuffered
 sys.stdout = io.TextIOWrapper(
-    open(sys.stdout.fileno(), 'wb', 0),
-    write_through=True,
-    line_buffering=True
+    open(sys.stdout.fileno(), "wb", 0), write_through=True, line_buffering=True
 )
 
 
@@ -49,7 +47,7 @@ sys.stdout = io.TextIOWrapper(
 class KafkaTopicConfig:
     """
     Configuration for a Kafka topic including namespace support.
-
+
     Attributes:
         streaming_engine_type: The type of topic (source or target)
         name: Full topic name including namespace if present
@@ -59,6 +57,7 @@ class KafkaTopicConfig:
        namespace: Optional namespace prefix for the topic
         version: Optional version string for the topic
     """
+
     streaming_engine_type: str
     name: str
     partitions: int
@@ -76,31 +75,37 @@ class KafkaTopicConfig:
         if name.endswith(version_suffix):
             name = name.removesuffix(version_suffix)
         else:
-            raise Exception(f'Version suffix {version_suffix} not found in topic name {name}')
+            raise Exception(
+                f"Version suffix {version_suffix} not found in topic name {name}"
+            )
 
         if self.namespace is not None and self.namespace != "":
             prefix = self.namespace + "."
             if name.startswith(prefix):
                 name = name.removeprefix(prefix)
             else:
-                raise Exception(f'Namespace prefix {prefix} not found in topic name {name}')
+                raise Exception(
+                    f"Namespace prefix {prefix} not found in topic name {name}"
+                )
 
         return name
 
 
-def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str) -> Tuple[type, Callable]:
+def load_streaming_function_dmv1(
+    function_file_dir: str, function_file_name: str
+) -> Tuple[type, Callable]:
     """
     Load a DMV1 (legacy) streaming function from a Python module.
-
+
     Args:
         function_file_dir: Directory containing the streaming function module
         function_file_name: Name of the module file without .py extension
-
+
     Returns:
         Tuple of (input_type, run_function) where:
         - input_type is the type annotation of the run function's input parameter
         - run_function is the actual transformation function
-
+
     Raises:
         SystemExit: If module import fails or if multiple/no streaming functions found
     """
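The stripping in the hunk above runs in a fixed order — version suffix first, then namespace prefix — and fails loudly if either expected marker is missing. A standalone sketch of the same logic (the `_1_2`-style suffix derivation is an assumption; the library may build `version_suffix` differently):

```python
# Sketch of the topic-name -> stream-name stripping shown above; not the
# library's exact code. Assumes version "1.2" yields the suffix "_1_2".
def topic_to_stream_name(name: str, namespace: str | None, version: str | None) -> str:
    if version:
        version_suffix = "_" + version.replace(".", "_")
        if not name.endswith(version_suffix):
            raise Exception(f"Version suffix {version_suffix} not found in topic name {name}")
        name = name.removesuffix(version_suffix)
    if namespace:
        prefix = namespace + "."
        if not name.startswith(prefix):
            raise Exception(f"Namespace prefix {prefix} not found in topic name {name}")
        name = name.removeprefix(prefix)
    return name

assert topic_to_stream_name("acme.user_events_1_2", "acme", "1.2") == "user_events"
```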
@@ -115,13 +120,19 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
         sys.exit(1)
 
     # Get all the named flows in the flow file and make sure the flow is of type StreamingFunction
-    streaming_functions = [f for f in dir(module) if isinstance(getattr(module, f), streaming_function_def)]
+    streaming_functions = [
+        f for f in dir(module) if isinstance(getattr(module, f), streaming_function_def)
+    ]
 
     # Make sure that there is only one flow in the file
     if len(streaming_functions) != 1:
-        cli_log(CliLogData(action='Function',
-                           message=f'Expected one streaming function in the file, but got {len(streaming_functions)}',
-                           message_type='Error'))
+        cli_log(
+            CliLogData(
+                action="Function",
+                message=f"Expected one streaming function in the file, but got {len(streaming_functions)}",
+                message_type="Error",
+            )
+        )
         sys.exit(1)
 
     # get the flow definition
@@ -131,26 +142,29 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
     streaming_function_run = streaming_function_def.run
 
     # get run input type that doesn't rely on the name of the input parameter
-    run_input_type = streaming_function_run.__annotations__[list(streaming_function_run.__annotations__.keys())[0]]
+    run_input_type = streaming_function_run.__annotations__[
+        list(streaming_function_run.__annotations__.keys())[0]
+    ]
 
     return run_input_type, streaming_function_run
 
 
-def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
-                                 ) -> tuple[type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
+def load_streaming_function_dmv2(
+    function_file_dir: str, function_file_name: str
+) -> tuple[type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
     """
     Load a DMV2 streaming function by finding the stream transformation that matches
     the source and target topics.
-
+
     Args:
         function_file_dir: Directory containing the main.py file
         function_file_name: Name of the main.py file (without extension)
-
+
     Returns:
         Tuple of (input_type, transformation_functions) where:
         - input_type is the Pydantic model type of the source stream
         - transformation_functions is a list of functions that transform source to target data and their dead letter queues
-
+
     Raises:
         SystemExit: If module import fails or if no matching transformation is found
     """
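The DMV1 loader above recovers the input type purely from the run function's `__annotations__`, so the parameter can be named anything. A minimal illustration (note this relies on the first annotation belonging to the first parameter; `inspect.signature` would be the more robust tool):

```python
def run(record: dict) -> dict:  # any parameter name works
    return record

# The first annotation key is the first annotated parameter ("record" here),
# because __annotations__ preserves definition order.
first_param = list(run.__annotations__.keys())[0]
run_input_type = run.__annotations__[first_param]
print(first_param, run_input_type)  # record <class 'dict'>
```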
@@ -169,7 +183,10 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
             continue
 
         if stream.has_consumers() and target_topic is None:
-            consumers = [(entry.consumer, entry.config.dead_letter_queue) for entry in stream.consumers]
+            consumers = [
+                (entry.consumer, entry.config.dead_letter_queue)
+                for entry in stream.consumers
+            ]
             if not consumers:
                 continue
             return stream.model_type, consumers
@@ -178,52 +195,94 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
         for dest_stream_py_name, transform_entries in stream.transformations.items():
             # The source topic name should match the stream name
             # The destination topic name should match the destination stream name
-            if source_py_stream_name == source_topic.topic_name_to_stream_name() and dest_stream_py_name == target_topic.topic_name_to_stream_name():
+            if (
+                source_py_stream_name == source_topic.topic_name_to_stream_name()
+                and dest_stream_py_name == target_topic.topic_name_to_stream_name()
+            ):
                 # Found the matching transformation
-                transformations = [(entry.transformation, entry.config.dead_letter_queue)
-                                   for entry in transform_entries]
+                transformations = [
+                    (entry.transformation, entry.config.dead_letter_queue)
+                    for entry in transform_entries
+                ]
                 if not transformations:
                     continue
                 return stream.model_type, transformations
 
     # If we get here, no matching transformation was found
-    cli_log(CliLogData(
-        action='Function',
-        message=f'No transformation found from {source_topic.name} to {target_topic.name}',
-        message_type='Error',
-    ))
+    cli_log(
+        CliLogData(
+            action="Function",
+            message=f"No transformation found from {source_topic.name} to {target_topic.name}",
+            message_type="Error",
+        )
+    )
     sys.exit(1)
 
 
-parser = argparse.ArgumentParser(description='Run a streaming function')
+parser = argparse.ArgumentParser(description="Run a streaming function")
 
-parser.add_argument('source_topic_json', type=str, help='The source topic for the streaming function')
+parser.add_argument(
+    "source_topic_json", type=str, help="The source topic for the streaming function"
+)
 # In DMV2 is the dir is the dir of the main.py or index.ts file
 # and the function_file_name is the file name of main.py or index.ts
 # In DMV1 the dir is the dir of the streaming function file
 # and the function_file_name is the file name of the streaming function without the .py extension
-parser.add_argument('function_file_dir', type=str, help='The dir of the streaming function file')
-parser.add_argument('function_file_name', type=str,
-                    help='The file name of the streaming function without the .py extension')
-parser.add_argument('broker', type=str, help='The broker to use for the streaming function')
-parser.add_argument('--target_topic_json', type=str, help='The target topic for the streaming function')
-parser.add_argument('--sasl_username', type=str, help='The SASL username to use for the streaming function')
-parser.add_argument('--sasl_password', type=str, help='The SASL password to use for the streaming function')
-parser.add_argument('--sasl_mechanism', type=str, help='The SASL mechanism to use for the streaming function')
-parser.add_argument('--security_protocol', type=str, help='The security protocol to use for the streaming function')
-parser.add_argument('--dmv2', action=argparse.BooleanOptionalAction, type=bool,
-                    help='Whether to use the DMV2 format for the streaming function')
+parser.add_argument(
+    "function_file_dir", type=str, help="The dir of the streaming function file"
+)
+parser.add_argument(
+    "function_file_name",
+    type=str,
+    help="The file name of the streaming function without the .py extension",
+)
+parser.add_argument(
+    "broker", type=str, help="The broker to use for the streaming function"
+)
+parser.add_argument(
+    "--target_topic_json", type=str, help="The target topic for the streaming function"
+)
+parser.add_argument(
+    "--sasl_username",
+    type=str,
+    help="The SASL username to use for the streaming function",
+)
+parser.add_argument(
+    "--sasl_password",
+    type=str,
+    help="The SASL password to use for the streaming function",
+)
+parser.add_argument(
+    "--sasl_mechanism",
+    type=str,
+    help="The SASL mechanism to use for the streaming function",
+)
+parser.add_argument(
+    "--security_protocol",
+    type=str,
+    help="The security protocol to use for the streaming function",
+)
+parser.add_argument(
+    "--dmv2",
+    action=argparse.BooleanOptionalAction,
+    type=bool,
+    help="Whether to use the DMV2 format for the streaming function",
+)
 
 args: argparse.Namespace = parser.parse_args()
 
 for arg in vars(args):
     value = getattr(args, arg)
-    if 'password' in arg and value is not None:
-        value = '******'
+    if "password" in arg and value is not None:
+        value = "******"
     print(arg, value)
 
 source_topic = KafkaTopicConfig(**json.loads(args.source_topic_json))
-target_topic = KafkaTopicConfig(**json.loads(args.target_topic_json)) if args.target_topic_json else None
+target_topic = (
+    KafkaTopicConfig(**json.loads(args.target_topic_json))
+    if args.target_topic_json
+    else None
+)
 function_file_dir = args.function_file_dir
 function_file_name = args.function_file_name
 broker = args.broker
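Both topic arguments arrive as JSON blobs and are splatted straight into `KafkaTopicConfig`. A hypothetical payload, with the field set inferred from the attributes this diff touches (`streaming_engine_type`, `name`, `partitions`, `namespace`, `version`, `max_message_bytes`); the real schema may include more:

```python
import json

# Hypothetical --source_topic_json / --target_topic_json payload; values are
# placeholders and the field set is inferred from this diff, not documented.
source_topic_json = json.dumps({
    "streaming_engine_type": "source",   # source or target
    "name": "acme.user_events_1_2",      # full name incl. namespace and version
    "partitions": 3,
    "max_message_bytes": 1048576,
    "namespace": "acme",
    "version": "1.2",
})
# The runner then rehydrates it with:
#   source_topic = KafkaTopicConfig(**json.loads(args.source_topic_json))
```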
@@ -231,24 +290,36 @@ sasl_mechanism = args.sasl_mechanism
 
 # Setup SASL config w/ supported mechanisms
 if args.sasl_mechanism is not None:
-    if args.sasl_mechanism not in ['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512']:
+    if args.sasl_mechanism not in ["PLAIN", "SCRAM-SHA-256", "SCRAM-SHA-512"]:
         raise Exception(f"Unsupported SASL mechanism: {args.sasl_mechanism}")
     if args.sasl_username is None or args.sasl_password is None:
-        raise Exception('SASL username and password must be provided if a SASL mechanism is specified')
+        raise Exception(
+            "SASL username and password must be provided if a SASL mechanism is specified"
+        )
     if args.security_protocol is None:
-        raise Exception('Security protocol must be provided if a SASL mechanism is specified')
+        raise Exception(
+            "Security protocol must be provided if a SASL mechanism is specified"
+        )
 
 sasl_config = {
-    'username': args.sasl_username,
-    'password': args.sasl_password,
-    'mechanism': args.sasl_mechanism,
+    "username": args.sasl_username,
+    "password": args.sasl_password,
+    "mechanism": args.sasl_mechanism,
 }
 
 # We use flow- instead of function- because that's what the ACLs in boreal are linked with
-# When migrating - make sure the ACLs are updated to use the new prefix.
+# When migrating - make sure the ACLs are updated to use the new prefix.
 # And make sure the prefixes are the same in the ts-moose-lib and py-moose-lib
-streaming_function_id = f'flow-{source_topic.name}-{target_topic.name}' if target_topic else f'flow-{source_topic.name}'
-log_prefix = f'{source_topic.name} -> {target_topic.name}' if target_topic else f'{source_topic.name} (consumer)'
+streaming_function_id = (
+    f"flow-{source_topic.name}-{target_topic.name}"
+    if target_topic
+    else f"flow-{source_topic.name}"
+)
+log_prefix = (
+    f"{source_topic.name} -> {target_topic.name}"
+    if target_topic
+    else f"{source_topic.name} (consumer)"
+)
 
 
 def log(msg: str) -> None:
@@ -265,13 +336,13 @@ def error(msg: str) -> None:
 def parse_input(run_input_type: type, json_input: dict) -> Any:
     """
     Parse JSON input data into the appropriate input type for the streaming function.
-
+
     Handles Pydantic models, nested dataclass structures and lists of dataclasses.
-
+
     Args:
         run_input_type: The type to parse the JSON into
         json_input: The JSON data as a Python dict
-
+
     Returns:
         An instance of run_input_type populated with the JSON data
     """
@@ -281,7 +352,12 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
             return cls.model_validate(data)
         elif dataclasses.is_dataclass(cls):
             field_types = {f.name: f.type for f in dataclasses.fields(cls)}
-            return cls(**{name: deserialize(data.get(name), field_types[name]) for name in field_types})
+            return cls(
+                **{
+                    name: deserialize(data.get(name), field_types[name])
+                    for name in field_types
+                }
+            )
         elif isinstance(data, list):
             return [deserialize(item, cls.__args__[0]) for item in data]
         else:
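The dataclass branch above recurses field by field, and the list branch recurses into the element type, so arbitrarily nested dataclass trees deserialize cleanly. A trimmed, self-contained version of the same recursion (Pydantic branch omitted):

```python
import dataclasses
from typing import Any

@dataclasses.dataclass
class Point:
    x: int
    y: int

@dataclasses.dataclass
class Route:
    name: str
    points: list[Point]

def deserialize(data: Any, cls: type) -> Any:
    # Same shape as the diff above: recurse into dataclass fields and typed lists.
    if dataclasses.is_dataclass(cls):
        field_types = {f.name: f.type for f in dataclasses.fields(cls)}
        return cls(**{name: deserialize(data.get(name), field_types[name]) for name in field_types})
    elif isinstance(data, list):
        return [deserialize(item, cls.__args__[0]) for item in data]
    return data

print(deserialize({"name": "r1", "points": [{"x": 1, "y": 2}]}, Route))
# Route(name='r1', points=[Point(x=1, y=2)])
```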
@@ -293,12 +369,14 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
 def create_consumer() -> KafkaConsumer:
     """
     Create a Kafka consumer configured for the source topic.
-
+
     Handles SASL authentication if configured.
-
+    Disables auto-commit to ensure at-least-once processing semantics.
+
     Returns:
         Configured KafkaConsumer instance
     """
+
     def _sr_json_deserializer(m: bytes):
         if m is None:
             return None
@@ -316,6 +394,8 @@ def create_consumer() -> KafkaConsumer:
         sasl_password=sasl_config.get("password"),
         sasl_mechanism=sasl_config.get("mechanism"),
         security_protocol=args.security_protocol,
+        enable_auto_commit=False,  # Disable auto-commit for at-least-once semantics
+        auto_offset_reset="earliest",
     )
     consumer = get_kafka_consumer(**kwargs)
     return consumer
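These two settings are the consumer half of the at-least-once change: offsets only move when the runner commits them explicitly. The equivalent in plain kafka-python (which the `KafkaProducer.DEFAULT_CONFIG` and `poll()` calls elsewhere in this diff suggest is the client in use; broker address, group id, and topic below are placeholders):

```python
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    bootstrap_servers="localhost:9092",
    group_id="flow-example",
    enable_auto_commit=False,      # offsets advance only via consumer.commit()
    auto_offset_reset="earliest",  # new consumer groups start from the beginning
)
consumer.subscribe(["example-topic"])
```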
@@ -324,14 +404,17 @@ def create_consumer() -> KafkaConsumer:
 def create_producer() -> Optional[KafkaProducer]:
     """
     Create a Kafka producer configured for the target topic.
-
+
     Handles SASL authentication if configured and sets appropriate message size limits.
-
+
     Returns:
         Configured KafkaProducer instance
     """
-    max_request_size = KafkaProducer.DEFAULT_CONFIG['max_request_size'] if target_topic is None \
+    max_request_size = (
+        KafkaProducer.DEFAULT_CONFIG["max_request_size"]
+        if target_topic is None
         else target_topic.max_message_bytes
+    )
     return get_kafka_producer(
         broker=broker,
         sasl_username=sasl_config.get("username"),
@@ -345,7 +428,7 @@ def create_producer() -> Optional[KafkaProducer]:
 def main():
     """
     Main entry point for the streaming function runner.
-
+
     This function:
     1. Loads the appropriate streaming function (DMV1 or DMV2)
     2. Sets up metrics reporting thread and message processing thread
@@ -356,18 +439,11 @@ def main():
     # Shared state for metrics and control
     running = threading.Event()
     running.set()  # Start in running state
-    metrics = {
-        'count_in': 0,
-        'count_out': 0,
-        'bytes_count': 0
-    }
+    metrics = {"count_in": 0, "count_out": 0, "bytes_count": 0}
     metrics_lock = threading.Lock()
 
     # Shared references for cleanup
-    kafka_refs = {
-        'consumer': None,
-        'producer': None
-    }
+    kafka_refs = {"consumer": None, "producer": None}
 
     def send_message_metrics():
         while running.is_set():
@@ -376,40 +452,43 @@ def main():
                 requests.post(
                     f"http://localhost:{moose_management_port}/metrics-logs",
                     json={
-                        'timestamp': datetime.now(timezone.utc).isoformat(),
-                        'count_in': metrics['count_in'],
-                        'count_out': metrics['count_out'],
-                        'bytes': metrics['bytes_count'],
-                        'function_name': log_prefix,
-                    }
+                        "timestamp": datetime.now(timezone.utc).isoformat(),
+                        "count_in": metrics["count_in"],
+                        "count_out": metrics["count_out"],
+                        "bytes": metrics["bytes_count"],
+                        "function_name": log_prefix,
+                    },
                 )
-                metrics['count_in'] = 0
-                metrics['count_out'] = 0
-                metrics['bytes_count'] = 0
+                metrics["count_in"] = 0
+                metrics["count_out"] = 0
+                metrics["bytes_count"] = 0
 
     def process_messages():
         try:
             streaming_function_input_type = None
             streaming_function_callables = None
             if args.dmv2:
-                streaming_function_input_type, streaming_function_callables = load_streaming_function_dmv2(
-                    function_file_dir, function_file_name)
+                streaming_function_input_type, streaming_function_callables = (
+                    load_streaming_function_dmv2(function_file_dir, function_file_name)
+                )
             else:
-                streaming_function_input_type, streaming_function_callable = load_streaming_function_dmv1(
-                    function_file_dir, function_file_name)
+                streaming_function_input_type, streaming_function_callable = (
+                    load_streaming_function_dmv1(function_file_dir, function_file_name)
+                )
 
                 streaming_function_callables = [(streaming_function_callable, None)]
 
             needs_producer = target_topic is not None or any(
-                pair[1] is not None for pair in streaming_function_callables)
+                pair[1] is not None for pair in streaming_function_callables
+            )
 
             # Initialize Kafka connections in the processing thread
             consumer = create_consumer()
             producer = create_producer() if needs_producer else None
 
             # Store references for cleanup
-            kafka_refs['consumer'] = consumer
-            kafka_refs['producer'] = producer
+            kafka_refs["consumer"] = consumer
+            kafka_refs["producer"] = producer
 
             consumer.subscribe([source_topic.name])
 
@@ -426,17 +505,27 @@ def main():
                     # Process each partition's messages
                     for partition_messages in messages.values():
                         for message in partition_messages:
+                            log(
+                                f"Message partition={message.partition} offset={message.offset}"
+                            )
                            if not running.is_set():
                                 return
 
                             # Parse the message into the input type
-                            input_data = parse_input(streaming_function_input_type, message.value)
+                            input_data = parse_input(
+                                streaming_function_input_type, message.value
+                            )
 
                             # Run the flow
                             all_outputs = []
-                            for (streaming_function_callable, dlq) in streaming_function_callables:
+                            for (
+                                streaming_function_callable,
+                                dlq,
+                            ) in streaming_function_callables:
                                 try:
-                                    output_data = streaming_function_callable(input_data)
+                                    output_data = streaming_function_callable(
+                                        input_data
+                                    )
                                 except Exception as e:
                                     traceback.print_exc()
                                     if dlq is not None:
@@ -445,21 +534,27 @@ def main():
                                         error_message=str(e),
                                         error_type=e.__class__.__name__,
                                         failed_at=datetime.now(timezone.utc),
-                                        source="transform"
+                                        source="transform",
+                                    )
+                                    record = dead_letter.model_dump_json().encode(
+                                        "utf-8"
                                     )
-                                    record = dead_letter.model_dump_json().encode('utf-8')
                                     producer.send(dlq.name, record).get()
-                                    cli_log(CliLogData(
-                                        action='DeadLetter',
-                                        message=f'Sent message to DLQ {dlq.name}: {str(e)}',
-                                        message_type=CliLogData.ERROR,
-                                    ))
+                                    cli_log(
+                                        CliLogData(
+                                            action="DeadLetter",
+                                            message=f"Sent message to DLQ {dlq.name}: {str(e)}",
+                                            message_type=CliLogData.ERROR,
+                                        )
+                                    )
                                 else:
-                                    cli_log(CliLogData(
-                                        action='Function',
-                                        message=f'Error processing message (no DLQ configured): {str(e)}',
-                                        message_type=CliLogData.ERROR,
-                                    ))
+                                    cli_log(
+                                        CliLogData(
+                                            action="Function",
+                                            message=f"Error processing message (no DLQ configured): {str(e)}",
+                                            message_type=CliLogData.ERROR,
+                                        )
+                                    )
                                     # Skip to the next transformation or message
                                     continue
 
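On a transform failure the runner serializes a dead-letter record and sends it synchronously — the `.get()` blocks until the broker acknowledges, so a DLQ failure surfaces immediately. A hedged sketch of the payload, modeled only on the fields visible in this hunk; the class name is hypothetical and moose_lib's real model likely also carries the original message:

```python
from datetime import datetime, timezone
from pydantic import BaseModel

class DeadLetterSketch(BaseModel):  # hypothetical stand-in for moose_lib's model
    error_message: str
    error_type: str
    failed_at: datetime
    source: str

dead_letter = DeadLetterSketch(
    error_message="division by zero",
    error_type="ZeroDivisionError",
    failed_at=datetime.now(timezone.utc),
    source="transform",
)
record = dead_letter.model_dump_json().encode("utf-8")
# producer.send(dlq.name, record).get()  # blocking send, as in the diff above
```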
@@ -468,29 +563,50 @@ def main():
                                    continue
 
                                 # Handle streaming function returning an array or a single object
-                                output_data_list = output_data if isinstance(output_data, list) else [output_data]
+                                output_data_list = (
+                                    output_data
+                                    if isinstance(output_data, list)
+                                    else [output_data]
+                                )
                                 all_outputs.extend(output_data_list)
 
                                 with metrics_lock:
-                                    metrics['count_in'] += len(output_data_list)
+                                    metrics["count_in"] += len(output_data_list)
 
-                                cli_log(CliLogData(action='Received',
-                                                   message=f'{log_prefix} {len(output_data_list)} message(s)'))
+                                cli_log(
+                                    CliLogData(
+                                        action="Received",
+                                        message=f"{log_prefix} {len(output_data_list)} message(s)",
+                                    )
+                                )
 
                             if producer is not None:
                                 for item in all_outputs:
                                     # Ignore flow function returning null
                                     if item is not None:
-                                        record = json.dumps(item, cls=EnhancedJSONEncoder).encode('utf-8')
+                                        record = json.dumps(
+                                            item, cls=EnhancedJSONEncoder
+                                        ).encode("utf-8")
 
                                         producer.send(target_topic.name, record)
 
                                         with metrics_lock:
-                                            metrics['bytes_count'] += len(record)
-                                            metrics['count_out'] += 1
+                                            metrics["bytes_count"] += len(record)
+                                            metrics["count_out"] += 1
+
+                                # Flush producer to ensure messages are sent before committing
+                                producer.flush()
+
+                            # Commit offset only after successful processing and flushing
+                            # This ensures at-least-once delivery semantics
+                            consumer.commit()
 
             except Exception as e:
-                cli_log(CliLogData(action='Function', message=str(e), message_type='Error'))
+                cli_log(
+                    CliLogData(
+                        action="Function", message=str(e), message_type="Error"
+                    )
+                )
                 if not running.is_set():
                     break
                 # Add a small delay before retrying on error
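The flush-before-commit ordering above is what makes the loop at-least-once: inputs are only marked consumed after every output has been handed to the broker. If the process dies between `send` and `commit`, the batch is re-read and re-processed, so duplicates are possible but loss is not. A condensed sketch of the cycle (kafka-python assumed; topics, group id, and the identity transform are placeholders):

```python
from kafka import KafkaConsumer, KafkaProducer

consumer = KafkaConsumer(
    "source-topic",
    bootstrap_servers="localhost:9092",
    group_id="flow-example",
    enable_auto_commit=False,
)
producer = KafkaProducer(bootstrap_servers="localhost:9092")

while True:
    batch = consumer.poll(timeout_ms=1000)  # {TopicPartition: [messages]}
    for messages in batch.values():
        for message in messages:
            producer.send("target-topic", message.value)  # transform omitted
    producer.flush()   # every output durably handed to the broker...
    consumer.commit()  # ...before the input offsets are committed
```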
@@ -549,16 +665,16 @@ def main():
             log("Processing thread did not exit cleanly")
 
     # Clean up Kafka resources regardless of thread state
-    if kafka_refs['consumer']:
+    if kafka_refs["consumer"]:
         try:
-            kafka_refs['consumer'].close()
+            kafka_refs["consumer"].close()
         except Exception as e:
             log(f"Error closing consumer: {e}")
 
-    if kafka_refs['producer'] and kafka_refs['producer'] is not None:
+    if kafka_refs["producer"] and kafka_refs["producer"] is not None:
         try:
-            kafka_refs['producer'].flush()
-            kafka_refs['producer'].close()
+            kafka_refs["producer"].flush()
+            kafka_refs["producer"].close()
         except Exception as e:
             log(f"Error closing producer: {e}")
 
{moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: moose_lib
-Version: 0.6.148.dev3442438466
+Version: 0.6.283
 Home-page: https://www.fiveonefour.com/moose
 Author: Fiveonefour Labs Inc.
 Author-email: support@fiveonefour.com
@@ -27,3 +27,20 @@ Dynamic: requires-python
 # Python Moose Lib
 
 Python package which contains moose utils
+
+## Column Autocomplete with MooseModel
+
+For LSP autocomplete when working with columns, use `MooseModel` instead of `BaseModel`:
+
+```python
+from moose_lib import MooseModel, OlapTable
+
+class User(MooseModel):
+    user_id: int
+    email: str
+
+# Autocomplete works when typing User.user_id
+query = f"SELECT {User.user_id:col}, {User.email:col} FROM users"
+```
+
+See [MooseModel Autocomplete Guide](docs/moose-model-autocomplete.md) for details.