moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/streaming/streaming_function_runner.py

@@ -39,9 +39,7 @@ from moose_lib.commons import (

  # Force stdout to be unbuffered
  sys.stdout = io.TextIOWrapper(
- open(sys.stdout.fileno(), 'wb', 0),
- write_through=True,
- line_buffering=True
+ open(sys.stdout.fileno(), "wb", 0), write_through=True, line_buffering=True
  )


@@ -49,7 +47,7 @@ sys.stdout = io.TextIOWrapper(
  class KafkaTopicConfig:
  """
  Configuration for a Kafka topic including namespace support.
-
+
  Attributes:
  streaming_engine_type: The type of topic (source or target)
  name: Full topic name including namespace if present
@@ -59,6 +57,7 @@ class KafkaTopicConfig:
  namespace: Optional namespace prefix for the topic
  version: Optional version string for the topic
  """
+
  streaming_engine_type: str
  name: str
  partitions: int
@@ -76,31 +75,37 @@ class KafkaTopicConfig:
  if name.endswith(version_suffix):
  name = name.removesuffix(version_suffix)
  else:
- raise Exception(f"Version suffix {version_suffix} not found in topic name {name}")
+ raise Exception(
+ f"Version suffix {version_suffix} not found in topic name {name}"
+ )

  if self.namespace is not None and self.namespace != "":
  prefix = self.namespace + "."
  if name.startswith(prefix):
  name = name.removeprefix(prefix)
  else:
- raise Exception(f"Namespace prefix {prefix} not found in topic name {name}")
+ raise Exception(
+ f"Namespace prefix {prefix} not found in topic name {name}"
+ )

  return name


- def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str) -> Tuple[type, Callable]:
+ def load_streaming_function_dmv1(
+ function_file_dir: str, function_file_name: str
+ ) -> Tuple[type, Callable]:
  """
  Load a DMV1 (legacy) streaming function from a Python module.
-
+
  Args:
  function_file_dir: Directory containing the streaming function module
  function_file_name: Name of the module file without .py extension
-
+
  Returns:
  Tuple of (input_type, run_function) where:
  - input_type is the type annotation of the run function's input parameter
  - run_function is the actual transformation function
-
+
  Raises:
  SystemExit: If module import fails or if multiple/no streaming functions found
  """
@@ -115,13 +120,19 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
  sys.exit(1)

  # Get all the named flows in the flow file and make sure the flow is of type StreamingFunction
- streaming_functions = [f for f in dir(module) if isinstance(getattr(module, f), streaming_function_def)]
+ streaming_functions = [
+ f for f in dir(module) if isinstance(getattr(module, f), streaming_function_def)
+ ]

  # Make sure that there is only one flow in the file
  if len(streaming_functions) != 1:
- cli_log(CliLogData(action="Function",
- message=f"Expected one streaming function in the file, but got {len(streaming_functions)}",
- message_type="Error"))
+ cli_log(
+ CliLogData(
+ action="Function",
+ message=f"Expected one streaming function in the file, but got {len(streaming_functions)}",
+ message_type="Error",
+ )
+ )
  sys.exit(1)

  # get the flow definition
@@ -131,26 +142,29 @@ def load_streaming_function_dmv1(function_file_dir: str, function_file_name: str
  streaming_function_run = streaming_function_def.run

  # get run input type that doesn't rely on the name of the input parameter
- run_input_type = streaming_function_run.__annotations__[list(streaming_function_run.__annotations__.keys())[0]]
+ run_input_type = streaming_function_run.__annotations__[
+ list(streaming_function_run.__annotations__.keys())[0]
+ ]

  return run_input_type, streaming_function_run


- def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str) -> tuple[
- type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
+ def load_streaming_function_dmv2(
+ function_file_dir: str, function_file_name: str
+ ) -> tuple[type, list[tuple[Callable, Optional[DeadLetterQueue]]]]:
  """
  Load a DMV2 streaming function by finding the stream transformation that matches
  the source and target topics.
-
+
  Args:
  function_file_dir: Directory containing the main.py file
  function_file_name: Name of the main.py file (without extension)
-
+
  Returns:
  Tuple of (input_type, transformation_functions) where:
  - input_type is the Pydantic model type of the source stream
  - transformation_functions is a list of functions that transform source to target data and their dead letter queues
-
+
  Raises:
  SystemExit: If module import fails or if no matching transformation is found
  """
@@ -169,7 +183,10 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
  continue

  if stream.has_consumers() and target_topic is None:
- consumers = [(entry.consumer, entry.config.dead_letter_queue) for entry in stream.consumers]
+ consumers = [
+ (entry.consumer, entry.config.dead_letter_queue)
+ for entry in stream.consumers
+ ]
  if not consumers:
  continue
  return stream.model_type, consumers
@@ -178,52 +195,94 @@ def load_streaming_function_dmv2(function_file_dir: str, function_file_name: str
  for dest_stream_py_name, transform_entries in stream.transformations.items():
  # The source topic name should match the stream name
  # The destination topic name should match the destination stream name
- if source_py_stream_name == source_topic.topic_name_to_stream_name() and dest_stream_py_name == target_topic.topic_name_to_stream_name():
+ if (
+ source_py_stream_name == source_topic.topic_name_to_stream_name()
+ and dest_stream_py_name == target_topic.topic_name_to_stream_name()
+ ):
  # Found the matching transformation
- transformations = [(entry.transformation, entry.config.dead_letter_queue) for entry in
- transform_entries]
+ transformations = [
+ (entry.transformation, entry.config.dead_letter_queue)
+ for entry in transform_entries
+ ]
  if not transformations:
  continue
  return stream.model_type, transformations

  # If we get here, no matching transformation was found
- cli_log(CliLogData(
- action="Function",
- message=f"No transformation found from {source_topic.name} to {target_topic.name}",
- message_type="Error"
- ))
+ cli_log(
+ CliLogData(
+ action="Function",
+ message=f"No transformation found from {source_topic.name} to {target_topic.name}",
+ message_type="Error",
+ )
+ )
  sys.exit(1)


- parser = argparse.ArgumentParser(description='Run a streaming function')
+ parser = argparse.ArgumentParser(description="Run a streaming function")

- parser.add_argument('source_topic_json', type=str, help='The source topic for the streaming function')
+ parser.add_argument(
+ "source_topic_json", type=str, help="The source topic for the streaming function"
+ )
  # In DMV2 is the dir is the dir of the main.py or index.ts file
  # and the function_file_name is the file name of main.py or index.ts
  # In DMV1 the dir is the dir of the streaming function file
  # and the function_file_name is the file name of the streaming function without the .py extension
- parser.add_argument('function_file_dir', type=str, help='The dir of the streaming function file')
- parser.add_argument('function_file_name', type=str,
- help='The file name of the streaming function without the .py extension')
- parser.add_argument('broker', type=str, help='The broker to use for the streaming function')
- parser.add_argument('--target_topic_json', type=str, help='The target topic for the streaming function')
- parser.add_argument('--sasl_username', type=str, help='The SASL username to use for the streaming function')
- parser.add_argument('--sasl_password', type=str, help='The SASL password to use for the streaming function')
- parser.add_argument('--sasl_mechanism', type=str, help='The SASL mechanism to use for the streaming function')
- parser.add_argument('--security_protocol', type=str, help='The security protocol to use for the streaming function')
- parser.add_argument('--dmv2', action=argparse.BooleanOptionalAction, type=bool,
- help='Whether to use the DMV2 format for the streaming function')
+ parser.add_argument(
+ "function_file_dir", type=str, help="The dir of the streaming function file"
+ )
+ parser.add_argument(
+ "function_file_name",
+ type=str,
+ help="The file name of the streaming function without the .py extension",
+ )
+ parser.add_argument(
+ "broker", type=str, help="The broker to use for the streaming function"
+ )
+ parser.add_argument(
+ "--target_topic_json", type=str, help="The target topic for the streaming function"
+ )
+ parser.add_argument(
+ "--sasl_username",
+ type=str,
+ help="The SASL username to use for the streaming function",
+ )
+ parser.add_argument(
+ "--sasl_password",
+ type=str,
+ help="The SASL password to use for the streaming function",
+ )
+ parser.add_argument(
+ "--sasl_mechanism",
+ type=str,
+ help="The SASL mechanism to use for the streaming function",
+ )
+ parser.add_argument(
+ "--security_protocol",
+ type=str,
+ help="The security protocol to use for the streaming function",
+ )
+ parser.add_argument(
+ "--dmv2",
+ action=argparse.BooleanOptionalAction,
+ type=bool,
+ help="Whether to use the DMV2 format for the streaming function",
+ )

  args: argparse.Namespace = parser.parse_args()

  for arg in vars(args):
  value = getattr(args, arg)
- if 'password' in arg and value is not None:
- value = '******'
+ if "password" in arg and value is not None:
+ value = "******"
  print(arg, value)

  source_topic = KafkaTopicConfig(**json.loads(args.source_topic_json))
- target_topic = KafkaTopicConfig(**json.loads(args.target_topic_json)) if args.target_topic_json else None
+ target_topic = (
+ KafkaTopicConfig(**json.loads(args.target_topic_json))
+ if args.target_topic_json
+ else None
+ )
  function_file_dir = args.function_file_dir
  function_file_name = args.function_file_name
  broker = args.broker
@@ -231,24 +290,36 @@ sasl_mechanism = args.sasl_mechanism

  # Setup SASL config w/ supported mechanisms
  if args.sasl_mechanism is not None:
- if args.sasl_mechanism not in ['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512']:
+ if args.sasl_mechanism not in ["PLAIN", "SCRAM-SHA-256", "SCRAM-SHA-512"]:
  raise Exception(f"Unsupported SASL mechanism: {args.sasl_mechanism}")
  if args.sasl_username is None or args.sasl_password is None:
- raise Exception("SASL username and password must be provided if a SASL mechanism is specified")
+ raise Exception(
+ "SASL username and password must be provided if a SASL mechanism is specified"
+ )
  if args.security_protocol is None:
- raise Exception("Security protocol must be provided if a SASL mechanism is specified")
+ raise Exception(
+ "Security protocol must be provided if a SASL mechanism is specified"
+ )

  sasl_config = {
- 'username': args.sasl_username,
- 'password': args.sasl_password,
- 'mechanism': args.sasl_mechanism
+ "username": args.sasl_username,
+ "password": args.sasl_password,
+ "mechanism": args.sasl_mechanism,
  }

  # We use flow- instead of function- because that's what the ACLs in boreal are linked with
- # When migrating - make sure the ACLs are updated to use the new prefix.
+ # When migrating - make sure the ACLs are updated to use the new prefix.
  # And make sure the prefixes are the same in the ts-moose-lib and py-moose-lib
- streaming_function_id = f'flow-{source_topic.name}-{target_topic.name}' if target_topic else f'flow-{source_topic.name}'
- log_prefix = f"{source_topic.name} -> {target_topic.name}" if target_topic else f"{source_topic.name} (consumer)"
+ streaming_function_id = (
+ f"flow-{source_topic.name}-{target_topic.name}"
+ if target_topic
+ else f"flow-{source_topic.name}"
+ )
+ log_prefix = (
+ f"{source_topic.name} -> {target_topic.name}"
+ if target_topic
+ else f"{source_topic.name} (consumer)"
+ )


  def log(msg: str) -> None:
@@ -265,13 +336,13 @@ def error(msg: str) -> None:
  def parse_input(run_input_type: type, json_input: dict) -> Any:
  """
  Parse JSON input data into the appropriate input type for the streaming function.
-
+
  Handles Pydantic models, nested dataclass structures and lists of dataclasses.
-
+
  Args:
  run_input_type: The type to parse the JSON into
  json_input: The JSON data as a Python dict
-
+
  Returns:
  An instance of run_input_type populated with the JSON data
  """
@@ -281,7 +352,12 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
  return cls.model_validate(data)
  elif dataclasses.is_dataclass(cls):
  field_types = {f.name: f.type for f in dataclasses.fields(cls)}
- return cls(**{name: deserialize(data.get(name), field_types[name]) for name in field_types})
+ return cls(
+ **{
+ name: deserialize(data.get(name), field_types[name])
+ for name in field_types
+ }
+ )
  elif isinstance(data, list):
  return [deserialize(item, cls.__args__[0]) for item in data]
  else:
@@ -293,12 +369,14 @@ def parse_input(run_input_type: type, json_input: dict) -> Any:
  def create_consumer() -> KafkaConsumer:
  """
  Create a Kafka consumer configured for the source topic.
-
+
  Handles SASL authentication if configured.
-
+ Disables auto-commit to ensure at-least-once processing semantics.
+
  Returns:
  Configured KafkaConsumer instance
  """
+
  def _sr_json_deserializer(m: bytes):
  if m is None:
  return None
@@ -316,6 +394,8 @@ def create_consumer() -> KafkaConsumer:
  sasl_password=sasl_config.get("password"),
  sasl_mechanism=sasl_config.get("mechanism"),
  security_protocol=args.security_protocol,
+ enable_auto_commit=False, # Disable auto-commit for at-least-once semantics
+ auto_offset_reset="earliest",
  )
  consumer = get_kafka_consumer(**kwargs)
  return consumer
@@ -324,14 +404,17 @@ def create_consumer() -> KafkaConsumer:
  def create_producer() -> Optional[KafkaProducer]:
  """
  Create a Kafka producer configured for the target topic.
-
+
  Handles SASL authentication if configured and sets appropriate message size limits.
-
+
  Returns:
  Configured KafkaProducer instance
  """
- max_request_size = KafkaProducer.DEFAULT_CONFIG['max_request_size'] if target_topic is None \
+ max_request_size = (
+ KafkaProducer.DEFAULT_CONFIG["max_request_size"]
+ if target_topic is None
  else target_topic.max_message_bytes
+ )
  return get_kafka_producer(
  broker=broker,
  sasl_username=sasl_config.get("username"),
@@ -345,7 +428,7 @@ def create_producer() -> Optional[KafkaProducer]:
  def main():
  """
  Main entry point for the streaming function runner.
-
+
  This function:
  1. Loads the appropriate streaming function (DMV1 or DMV2)
  2. Sets up metrics reporting thread and message processing thread
@@ -356,18 +439,11 @@ def main():
  # Shared state for metrics and control
  running = threading.Event()
  running.set() # Start in running state
- metrics = {
- 'count_in': 0,
- 'count_out': 0,
- 'bytes_count': 0
- }
+ metrics = {"count_in": 0, "count_out": 0, "bytes_count": 0}
  metrics_lock = threading.Lock()

  # Shared references for cleanup
- kafka_refs = {
- 'consumer': None,
- 'producer': None
- }
+ kafka_refs = {"consumer": None, "producer": None}

  def send_message_metrics():
  while running.is_set():
@@ -376,40 +452,43 @@ def main():
  requests.post(
  f"http://localhost:{moose_management_port}/metrics-logs",
  json={
- 'timestamp': datetime.now(timezone.utc).isoformat(),
- 'count_in': metrics['count_in'],
- 'count_out': metrics['count_out'],
- 'bytes': metrics['bytes_count'],
- 'function_name': log_prefix
- }
+ "timestamp": datetime.now(timezone.utc).isoformat(),
+ "count_in": metrics["count_in"],
+ "count_out": metrics["count_out"],
+ "bytes": metrics["bytes_count"],
+ "function_name": log_prefix,
+ },
  )
- metrics['count_in'] = 0
- metrics['count_out'] = 0
- metrics['bytes_count'] = 0
+ metrics["count_in"] = 0
+ metrics["count_out"] = 0
+ metrics["bytes_count"] = 0

  def process_messages():
  try:
  streaming_function_input_type = None
  streaming_function_callables = None
  if args.dmv2:
- streaming_function_input_type, streaming_function_callables = load_streaming_function_dmv2(
- function_file_dir, function_file_name)
+ streaming_function_input_type, streaming_function_callables = (
+ load_streaming_function_dmv2(function_file_dir, function_file_name)
+ )
  else:
- streaming_function_input_type, streaming_function_callable = load_streaming_function_dmv1(
- function_file_dir, function_file_name)
+ streaming_function_input_type, streaming_function_callable = (
+ load_streaming_function_dmv1(function_file_dir, function_file_name)
+ )

  streaming_function_callables = [(streaming_function_callable, None)]

  needs_producer = target_topic is not None or any(
- pair[1] is not None for pair in streaming_function_callables)
+ pair[1] is not None for pair in streaming_function_callables
+ )

  # Initialize Kafka connections in the processing thread
  consumer = create_consumer()
  producer = create_producer() if needs_producer else None

  # Store references for cleanup
- kafka_refs['consumer'] = consumer
- kafka_refs['producer'] = producer
+ kafka_refs["consumer"] = consumer
+ kafka_refs["producer"] = producer

  consumer.subscribe([source_topic.name])

@@ -426,17 +505,27 @@ def main():
  # Process each partition's messages
  for partition_messages in messages.values():
  for message in partition_messages:
+ log(
+ f"Message partition={message.partition} offset={message.offset}"
+ )
  if not running.is_set():
  return

  # Parse the message into the input type
- input_data = parse_input(streaming_function_input_type, message.value)
+ input_data = parse_input(
+ streaming_function_input_type, message.value
+ )

  # Run the flow
  all_outputs = []
- for (streaming_function_callable, dlq) in streaming_function_callables:
+ for (
+ streaming_function_callable,
+ dlq,
+ ) in streaming_function_callables:
  try:
- output_data = streaming_function_callable(input_data)
+ output_data = streaming_function_callable(
+ input_data
+ )
  except Exception as e:
  traceback.print_exc()
  if dlq is not None:
@@ -445,21 +534,27 @@ def main():
  error_message=str(e),
  error_type=e.__class__.__name__,
  failed_at=datetime.now(timezone.utc),
- source="transform"
+ source="transform",
+ )
+ record = dead_letter.model_dump_json().encode(
+ "utf-8"
  )
- record = dead_letter.model_dump_json().encode('utf-8')
  producer.send(dlq.name, record).get()
- cli_log(CliLogData(
- action="DeadLetter",
- message=f"Sent message to DLQ {dlq.name}: {str(e)}",
- message_type=CliLogData.ERROR
- ))
+ cli_log(
+ CliLogData(
+ action="DeadLetter",
+ message=f"Sent message to DLQ {dlq.name}: {str(e)}",
+ message_type=CliLogData.ERROR,
+ )
+ )
  else:
- cli_log(CliLogData(
- action="Function",
- message=f"Error processing message (no DLQ configured): {str(e)}",
- message_type=CliLogData.ERROR
- ))
+ cli_log(
+ CliLogData(
+ action="Function",
+ message=f"Error processing message (no DLQ configured): {str(e)}",
+ message_type=CliLogData.ERROR,
+ )
+ )
  # Skip to the next transformation or message
  continue

@@ -468,29 +563,50 @@ def main():
  continue

  # Handle streaming function returning an array or a single object
- output_data_list = output_data if isinstance(output_data, list) else [output_data]
+ output_data_list = (
+ output_data
+ if isinstance(output_data, list)
+ else [output_data]
+ )
  all_outputs.extend(output_data_list)

  with metrics_lock:
- metrics['count_in'] += len(output_data_list)
+ metrics["count_in"] += len(output_data_list)

- cli_log(CliLogData(action="Received",
- message=f'{log_prefix} {len(output_data_list)} message(s)'))
+ cli_log(
+ CliLogData(
+ action="Received",
+ message=f"{log_prefix} {len(output_data_list)} message(s)",
+ )
+ )

  if producer is not None:
  for item in all_outputs:
  # Ignore flow function returning null
  if item is not None:
- record = json.dumps(item, cls=EnhancedJSONEncoder).encode('utf-8')
+ record = json.dumps(
+ item, cls=EnhancedJSONEncoder
+ ).encode("utf-8")

  producer.send(target_topic.name, record)

  with metrics_lock:
- metrics['bytes_count'] += len(record)
- metrics['count_out'] += 1
+ metrics["bytes_count"] += len(record)
+ metrics["count_out"] += 1
+
+ # Flush producer to ensure messages are sent before committing
+ producer.flush()
+
+ # Commit offset only after successful processing and flushing
+ # This ensures at-least-once delivery semantics
+ consumer.commit()

  except Exception as e:
- cli_log(CliLogData(action="Function", message=str(e), message_type="Error"))
+ cli_log(
+ CliLogData(
+ action="Function", message=str(e), message_type="Error"
+ )
+ )
  if not running.is_set():
  break
  # Add a small delay before retrying on error
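Taken together with the `enable_auto_commit=False` / `auto_offset_reset="earliest"` consumer settings earlier in this file, the flush-then-commit step added above gives the runner at-least-once delivery: offsets only advance after the poll's outputs have been produced and flushed. A condensed sketch of that pattern with kafka-python, leaving out the runner's threading, metrics, and DLQ handling (broker address, topic names, and the transform are placeholders):

```python
import json
from kafka import KafkaConsumer, KafkaProducer  # kafka-python

consumer = KafkaConsumer(
    bootstrap_servers="localhost:9092",
    enable_auto_commit=False,           # commit manually, as in the diff
    auto_offset_reset="earliest",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)
producer = KafkaProducer(bootstrap_servers="localhost:9092")
consumer.subscribe(["source_topic"])    # placeholder topic name


def transform(record: dict) -> dict:    # placeholder streaming function
    return record


while True:
    batch = consumer.poll(timeout_ms=1000)
    if not batch:
        continue
    for messages in batch.values():
        for message in messages:
            out = transform(message.value)
            producer.send("target_topic", json.dumps(out).encode("utf-8"))
    producer.flush()   # make sure outputs are durable in Kafka
    consumer.commit()  # only then advance the consumer group offset
```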
@@ -549,16 +665,16 @@ def main():
  log("Processing thread did not exit cleanly")

  # Clean up Kafka resources regardless of thread state
- if kafka_refs['consumer']:
+ if kafka_refs["consumer"]:
  try:
- kafka_refs['consumer'].close()
+ kafka_refs["consumer"].close()
  except Exception as e:
  log(f"Error closing consumer: {e}")

- if kafka_refs['producer'] and kafka_refs['producer'] is not None:
+ if kafka_refs["producer"] and kafka_refs["producer"] is not None:
  try:
- kafka_refs['producer'].flush()
- kafka_refs['producer'].close()
+ kafka_refs["producer"].flush()
+ kafka_refs["producer"].close()
  except Exception as e:
  log(f"Error closing producer: {e}")

moose_lib/utilities/sql.py

@@ -31,4 +31,3 @@ def clickhouse_param_type_for_value(value: Any) -> str:
  if not isinstance(value, str):
  print(f"unhandled type {type(value)}", file=sys.stderr)
  return "String"
-
{moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: moose_lib
- Version: 0.6.148.dev3442438466
+ Version: 0.6.283
  Home-page: https://www.fiveonefour.com/moose
  Author: Fiveonefour Labs Inc.
  Author-email: support@fiveonefour.com
@@ -27,3 +27,20 @@ Dynamic: requires-python
  # Python Moose Lib

  Python package which contains moose utils
+
+ ## Column Autocomplete with MooseModel
+
+ For LSP autocomplete when working with columns, use `MooseModel` instead of `BaseModel`:
+
+ ```python
+ from moose_lib import MooseModel, OlapTable
+
+ class User(MooseModel):
+ user_id: int
+ email: str
+
+ # Autocomplete works when typing User.user_id
+ query = f"SELECT {User.user_id:col}, {User.email:col} FROM users"
+ ```
+
+ See [MooseModel Autocomplete Guide](docs/moose-model-autocomplete.md) for details.
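The README addition relies on class-level attribute access (`User.user_id`) returning an object whose `__format__` understands a `col` spec. The actual implementation appears to live in `moose_lib/dmv2/moose_model.py` and may differ; the following is only an illustrative sketch of one way such a format spec can be wired up with a metaclass (all names here are hypothetical, not the moose_lib API):

```python
class Column:
    """Illustrative column handle; not the real moose_lib implementation."""

    def __init__(self, name: str):
        self.name = name

    def __format__(self, spec: str) -> str:
        if spec == "col":
            return f"`{self.name}`"  # quote the column name for SQL
        return self.name


class ModelMeta(type):
    def __getattr__(cls, item):
        # Unresolved class attributes that match an annotation become Column
        # handles, so User.user_id works at class level.
        if item in cls.__annotations__:
            return Column(item)
        raise AttributeError(item)


class Model(metaclass=ModelMeta):
    pass


class User(Model):
    user_id: int
    email: str


print(f"SELECT {User.user_id:col}, {User.email:col} FROM users")
# SELECT `user_id`, `email` FROM users
```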