flowcept 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. flowcept/__init__.py +7 -4
  2. flowcept/agents/__init__.py +5 -0
  3. flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
  4. flowcept/agents/agents_utils.py +181 -0
  5. flowcept/agents/dynamic_schema_tracker.py +191 -0
  6. flowcept/agents/flowcept_agent.py +30 -0
  7. flowcept/agents/flowcept_ctx_manager.py +175 -0
  8. flowcept/agents/gui/__init__.py +5 -0
  9. flowcept/agents/gui/agent_gui.py +76 -0
  10. flowcept/agents/gui/gui_utils.py +239 -0
  11. flowcept/agents/llms/__init__.py +1 -0
  12. flowcept/agents/llms/claude_gcp.py +139 -0
  13. flowcept/agents/llms/gemini25.py +119 -0
  14. flowcept/agents/prompts/__init__.py +1 -0
  15. flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
  16. flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
  17. flowcept/agents/tools/__init__.py +1 -0
  18. flowcept/agents/tools/general_tools.py +102 -0
  19. flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
  20. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
  21. flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
  22. flowcept/cli.py +286 -44
  23. flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
  24. flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
  25. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
  26. flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
  27. flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
  28. flowcept/commons/task_data_preprocess.py +260 -60
  29. flowcept/commons/utils.py +25 -6
  30. flowcept/configs.py +41 -26
  31. flowcept/flowcept_api/flowcept_controller.py +73 -6
  32. flowcept/flowceptor/adapters/base_interceptor.py +11 -5
  33. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
  34. flowcept/flowceptor/consumers/base_consumer.py +4 -0
  35. flowcept/flowceptor/consumers/consumer_utils.py +5 -4
  36. flowcept/flowceptor/consumers/document_inserter.py +2 -2
  37. flowcept/flowceptor/telemetry_capture.py +5 -2
  38. flowcept/instrumentation/flowcept_agent_task.py +294 -0
  39. flowcept/instrumentation/flowcept_decorator.py +43 -0
  40. flowcept/instrumentation/flowcept_loop.py +3 -3
  41. flowcept/instrumentation/flowcept_task.py +64 -24
  42. flowcept/instrumentation/flowcept_torch.py +5 -5
  43. flowcept/instrumentation/task_capture.py +83 -6
  44. flowcept/version.py +1 -1
  45. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/METADATA +42 -14
  46. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/RECORD +50 -36
  47. resources/sample_settings.yaml +12 -4
  48. flowcept/flowceptor/adapters/agents/__init__.py +0 -1
  49. flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
  50. flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
  51. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
  52. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
  53. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
  54. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/WHEEL +0 -0
  55. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/entry_points.txt +0 -0
  56. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/licenses/LICENSE +0 -0
flowcept/cli.py CHANGED
@@ -15,7 +15,7 @@ Supports:
15
15
  """
16
16
 
17
17
  import subprocess
18
- from time import sleep
18
+ import shlex
19
19
  from typing import Dict, Optional
20
20
  import argparse
21
21
  import os
@@ -28,7 +28,7 @@ from importlib import resources
28
28
  from pathlib import Path
29
29
  from typing import List
30
30
 
31
- from flowcept import Flowcept, configs
31
+ from flowcept import configs
32
32
 
33
33
 
34
34
  def no_docstring(func):
@@ -41,7 +41,7 @@ def no_docstring(func):
41
41
  return wrapper
42
42
 
43
43
 
44
- def show_config():
44
+ def show_settings():
45
45
  """
46
46
  Show Flowcept configuration.
47
47
  """
@@ -55,11 +55,20 @@ def show_config():
55
55
  )
56
56
 
57
57
 
58
- def init_settings():
58
+ def init_settings(full: bool = False):
59
59
  """
60
60
  Create a new settings.yaml file in your home directory under ~/.flowcept.
61
+
62
+ Parameters
63
+ ----------
64
+ full : bool, optional -- Run with full to generate a complete version of the settings file.
61
65
  """
62
- dest_path = Path(os.path.join(configs._SETTINGS_DIR, "settings.yaml"))
66
+ settings_path_env = os.getenv("FLOWCEPT_SETTINGS_PATH", None)
67
+ if settings_path_env is not None:
68
+ print(f"FLOWCEPT_SETTINGS_PATH environment variable is set to {settings_path_env}.")
69
+ dest_path = settings_path_env
70
+ else:
71
+ dest_path = Path(os.path.join(configs._SETTINGS_DIR, "settings.yaml"))
63
72
 
64
73
  if dest_path.exists():
65
74
  overwrite = input(f"{dest_path} already exists. Overwrite? (y/N): ").strip().lower()
@@ -69,11 +78,125 @@ def init_settings():
69
78
 
70
79
  os.makedirs(configs._SETTINGS_DIR, exist_ok=True)
71
80
 
72
- SAMPLE_SETTINGS_PATH = str(resources.files("resources").joinpath("sample_settings.yaml"))
81
+ if full:
82
+ print("Going to generate full settings.yaml.")
83
+ sample_settings_path = str(resources.files("resources").joinpath("sample_settings.yaml"))
84
+ with open(sample_settings_path, "rb") as src_file, open(dest_path, "wb") as dst_file:
85
+ dst_file.write(src_file.read())
86
+ print(f"Copied {sample_settings_path} to {dest_path}")
87
+ else:
88
+ from omegaconf import OmegaConf
89
+
90
+ cfg = OmegaConf.create(configs.DEFAULT_SETTINGS)
91
+ OmegaConf.save(cfg, dest_path)
92
+ print(f"Generated default settings under {dest_path}.")
93
+
94
+
95
+ def version():
96
+ """
97
+ Returns this Flowcept's installation version.
98
+ """
99
+ from flowcept.version import __version__
100
+
101
+ print(f"Flowcept {__version__}")
102
+
103
+
104
+ def stream_messages(print_messages: bool = False, messages_file_path: Optional[str] = None):
105
+ """
106
+ Listen to Flowcept's message stream and optionally echo/save messages.
107
+
108
+ Parameters.
109
+ ----------
110
+ print_messages : bool, optional
111
+ If True, print each decoded message to stdout.
112
+ messages_file_path : str, optional
113
+ If provided, append each message as JSON (one per line) to this file.
114
+ If the file already exists, a new timestamped file is created instead.
115
+ """
116
+ # Local imports to avoid changing module-level deps
117
+ from flowcept.configs import MQ_TYPE
118
+
119
+ if MQ_TYPE != "redis":
120
+ print("This is currently only available for Redis. Other MQ impls coming soon.")
121
+ return
73
122
 
74
- with open(SAMPLE_SETTINGS_PATH, "rb") as src_file, open(dest_path, "wb") as dst_file:
75
- dst_file.write(src_file.read())
76
- print(f"Copied {configs.SETTINGS_PATH} to {dest_path}")
123
+ import os
124
+ import json
125
+ from datetime import datetime
126
+ import redis
127
+ import msgpack
128
+ from flowcept.configs import MQ_HOST, MQ_PORT, MQ_CHANNEL, KVDB_URI
129
+ from flowcept.commons.daos.mq_dao.mq_dao_redis import MQDaoRedis
130
+
131
+ def _timestamped_path_if_exists(path: Optional[str]) -> Optional[str]:
132
+ if not path:
133
+ return path
134
+ if os.path.exists(path):
135
+ base, ext = os.path.splitext(path)
136
+ ts = datetime.now().strftime("%Y-%m-%d %H.%M.%S")
137
+ return f"{base} ({ts}){ext}"
138
+ return path
139
+
140
+ def _json_dumps(obj) -> str:
141
+ """JSON-dump a msgpack-decoded object; handle bytes safely."""
142
+
143
+ def _default(o):
144
+ if isinstance(o, (bytes, bytearray)):
145
+ try:
146
+ return o.decode("utf-8")
147
+ except Exception:
148
+ return o.hex()
149
+ raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")
150
+
151
+ return json.dumps(obj, ensure_ascii=False, separators=(",", ":"), default=_default)
152
+
153
+ # Prepare output file (JSONL)
154
+ out_fh = None
155
+ if messages_file_path:
156
+ out_path = _timestamped_path_if_exists(messages_file_path)
157
+ out_fh = open(out_path, "w", encoding="utf-8", buffering=1) # line-buffered
158
+
159
+ # Connect & subscribe
160
+ redis_client = redis.from_url(KVDB_URI) if KVDB_URI else redis.Redis(host=MQ_HOST, port=MQ_PORT, db=0)
161
+ pubsub = redis_client.pubsub()
162
+ pubsub.subscribe(MQ_CHANNEL)
163
+
164
+ print(f"Listening for messages on channel '{MQ_CHANNEL}'... (Ctrl+C to exit)")
165
+
166
+ try:
167
+ for message in pubsub.listen():
168
+ if not message or message.get("type") in MQDaoRedis.MESSAGE_TYPES_IGNORE:
169
+ continue
170
+
171
+ data = message.get("data")
172
+ if not isinstance(data, (bytes, bytearray)):
173
+ print(f"Skipping message with unexpected data type: {type(data)} - {data}")
174
+ continue
175
+
176
+ try:
177
+ msg_obj = msgpack.loads(data, strict_map_key=False)
178
+ msg_type = msg_obj.get("type", None)
179
+ print(f"\nReceived a message! type={msg_type}")
180
+
181
+ if print_messages:
182
+ print(_json_dumps(msg_obj))
183
+
184
+ if out_fh is not None:
185
+ out_fh.write(_json_dumps(msg_obj))
186
+ out_fh.write("\n")
187
+
188
+ except Exception as e:
189
+ print(f"Error decoding message: {e}")
190
+
191
+ except KeyboardInterrupt:
192
+ print("\nInterrupted, shutting down...")
193
+ finally:
194
+ try:
195
+ if out_fh:
196
+ out_fh.close()
197
+ pubsub.close()
198
+ except Exception:
199
+ pass
77
200
 
78
201
 
79
202
  def start_consumption_services(bundle_exec_id: str = None, check_safe_stops: bool = False, consumers: List[str] = None):
@@ -94,6 +217,8 @@ def start_consumption_services(bundle_exec_id: str = None, check_safe_stops: boo
94
217
  print(f" check_safe_stops: {check_safe_stops}")
95
218
  print(f" consumers: {consumers or []}")
96
219
 
220
+ from flowcept import Flowcept
221
+
97
222
  Flowcept.start_consumption_services(
98
223
  bundle_exec_id=bundle_exec_id,
99
224
  check_safe_stops=check_safe_stops,
@@ -137,6 +262,8 @@ def workflow_count(workflow_id: str):
137
262
  workflow_id : str
138
263
  The ID of the workflow to count tasks for.
139
264
  """
265
+ from flowcept import Flowcept
266
+
140
267
  result = {
141
268
  "workflow_id": workflow_id,
142
269
  "tasks": len(Flowcept.db.query({"workflow_id": workflow_id})),
@@ -166,11 +293,19 @@ def query(filter: str, project: str = None, sort: str = None, limit: int = 0):
166
293
  List[dict]
167
294
  A list of task documents matching the query.
168
295
  """
169
- _filter = json.loads(filter)
170
- _project = json.loads(project) or None
171
- _sort = list(sort) or None
296
+ from flowcept import Flowcept
297
+
298
+ _filter, _project, _sort = None, None, None
299
+ if filter:
300
+ _filter = json.loads(filter)
301
+ if project:
302
+ _project = json.loads(project)
303
+ if sort:
304
+ _sort = list(sort)
172
305
  print(
173
- json.dumps(Flowcept.db.query(filter=_filter, project=_project, sort=_sort, limit=limit), indent=2, default=str)
306
+ json.dumps(
307
+ Flowcept.db.query(filter=_filter, projection=_project, sort=_sort, limit=limit), indent=2, default=str
308
+ )
174
309
  )
175
310
 
176
311
 
@@ -183,17 +318,37 @@ def get_task(task_id: str):
183
318
  task_id : str
184
319
  The identifier of the task.
185
320
  """
321
+ from flowcept import Flowcept
322
+
186
323
  _query = {"task_id": task_id}
187
324
  print(json.dumps(Flowcept.db.query(_query), indent=2, default=str))
188
325
 
189
326
 
190
- def start_agent():
327
+ def start_agent(): # TODO: start with gui
191
328
  """Start Flowcept agent."""
192
- from flowcept.flowceptor.adapters.agents.flowcept_agent import main
329
+ from flowcept.agents.flowcept_agent import main
193
330
 
194
331
  main()
195
332
 
196
333
 
334
+ def start_agent_gui(port: int = None):
335
+ """Start Flowcept agent GUI service.
336
+
337
+ Parameters
338
+ ----------
339
+ port : int, optional
340
+ The default port is 8501. Use --port if you want to run the GUI on a different port.
341
+ """
342
+ gui_path = Path(__file__).parent / "agents" / "gui" / "agent_gui.py"
343
+ gui_path = gui_path.resolve()
344
+ cmd = f"streamlit run {gui_path}"
345
+
346
+ if port is not None and isinstance(port, int):
347
+ cmd += f" --server.port {port}"
348
+
349
+ _run_command(cmd, check_output=True)
350
+
351
+
197
352
  def agent_client(tool_name: str, kwargs: str = None):
198
353
  """Agent Client.
199
354
 
@@ -204,19 +359,20 @@ def agent_client(tool_name: str, kwargs: str = None):
204
359
  kwargs : str, optional
205
360
  A stringfied JSON containing the kwargs for the tool, if needed.
206
361
  """
207
- print(kwargs)
208
- if kwargs is not None:
209
- kwargs = json.loads(kwargs)
210
-
211
362
  print(f"Going to run agent tool '{tool_name}'.")
212
363
  if kwargs:
213
- print(f"Using kwargs: {kwargs}")
364
+ try:
365
+ kwargs = json.loads(kwargs)
366
+ print(f"Using kwargs: {kwargs}")
367
+ except Exception as e:
368
+ print(f"Could not parse kwargs as a valid JSON: {kwargs}")
369
+ print(e)
214
370
  print("-----------------")
215
- from flowcept.flowceptor.consumers.agent.client_agent import run_tool
371
+ from flowcept.agents.agent_client import run_tool
216
372
 
217
373
  result = run_tool(tool_name, kwargs)[0]
218
374
 
219
- print(result.text)
375
+ print(result)
220
376
 
221
377
 
222
378
  def check_services():
@@ -235,8 +391,10 @@ def check_services():
235
391
  None
236
392
  Prints diagnostics to stdout; returns nothing.
237
393
  """
394
+ from flowcept import Flowcept
395
+
238
396
  print(f"Testing with settings at: {configs.SETTINGS_PATH}")
239
- from flowcept.configs import MONGO_ENABLED, AGENT, KVDB_ENABLED, INSERTION_BUFFER_TIME
397
+ from flowcept.configs import MONGO_ENABLED, AGENT, KVDB_ENABLED
240
398
 
241
399
  if not Flowcept.services_alive():
242
400
  print("Some of the enabled services are not alive!")
@@ -265,7 +423,7 @@ def check_services():
265
423
 
266
424
  if AGENT.get("enabled", False):
267
425
  print("Agent is enabled, so we are testing it too.")
268
- from flowcept.flowceptor.consumers.agent.client_agent import run_tool
426
+ from flowcept.agents.agent_client import run_tool
269
427
 
270
428
  try:
271
429
  print(run_tool("check_liveness"))
@@ -275,30 +433,113 @@ def check_services():
275
433
 
276
434
  print("Testing LLM connectivity")
277
435
  check_llm_result = run_tool("check_llm")[0]
278
- print(check_llm_result.text)
436
+ print(check_llm_result)
279
437
 
280
- if "error" in check_llm_result.text.lower():
438
+ if "error" in check_llm_result.lower():
281
439
  print("There is an error with the LLM communication.")
282
440
  return
283
- elif MONGO_ENABLED:
284
- print("Testing if llm chat was stored in MongoDB.")
285
- response_metadata = json.loads(check_llm_result.text.split("\n")[0])
286
- print(response_metadata)
287
- sleep(INSERTION_BUFFER_TIME * 1.05)
288
- chats = Flowcept.db.query({"workflow_id": response_metadata["agent_id"]})
289
- if chats:
290
- print(chats)
291
- else:
292
- print("Could not find chat history. Make sure that the DB Inserter service is on.")
441
+ # TODO: the following needs to be fixed
442
+ # elif MONGO_ENABLED:
443
+ #
444
+ # print("Testing if llm chat was stored in MongoDB.")
445
+ # response_metadata = json.loads(check_llm_result.split("\n")[0])
446
+ # print(response_metadata)
447
+ # sleep(INSERTION_BUFFER_TIME * 1.05)
448
+ # chats = Flowcept.db.query({"workflow_id": response_metadata["agent_id"]})
449
+ # if chats:
450
+ # print(chats)
451
+ # else:
452
+ # print("Could not find chat history. Make sure that the DB Inserter service is on.")
293
453
  print("\n\nAll expected services seem to be working properly!")
294
454
  return
295
455
 
296
456
 
457
+ def start_mongo() -> None:
458
+ """
459
+ Start a MongoDB server using paths configured in the settings file.
460
+
461
+ Looks up:
462
+ databases:
463
+ mongodb:
464
+ - bin : str (required) path to the mongod executable
465
+ - log_path : str, optional (adds --fork --logpath)
466
+ - lock_file_path : str, optional (adds --pidfilepath)
467
+
468
+ Builds and runs the startup command.
469
+ """
470
+ # Safe nested gets
471
+ settings = getattr(configs, "settings", {}) or {}
472
+ databases = settings.get("databases") or {}
473
+ mongodb = databases.get("mongodb") or {}
474
+
475
+ bin_path = mongodb.get("bin")
476
+ log_path = mongodb.get("log_path")
477
+ lock_file_path = mongodb.get("lock_file_path")
478
+
479
+ if not bin_path:
480
+ print("Error: settings['databases']['mongodb']['bin'] is required.")
481
+ return
482
+
483
+ # Build command
484
+ parts = [shlex.quote(str(bin_path))]
485
+ if log_path:
486
+ parts += ["--fork", "--logpath", shlex.quote(str(log_path))]
487
+ if lock_file_path:
488
+ parts += ["--pidfilepath", shlex.quote(str(lock_file_path))]
489
+
490
+ cmd = " ".join(parts)
491
+ try:
492
+ out = _run_command(cmd, check_output=True)
493
+ if out:
494
+ print(out)
495
+ except subprocess.CalledProcessError as e:
496
+ print(f"Failed to start MongoDB: {e}")
497
+
498
+
499
+ def start_redis() -> None:
500
+ """
501
+ Start a Redis server using paths configured in settings.
502
+
503
+ Looks up:
504
+ mq:
505
+ - bin : str (required) path to the redis-server executable
506
+ - conf_file : str, optional (appended as the sole argument)
507
+
508
+ Builds and runs the command via _run_command(cmd, check_output=True).
509
+ """
510
+ settings = getattr(configs, "settings", {}) or {}
511
+ mq = settings.get("mq") or {}
512
+
513
+ if mq.get("type", None) != "redis":
514
+ print("Your settings file needs to specify redis as the MQ type. Please fix it.")
515
+ return
516
+
517
+ bin_path = mq.get("bin")
518
+ conf_file = mq.get("conf_file", None)
519
+
520
+ if not bin_path:
521
+ print("Error: settings['mq']['bin'] is required.")
522
+ return
523
+
524
+ parts = [shlex.quote(str(bin_path))]
525
+ if conf_file:
526
+ parts.append(shlex.quote(str(conf_file)))
527
+
528
+ cmd = " ".join(parts)
529
+ try:
530
+ out = _run_command(cmd, check_output=True)
531
+ if out:
532
+ print(out)
533
+ except subprocess.CalledProcessError as e:
534
+ print(f"Failed to start Redis: {e}")
535
+
536
+
297
537
  COMMAND_GROUPS = [
298
- ("Basic Commands", [check_services, show_config, init_settings, start_services, stop_services]),
299
- ("Consumption Commands", [start_consumption_services, stop_consumption_services]),
538
+ ("Basic Commands", [version, check_services, show_settings, init_settings, start_services, stop_services]),
539
+ ("Consumption Commands", [start_consumption_services, stop_consumption_services, stream_messages]),
300
540
  ("Database Commands", [workflow_count, query, get_task]),
301
- ("Agent Commands", [start_agent, agent_client]),
541
+ ("Agent Commands", [start_agent, agent_client, start_agent_gui]),
542
+ ("External Services", [start_mongo, start_redis]),
302
543
  ]
303
544
 
304
545
  COMMANDS = set(f for _, fs in COMMAND_GROUPS for f in fs)
@@ -332,7 +573,7 @@ def _run_command(cmd_str: str, check_output: bool = True, popen_kwargs: Optional
332
573
  popen_kwargs = {}
333
574
 
334
575
  kwargs = {"shell": True, "check": True, **popen_kwargs}
335
-
576
+ print(f"Going to run shell command:\n{cmd_str}")
336
577
  if check_output:
337
578
  kwargs.update({"capture_output": True, "text": True})
338
579
  result = subprocess.run(cmd_str, **kwargs)
@@ -377,8 +618,9 @@ def main(): # noqa: D103
377
618
  for pname, param in inspect.signature(func).parameters.items():
378
619
  arg_name = f"--{pname.replace('_', '-')}"
379
620
  params_doc = _parse_numpy_doc(doc).get(pname, {})
621
+
380
622
  help_text = f"{params_doc.get('type', '')} - {params_doc.get('desc', '').strip()}"
381
- if isinstance(param.annotation, bool):
623
+ if param.annotation is bool:
382
624
  parser.add_argument(arg_name, action="store_true", help=help_text)
383
625
  elif param.annotation == List[str]:
384
626
  parser.add_argument(arg_name, type=lambda s: s.split(","), help=help_text)
@@ -386,7 +628,7 @@ def main(): # noqa: D103
386
628
  parser.add_argument(arg_name, type=str, help=help_text)
387
629
 
388
630
  # Handle --help --command
389
- help_flag = "--help" in sys.argv
631
+ help_flag = "--help" in sys.argv or "-h" in sys.argv
390
632
  command_flags = {f"--{f.__name__.replace('_', '-')}" for f in COMMANDS}
391
633
  matched_command_flag = next((arg for arg in sys.argv if arg in command_flags), None)
392
634
 
@@ -402,7 +644,7 @@ def main(): # noqa: D103
402
644
  meta = params.get(pname, {})
403
645
  opt = p.default != inspect.Parameter.empty
404
646
  print(
405
- f" --{pname:<18} {meta.get('type', 'str')}, "
647
+ f" --{pname.replace('_', '-'):<18} {meta.get('type', 'str')}, "
406
648
  f"{'optional' if opt else 'required'} - {meta.get('desc', '').strip()}"
407
649
  )
408
650
  print()
@@ -430,7 +672,7 @@ def main(): # noqa: D103
430
672
  opt = sig.parameters[argname].default != inspect.Parameter.empty
431
673
  print(
432
674
  f" --"
433
- f"{argname:<18} {meta['type']}, "
675
+ f"{argname.replace('_', '-'):<18} {meta['type']}, "
434
676
  f"{'optional' if opt else 'required'} - {meta['desc'].strip()}"
435
677
  )
436
678
  print()
@@ -707,6 +707,53 @@ class MongoDBDAO(DocumentDBDAO):
707
707
  else:
708
708
  raise Exception(f"You used type={collection}, but MongoDB only stores tasks, workflows, and objects")
709
709
 
710
+ def raw_task_pipeline(self, pipeline: List[Dict]):
711
+ """
712
+ Run a raw MongoDB aggregation pipeline on the tasks collection.
713
+
714
+ This method allows advanced users to directly execute an
715
+ aggregation pipeline against the underlying ``_tasks_collection``.
716
+ It is intended for cases where more complex queries, transformations,
717
+ or aggregations are needed beyond the high-level query APIs.
718
+
719
+ Parameters
720
+ ----------
721
+ pipeline : list of dict
722
+ A MongoDB aggregation pipeline represented as a list of
723
+ stage documents (e.g., ``[{"$match": {...}}, {"$group": {...}}]``).
724
+
725
+ Returns
726
+ -------
727
+ list of dict or None
728
+ The aggregation results as a list of documents if successful,
729
+ or ``None`` if an error occurred.
730
+
731
+ Raises
732
+ ------
733
+ Exception
734
+ Any exception raised by the underlying MongoDB driver will be
735
+ logged and the method will return ``None`` instead of propagating.
736
+
737
+ Examples
738
+ --------
739
+ Count the number of tasks per workflow:
740
+
741
+ >>> pipeline = [
742
+ ... {"$group": {"_id": "$workflow_id", "count": {"$sum": 1}}}
743
+ ... ]
744
+ >>> results = obj.raw_task_pipeline(pipeline)
745
+ >>> for r in results:
746
+ ... print(r["_id"], r["count"])
747
+ wf_123 42
748
+ wf_456 18
749
+ """
750
+ try:
751
+ rs = self._tasks_collection.aggregate(pipeline)
752
+ return list(rs)
753
+ except Exception as e:
754
+ self.logger.exception(e)
755
+ return None
756
+
710
757
  def task_query(
711
758
  self,
712
759
  filter: Dict = None,
@@ -1,15 +1,12 @@
1
1
  """MQ base module."""
2
2
 
3
- from abc import ABC, abstractmethod
3
+ from abc import abstractmethod
4
4
  from typing import Union, List, Callable
5
5
  import csv
6
6
  import msgpack
7
7
  from time import time
8
8
  import flowcept.commons
9
9
  from flowcept.commons.autoflush_buffer import AutoflushBuffer
10
-
11
- from flowcept.commons.daos.keyvalue_dao import KeyValueDAO
12
-
13
10
  from flowcept.commons.utils import chunked
14
11
  from flowcept.commons.flowcept_logger import FlowceptLogger
15
12
  from flowcept.configs import (
@@ -21,12 +18,14 @@ from flowcept.configs import (
21
18
  MQ_TYPE,
22
19
  MQ_TIMING,
23
20
  KVDB_ENABLED,
21
+ MQ_ENABLED,
22
+ DUMP_BUFFER_PATH,
24
23
  )
25
24
 
26
25
  from flowcept.commons.utils import GenericJSONEncoder
27
26
 
28
27
 
29
- class MQDao(ABC):
28
+ class MQDao(object):
30
29
  """MQ base class."""
31
30
 
32
31
  ENCODER = GenericJSONEncoder if JSON_SERIALIZER == "complex" else None
@@ -35,6 +34,9 @@ class MQDao(ABC):
35
34
  @staticmethod
36
35
  def build(*args, **kwargs) -> "MQDao":
37
36
  """Build it."""
37
+ if not MQ_ENABLED:
38
+ return MQDao()
39
+
38
40
  if MQ_TYPE == "redis":
39
41
  from flowcept.commons.daos.mq_dao.mq_dao_redis import MQDaoRedis
40
42
 
@@ -69,12 +71,11 @@ class MQDao(ABC):
69
71
  self.started = False
70
72
  self._adapter_settings = adapter_settings
71
73
  if KVDB_ENABLED:
74
+ from flowcept.commons.daos.keyvalue_dao import KeyValueDAO
75
+
72
76
  self._keyvalue_dao = KeyValueDAO()
73
77
  else:
74
78
  self._keyvalue_dao = None
75
- self.logger.warning(
76
- "We are going to run without KVDB. If you are running a workflow, this may lead to errors."
77
- )
78
79
  self._time_based_flushing_started = False
79
80
  self.buffer: Union[AutoflushBuffer, List] = None
80
81
  if MQ_TIMING:
@@ -95,11 +96,21 @@ class MQDao(ABC):
95
96
  def bulk_publish(self, buffer):
96
97
  """Publish it."""
97
98
  # self.logger.info(f"Going to flush {len(buffer)} to MQ...")
98
- if MQ_CHUNK_SIZE > 1:
99
- for chunk in chunked(buffer, MQ_CHUNK_SIZE):
100
- self._bulk_publish(chunk)
99
+ if flowcept.configs.DB_FLUSH_MODE == "offline":
100
+ if DUMP_BUFFER_PATH is not None:
101
+ import orjson
102
+
103
+ with open(DUMP_BUFFER_PATH, "wb", buffering=1_048_576) as f:
104
+ for obj in buffer:
105
+ f.write(orjson.dumps(obj))
106
+ f.write(b"\n")
107
+ self.logger.info(f"Saved Flowcept messages into {DUMP_BUFFER_PATH}.")
101
108
  else:
102
- self._bulk_publish(buffer)
109
+ if MQ_CHUNK_SIZE > 1:
110
+ for chunk in chunked(buffer, MQ_CHUNK_SIZE):
111
+ self._bulk_publish(chunk)
112
+ else:
113
+ self._bulk_publish(buffer)
103
114
 
104
115
  def register_time_based_thread_init(self, interceptor_instance_id: str, exec_bundle_id=None):
105
116
  """Register the time."""
@@ -183,7 +194,7 @@ class MQDao(ABC):
183
194
  writer.writerow(["type", "start", "end", "duration", "size"])
184
195
  writer.writerows(self._flush_events)
185
196
 
186
- def _stop(self, interceptor_instance_id: str, check_safe_stops: bool = True, bundle_exec_id: int = None):
197
+ def _stop(self, interceptor_instance_id: str = None, check_safe_stops: bool = True, bundle_exec_id: int = None):
187
198
  """Stop MQ publisher."""
188
199
  self.logger.debug(f"MQ pub received stop sign: bundle={bundle_exec_id}, interceptor={interceptor_instance_id}")
189
200
  self._close_buffer()
@@ -116,5 +116,21 @@ class MQDaoKafka(MQDao):
116
116
  return False
117
117
 
118
118
  def unsubscribe(self):
119
- """Unsubscribes from Kafka topic."""
120
- raise NotImplementedError()
119
+ """Unsubscribes from Kafka topic and closes consumer if open."""
120
+ if self._consumer is None:
121
+ self.logger.warning("No Kafka consumer to unsubscribe.")
122
+ return
123
+
124
+ try:
125
+ self._consumer.unsubscribe()
126
+ self.logger.info("Unsubscribed from Kafka topics.")
127
+ except RuntimeError as e:
128
+ self.logger.debug(f"Consumer already closed while unsubscribing: {e}")
129
+ except Exception as e:
130
+ self.logger.exception(f"Error while unsubscribing from Kafka: {e}")
131
+ finally:
132
+ try:
133
+ self._consumer.close()
134
+ except Exception as e:
135
+ self.logger.debug(f"Error closing consumer after unsubscribe: {e}")
136
+ self._consumer = None