flowcept 0.9.17__py3-none-any.whl → 0.9.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. flowcept/agents/agents_utils.py +42 -0
  2. flowcept/agents/flowcept_agent.py +4 -1
  3. flowcept/agents/flowcept_ctx_manager.py +99 -36
  4. flowcept/agents/gui/gui_utils.py +21 -3
  5. flowcept/agents/prompts/general_prompts.py +1 -1
  6. flowcept/agents/prompts/in_memory_query_prompts.py +158 -45
  7. flowcept/agents/tools/general_tools.py +20 -3
  8. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +14 -31
  9. flowcept/commons/daos/docdb_dao/lmdb_dao.py +48 -0
  10. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +2 -2
  11. flowcept/commons/daos/mq_dao/mq_dao_redis.py +33 -2
  12. flowcept/commons/flowcept_dataclasses/task_object.py +4 -1
  13. flowcept/configs.py +4 -1
  14. flowcept/flowcept_api/flowcept_controller.py +5 -1
  15. flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +33 -2
  16. flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +18 -4
  17. flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +1 -0
  18. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +7 -8
  19. flowcept/instrumentation/flowcept_task.py +147 -51
  20. flowcept/instrumentation/task_capture.py +10 -1
  21. flowcept/version.py +1 -1
  22. {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/METADATA +8 -1
  23. {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/RECORD +27 -27
  24. {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/WHEEL +1 -1
  25. resources/sample_settings.yaml +2 -1
  26. {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/entry_points.txt +0 -0
  27. {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/licenses/LICENSE +0 -0
flowcept/agents/tools/general_tools.py CHANGED
@@ -1,7 +1,7 @@
  import json
  from typing import List

- from flowcept.agents.agents_utils import build_llm_model, ToolResult
+ from flowcept.agents.agents_utils import build_llm_model, ToolResult, normalize_message
  from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
  from flowcept.agents.prompts.general_prompts import ROUTING_PROMPT, SMALL_TALK_PROMPT

@@ -105,6 +105,19 @@ def reset_records() -> ToolResult:
          return ToolResult(code=499, result=str(e))


+ @mcp_flowcept.tool()
+ def reset_context() -> ToolResult:
+     """
+     Resets all context.
+     """
+     try:
+         ctx = mcp_flowcept.get_context()
+         ctx.request_context.lifespan_context.reset_context()
+         return ToolResult(code=201, result="Context reset.")
+     except Exception as e:
+         return ToolResult(code=499, result=str(e))
+
+
  @mcp_flowcept.tool()
  def prompt_handler(message: str) -> ToolResult:
      """
@@ -120,20 +133,24 @@ def prompt_handler(message: str) -> ToolResult:
      TextContent
          The AI response or routing feedback.
      """
-     df_key_words = ["df", "save", "result = df", "reset context"]
+     df_key_words = ["df", "save", "result = df"]
      for key in df_key_words:
          if key in message:
              return run_df_query(llm=None, query=message, plot=False)

+     if "reset context" in message:
+         return reset_context()
      if "@record" in message:
          return record_guidance(message)
      if "@show records" in message:
          return show_records()
      if "@reset records" in message:
-         return reset_records(message)
+         return reset_records()

      llm = build_llm_model()

+     message = normalize_message(message)
+
      prompt = ROUTING_PROMPT + message
      route = llm.invoke(prompt)

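Taken together, these hunks change how prompt_handler dispatches: "reset context" no longer rides through the DataFrame-query path but short-circuits to the new reset_context tool, @reset records is called with its corrected zero-argument signature, and free-form messages are normalized before LLM routing. A minimal stand-in sketch of the resulting dispatch order (handler names mirror the diff; the returned strings are placeholders for the real tool calls):

def route_message(message: str) -> str:
    # DataFrame-ish keywords go straight to the df-query path.
    for key in ("df", "save", "result = df"):
        if key in message:
            return "run_df_query"
    # "reset context" is now checked before the @-commands.
    if "reset context" in message:
        return "reset_context"
    if "@record" in message:
        return "record_guidance"
    if "@show records" in message:
        return "show_records"
    if "@reset records" in message:
        return "reset_records"  # zero-argument call as of 0.9.18
    return "llm_routing"  # normalize, then ROUTING_PROMPT + message

assert route_message("please reset context") == "reset_context"
assert route_message("result = df.head()") == "run_df_query"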
flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py CHANGED
@@ -63,11 +63,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:

      Examples
      --------
-     Reset the context:
-
-     >>> run_df_query(llm, "reset context")
-     ToolResult(code=201, result="Context Reset!")
-
      Save the current DataFrame:

      >>> run_df_query(llm, "save")
@@ -90,10 +85,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
      custom_user_guidance = ctx.request_context.lifespan_context.custom_guidance
      if df is None or not len(df):
          return ToolResult(code=404, result="Current df is empty or null.")
-
-     if "reset context" in query:
-         ctx.request_context.lifespan_context.df = pd.DataFrame()
-         return ToolResult(code=201, result="Context Reset!")
      elif "save" in query:
          return save_df(df, schema, value_examples)
      elif "result = df" in query:
@@ -173,7 +164,7 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df, custom_us
      >>> print(result.result["plot_code"])
      plt.bar(result_df["region"], result_df["total_sales"])
      """
-     plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples)
+     plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples, list(df.columns))
      try:
          response = llm(plot_prompt)
      except Exception as e:
@@ -300,7 +291,9 @@ def generate_result_df(
      if llm is None:
          llm = build_llm_model()
      try:
-         prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values, custom_user_guidance)
+         prompt = generate_pandas_code_prompt(
+             query, dynamic_schema, example_values, custom_user_guidance, list(df.columns)
+         )
          response = llm(prompt)
      except Exception as e:
          return ToolResult(code=400, result=str(e), extra=prompt)
@@ -317,9 +310,10 @@ def generate_result_df(
              extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
          )
      else:
-         tool_result = extract_or_fix_python_code(llm, result_code)
+         tool_result = extract_or_fix_python_code(llm, result_code, list(df.columns))
          if tool_result.code == 201:
              new_result_code = tool_result.result
+             result_code = new_result_code
              try:
                  result_df = safe_execute(df, new_result_code)
              except Exception as e:
@@ -357,12 +351,7 @@ def generate_result_df(
      if summarize:
          try:
              tool_result = summarize_result(
-                 llm,
-                 result_code,
-                 result_df,
-                 query,
-                 dynamic_schema,
-                 example_values,
+                 llm, result_code, result_df, query, dynamic_schema, example_values, list(df.columns)
              )
              if tool_result.is_success():
                  return_code = 301
@@ -377,7 +366,7 @@ def generate_result_df(
          return_code = 303

      try:
-         result_df = format_result_df(result_df)
+         result_df_str = format_result_df(result_df)
      except Exception as e:
          return ToolResult(
              code=405,
@@ -387,7 +376,8 @@ def generate_result_df(

      this_result = {
          "result_code": result_code,
-         "result_df": result_df,
+         "result_df": result_df_str,
+         "result_df_markdown": result_df.to_markdown(index=False),
          "summary": summary,
          "summary_error": summary_error,
      }
@@ -473,7 +463,7 @@ def run_df_code(user_code: str, df):


  @mcp_flowcept.tool()
- def extract_or_fix_python_code(llm, raw_text):
+ def extract_or_fix_python_code(llm, raw_text, current_fields):
      """
      Extract or repair JSON code from raw text using an LLM.

@@ -523,7 +513,7 @@ def extract_or_fix_python_code(llm, raw_text):
      >>> print(res)
      ToolResult(code=499, result='LLM service unavailable')
      """
-     prompt = extract_or_fix_python_code_prompt(raw_text)
+     prompt = extract_or_fix_python_code_prompt(raw_text, current_fields)
      try:
          response = llm(prompt)
          return ToolResult(code=201, result=response)
@@ -582,14 +572,7 @@ def extract_or_fix_json_code(llm, raw_text) -> ToolResult:


  @mcp_flowcept.tool()
- def summarize_result(
-     llm,
-     code,
-     result,
-     query: str,
-     dynamic_schema,
-     example_values,
- ) -> ToolResult:
+ def summarize_result(llm, code, result, query: str, dynamic_schema, example_values, current_fields) -> ToolResult:
      """
      Summarize the pandas result with local reduction for large DataFrames.
      - For wide DataFrames, selects top columns based on variance and uniqueness.
@@ -597,7 +580,7 @@ def summarize_result(
      - Constructs a detailed prompt for the LLM with original column context.
      """
      summarized_df = summarize_df(result, code)
-     prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query)
+     prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query, current_fields)
      try:
          response = llm(prompt)
          return ToolResult(code=201, result=response)
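The common thread in these hunks is that the live DataFrame's column list (list(df.columns)) is now threaded into every prompt builder and into the code-fixing step as current_fields, so the LLM is anchored to columns that actually exist; the diff also stops overwriting result_df with its formatted string, keeping the DataFrame available for a markdown rendering. A toy illustration of the column-anchoring pattern (the prompt template below is invented for illustration, not Flowcept's actual wording):

import pandas as pd

def build_pandas_code_prompt(query: str, current_fields: list) -> str:
    # Hypothetical template: constrain generated code to the real schema.
    return (
        "Write pandas code against a DataFrame named `df`.\n"
        f"Only these columns exist: {current_fields}\n"
        f"User query: {query}\n"
    )

df = pd.DataFrame({"task_id": ["t1"], "used.x": [1], "generated.y": [2.0]})
print(build_pandas_code_prompt("mean of generated.y", list(df.columns)))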
flowcept/commons/daos/docdb_dao/lmdb_dao.py CHANGED
@@ -115,6 +115,54 @@ class LMDBDAO(DocumentDBDAO):
              self.logger.exception(e)
              return False

+     def delete_task_keys(self, key_name, keys_list: List[str]) -> bool:
+         """Delete task documents by a key value list.
+
+         When deleting by task_id, deletes keys directly. Otherwise, scans
+         tasks and deletes matching entries.
+         """
+         if self._is_closed:
+             self._open()
+         if type(keys_list) is not list:
+             keys_list = [keys_list]
+         try:
+             with self._env.begin(write=True, db=self._tasks_db) as txn:
+                 if key_name == "task_id":
+                     for key in keys_list:
+                         if key is None:
+                             continue
+                         txn.delete(str(key).encode())
+                 else:
+                     cursor = txn.cursor()
+                     for key, value in cursor:
+                         entry = json.loads(value.decode())
+                         if entry.get(key_name) in keys_list:
+                             cursor.delete()
+             return True
+         except Exception as e:
+             self.logger.exception(e)
+             return False
+
+     def count_tasks(self) -> int:
+         """Count number of docs in tasks collection."""
+         if self._is_closed:
+             self._open()
+         try:
+             return self._env.stat(db=self._tasks_db).get("entries", 0)
+         except Exception as e:
+             self.logger.exception(e)
+             return -1
+
+     def count_workflows(self) -> int:
+         """Count number of docs in workflows collection."""
+         if self._is_closed:
+             self._open()
+         try:
+             return self._env.stat(db=self._workflows_db).get("entries", 0)
+         except Exception as e:
+             self.logger.exception(e)
+             return -1
+
      @staticmethod
      def _match_filter(entry, filter):
          """
flowcept/commons/daos/mq_dao/mq_dao_kafka.py CHANGED
@@ -42,7 +42,7 @@ class MQDaoKafka(MQDao):
      def message_listener(self, message_handler: Callable):
          """Get message listener."""
          try:
-             while True:
+             while self._consumer is not None:
                  msg = self._consumer.poll(1.0)
                  if msg is None:
                      continue
@@ -59,7 +59,7 @@ class MQDaoKafka(MQDao):
          except Exception as e:
              self.logger.exception(e)
          finally:
-             self._consumer.close()
+             self.unsubscribe()

      def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps):
          """Send the message."""
flowcept/commons/daos/mq_dao/mq_dao_redis.py CHANGED
@@ -1,5 +1,6 @@
  """MQ redis module."""

+ from threading import Thread
  from typing import Callable
  import redis

@@ -14,12 +15,15 @@ from flowcept.configs import MQ_CHANNEL, MQ_HOST, MQ_PORT, MQ_PASSWORD, MQ_URI,
  class MQDaoRedis(MQDao):
      """MQ redis class."""

-     MESSAGE_TYPES_IGNORE = {"psubscribe"}
+     MESSAGE_TYPES_IGNORE = {"psubscribe", "subscribe", "pong"}

      def __init__(self, adapter_settings=None):
          super().__init__(adapter_settings)

          self._consumer = None
+         self._ping_thread = None
+         self._ping_stop = False
+
          use_same_as_kv = MQ_SETTINGS.get("same_as_kvdb", False)
          if use_same_as_kv:
              if KVDB_ENABLED:
@@ -37,6 +41,26 @@ class MQDaoRedis(MQDao):
          """
          self._consumer = self._producer.pubsub()
          self._consumer.psubscribe(MQ_CHANNEL)
+         self._start_ping_thread()
+
+     def _start_ping_thread(self, interval: int = 30):
+         """Start a background thread to ping Redis pubsub periodically."""
+         if self._ping_thread and self._ping_thread.is_alive():
+             return
+
+         self._ping_stop = False
+
+         def _pinger():
+             while not self._ping_stop:
+                 try:
+                     if self._consumer is not None:
+                         self._consumer.ping()
+                 except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
+                     self.logger.critical(f"Redis PubSub ping failed: {e}")
+                 sleep(interval)
+
+         self._ping_thread = Thread(target=_pinger, daemon=True)
+         self._ping_thread.start()

      def unsubscribe(self):
          """
@@ -75,8 +99,15 @@ class MQDaoRedis(MQDao):
                      current_trials = 0
              except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
                  current_trials += 1
-                 self.logger.critical(f"Redis connection lost: {e}. Reconnecting in 3 seconds...")
+                 self.logger.critical(f"Redis connection lost: {e}. Trying to reconnect in 3 seconds...")
                  sleep(3)
+                 try:
+                     self.subscribe()
+                     self.logger.warning(f"Redis reconnected after {current_trials} trials.")
+                     current_trials = 0
+                 except Exception as e:
+                     self.logger.critical(f"Redis error when trying to reconnect: {e}.")
+
              except Exception as e:
                  self.logger.exception(e)
                  continue
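Two reliability behaviors are added here: a daemon thread that pings the pubsub connection so idle subscriptions are not dropped by server or proxy timeouts, and a resubscribe attempt after a lost connection. A standalone sketch of the keepalive half using redis-py (host, channel pattern, and interval are placeholders):

import time
from threading import Thread

import redis

r = redis.Redis(host="localhost", port=6379)
pubsub = r.pubsub()
pubsub.psubscribe("interception*")

stop = False

def _pinger(interval: int = 30):
    # A periodic PING keeps the otherwise-idle pubsub socket alive.
    while not stop:
        try:
            pubsub.ping()
        except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
            print(f"pubsub ping failed: {e}")
        time.sleep(interval)

Thread(target=_pinger, daemon=True).start()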
flowcept/commons/flowcept_dataclasses/task_object.py CHANGED
@@ -134,7 +134,10 @@ class TaskObject:
      """User-defined tags attached to the task."""

      agent_id: str = None
-     """Identifier of the agent responsible for executing this task (if any)."""
+     """Identifier of the agent that executed (or is going to execute) this task."""
+
+     source_agent_id: str = None
+     """Identifier of the agent that sent this task to be executed (if any)."""

      _DEFAULT_ENRICH_VALUES = {
          "node_name": NODE_NAME,
flowcept/configs.py CHANGED
@@ -155,14 +155,16 @@ DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01
  ###########################

  DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "offline")
- # DEBUG_MODE = settings["project"].get("debug", False)
  PERF_LOG = settings["project"].get("performance_logging", False)
  JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
  REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
  ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)

+ # Default: enable dump buffer only when running in offline flush mode.
  _DEFAULT_DUMP_BUFFER_ENABLED = DB_FLUSH_MODE == "offline"
  DUMP_BUFFER_ENABLED = (
+     # Env var "DUMP_BUFFER" overrides settings.yaml.
+     # Falls back to settings project.dump_buffer.enabled, then to the default above.
      os.getenv(
          "DUMP_BUFFER", str(settings["project"].get("dump_buffer", {}).get("enabled", _DEFAULT_DUMP_BUFFER_ENABLED))
      )
@@ -170,6 +172,7 @@ DUMP_BUFFER_ENABLED = (
      .lower()
      in _TRUE_VALUES
  )
+ # Path is only read from settings.yaml; env override is not supported here.
  DUMP_BUFFER_PATH = settings["project"].get("dump_buffer", {}).get("path", "flowcept_buffer.jsonl")

  TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
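The new comments document a three-level resolution order for DUMP_BUFFER_ENABLED. A self-contained sketch of that logic (the _TRUE_VALUES set is defined elsewhere in configs.py; the one below is an assumption):

import os

_TRUE_VALUES = {"true", "1", "y", "yes"}  # assumed; mirrors configs.py's truthy set

def resolve_dump_buffer_enabled(settings: dict) -> bool:
    default = settings["project"].get("db_flush_mode", "offline") == "offline"
    from_yaml = settings["project"].get("dump_buffer", {}).get("enabled", default)
    # Env var wins, then settings.yaml, then the flush-mode default.
    return os.getenv("DUMP_BUFFER", str(from_yaml)).lower() in _TRUE_VALUES

settings = {"project": {"db_flush_mode": "offline"}}
assert resolve_dump_buffer_enabled(settings) is True
os.environ["DUMP_BUFFER"] = "false"
assert resolve_dump_buffer_enabled(settings) is False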
flowcept/flowcept_api/flowcept_controller.py CHANGED
@@ -320,7 +320,11 @@ class Flowcept(object):
          file_path = DUMP_BUFFER_PATH
          assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
          if not os.path.exists(file_path):
-             raise FileNotFoundError(f"Flowcept buffer file '{file_path}' was not found.")
+             raise FileNotFoundError(
+                 f"Flowcept buffer file '{file_path}' was not found. "
+                 f"Check your settings to see if you're dumping the data to a file and check if you"
+                 f"have started Flowcept."
+             )

          with open(file_path, "rb") as f:
              lines = [ln for ln in f.read().splitlines() if ln]
flowcept/flowceptor/adapters/mlflow/interception_event_handler.py CHANGED
@@ -13,7 +13,38 @@ class InterceptionEventHandler(FileSystemEventHandler):
          self.callback_function = callback_function
          self.interceptor_instance = interceptor_instance

+     def _matches_watch_target(self, path):
+         """Return True when a path matches the watch target.
+
+         If the target is a directory, any event under that directory matches.
+         If the target is a file, only the exact file path matches.
+         """
+         if not path:
+             return False
+         target = Path(self.file_path_to_watch).resolve()
+         candidate = Path(path).resolve()
+         if target.is_dir():
+             try:
+                 candidate.relative_to(target)
+                 return True
+             except ValueError:
+                 return False
+         return candidate == target
+
+     def _maybe_callback(self, event):
+         """Invoke the callback when an event matches the watch target."""
+         paths = [getattr(event, "src_path", None), getattr(event, "dest_path", None)]
+         if any(self._matches_watch_target(path) for path in paths):
+             self.callback_function(self.interceptor_instance)
+
      def on_modified(self, event):
          """Get on modified."""
-         if Path(event.src_path).resolve() == Path(self.file_path_to_watch).resolve():
-             self.callback_function(self.interceptor_instance)
+         self._maybe_callback(event)
+
+     def on_created(self, event):
+         """Get on created."""
+         self._maybe_callback(event)
+
+     def on_moved(self, event):
+         """Get on moved."""
+         self._maybe_callback(event)
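The handler now reacts to created and moved events as well, and can match a whole directory, checking both src_path and dest_path. A standalone watchdog sketch of the same matching idea (the watched paths are placeholders):

import time
from pathlib import Path

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class MatchingHandler(FileSystemEventHandler):
    def __init__(self, target: str):
        self.target = Path(target).resolve()

    def _matches(self, path) -> bool:
        if not path:
            return False
        candidate = Path(path).resolve()
        if self.target.is_dir():
            # Any path at or under the target directory matches.
            return candidate == self.target or self.target in candidate.parents
        return candidate == self.target

    def on_any_event(self, event):
        # Moves carry the new location in dest_path, so check both.
        paths = (getattr(event, "src_path", None), getattr(event, "dest_path", None))
        if any(self._matches(p) for p in paths):
            print(f"matched: {event.event_type} on {event.src_path}")

observer = Observer()
observer.schedule(MatchingHandler("/tmp/watched"), "/tmp", recursive=True)
observer.start()
time.sleep(5)
observer.stop()
observer.join()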
flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py CHANGED
@@ -50,9 +50,10 @@ class MLFlowInterceptor(BaseInterceptor):
          interesting change, it calls self.intercept; otherwise, let it
          go....
          """
+         intercepted = 0
          runs = self.dao.get_finished_run_uuids()
          if not runs:
-             return
+             return intercepted
          for run_uuid_tuple in runs:
              run_uuid = run_uuid_tuple[0]
              if not self.state_manager.has_element_id(run_uuid):
@@ -63,6 +64,8 @@ class MLFlowInterceptor(BaseInterceptor):
                  continue
              task_msg = self.prepare_task_msg(run_data).to_dict()
              self.intercept(task_msg)
+             intercepted += 1
+         return intercepted

      def start(self, bundle_exec_id, check_safe_stops) -> "MLFlowInterceptor":
          """Start it."""
@@ -74,10 +77,20 @@ class MLFlowInterceptor(BaseInterceptor):
      def stop(self, check_safe_stops: bool = True) -> bool:
          """Stop it."""
          sleep(1)
-         super().stop(check_safe_stops)
          self.logger.debug("Interceptor stopping...")
-         self._observer.stop()
-         self._observer_thread.join()
+         # Flush any late writes before stopping the observer.
+         try:
+             intercepted = self.callback()
+             if intercepted == 0:
+                 sleep(self.settings.watch_interval_sec)
+                 self.callback()
+         except Exception as e:
+             self.logger.exception(e)
+         super().stop(check_safe_stops)
+         if self._observer is not None:
+             self._observer.stop()
+         if self._observer_thread is not None:
+             self._observer_thread.join()
          self.logger.debug("Interceptor stopped.")
          return True

@@ -98,4 +111,5 @@ class MLFlowInterceptor(BaseInterceptor):
          watch_dir = os.path.dirname(self.settings.file_path) or "."
          self._observer.schedule(event_handler, watch_dir, recursive=True)
          self._observer.start()
+         sleep(0.2)
          self.logger.info(f"Watching directory {watch_dir} with file {self.settings.file_path} ")
flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py CHANGED
@@ -122,4 +122,5 @@ class TensorboardInterceptor(BaseInterceptor):

          self._observer.schedule(event_handler, self.settings.file_path, recursive=True)
          self._observer.start()
+         sleep(0.2)
          self.logger.debug(f"Watching {self.settings.file_path}")
flowcept/flowceptor/consumers/agent/base_agent_context_manager.py CHANGED
@@ -20,6 +20,12 @@ class BaseAppContext:

      tasks: List[Dict]

+     def reset_context(self):
+         """
+         Method to reset the variables in the context.
+         """
+         self.tasks = []
+

  class BaseAgentContextManager(BaseConsumer):
      """
@@ -45,8 +51,7 @@ class BaseAgentContextManager(BaseConsumer):
          """
          self._started = False
          super().__init__()
-         self.context = None
-         self.reset_context()
+         # self.context = BaseAppContext(tasks=[])
          self.agent_id = BaseAgentContextManager.agent_id

      def message_handler(self, msg_obj: Dict) -> bool:
@@ -77,12 +82,6 @@ class BaseAgentContextManager(BaseConsumer):

          return True

-     def reset_context(self):
-         """
-         Resets the internal context, clearing all stored task data.
-         """
-         self.context = BaseAppContext(tasks=[])
-
      @asynccontextmanager
      async def lifespan(self, app):
          """