flowcept 0.8.10__py3-none-any.whl → 0.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. flowcept/cli.py +210 -10
  2. flowcept/commons/daos/keyvalue_dao.py +19 -23
  3. flowcept/commons/daos/mq_dao/mq_dao_base.py +29 -29
  4. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +4 -3
  5. flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
  6. flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
  7. flowcept/commons/daos/redis_conn.py +47 -0
  8. flowcept/commons/flowcept_dataclasses/task_object.py +36 -8
  9. flowcept/commons/settings_factory.py +2 -4
  10. flowcept/commons/task_data_preprocess.py +200 -0
  11. flowcept/commons/utils.py +1 -1
  12. flowcept/configs.py +8 -4
  13. flowcept/flowcept_api/flowcept_controller.py +30 -13
  14. flowcept/flowceptor/adapters/agents/__init__.py +1 -0
  15. flowcept/flowceptor/adapters/agents/agents_utils.py +89 -0
  16. flowcept/flowceptor/adapters/agents/flowcept_agent.py +292 -0
  17. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +186 -0
  18. flowcept/flowceptor/adapters/agents/prompts.py +51 -0
  19. flowcept/flowceptor/adapters/base_interceptor.py +13 -6
  20. flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
  21. flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
  22. flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
  23. flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
  24. flowcept/flowceptor/consumers/agent/__init__.py +1 -0
  25. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +101 -0
  26. flowcept/flowceptor/consumers/agent/client_agent.py +48 -0
  27. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +145 -0
  28. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +112 -0
  29. flowcept/flowceptor/consumers/base_consumer.py +90 -0
  30. flowcept/flowceptor/consumers/document_inserter.py +135 -36
  31. flowcept/flowceptor/telemetry_capture.py +1 -1
  32. flowcept/instrumentation/task_capture.py +8 -2
  33. flowcept/version.py +1 -1
  34. {flowcept-0.8.10.dist-info → flowcept-0.8.11.dist-info}/METADATA +10 -1
  35. {flowcept-0.8.10.dist-info → flowcept-0.8.11.dist-info}/RECORD +39 -27
  36. resources/sample_settings.yaml +37 -13
  37. flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
  38. flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
  39. flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
  40. {flowcept-0.8.10.dist-info → flowcept-0.8.11.dist-info}/WHEEL +0 -0
  41. {flowcept-0.8.10.dist-info → flowcept-0.8.11.dist-info}/entry_points.txt +0 -0
  42. {flowcept-0.8.10.dist-info → flowcept-0.8.11.dist-info}/licenses/LICENSE +0 -0
flowcept/commons/settings_factory.py CHANGED
@@ -7,9 +7,6 @@ from flowcept.commons.flowcept_dataclasses.base_settings_dataclasses import (
     BaseSettings,
     KeyValue,
 )
-from flowcept.flowceptor.adapters.zambeze.zambeze_dataclasses import (
-    ZambezeSettings,
-)
 from flowcept.flowceptor.adapters.mlflow.mlflow_dataclasses import (
     MLFlowSettings,
 )
@@ -22,7 +19,6 @@ from flowcept.flowceptor.adapters.dask.dask_dataclasses import (


 SETTINGS_CLASSES = {
-    Vocabulary.Settings.ZAMBEZE_KIND: ZambezeSettings,
     Vocabulary.Settings.MLFLOW_KIND: MLFlowSettings,
     Vocabulary.Settings.TENSORBOARD_KIND: TensorboardSettings,
     Vocabulary.Settings.DASK_KIND: DaskSettings,
@@ -30,6 +26,8 @@ SETTINGS_CLASSES = {


 def _build_base_settings(kind: str, settings_dict: dict) -> BaseSettings:
+    if kind not in SETTINGS_CLASSES:
+        return settings_dict
     settings_obj = SETTINGS_CLASSES[kind](**settings_dict)
     return settings_obj

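Note: with this change, `_build_base_settings` returns the raw settings dict for adapter kinds that have no registered dataclass, instead of raising a `KeyError`. A minimal sketch of the new behavior; the `mqtt` kind and values below are illustrative, not taken from the release:

    # Hypothetical settings for a kind that is not in SETTINGS_CLASSES.
    settings_dict = {"host": "localhost", "port": 1883}

    # 0.8.10: SETTINGS_CLASSES["mqtt"] raised KeyError for unknown kinds.
    # 0.8.11: the dict now passes through unchanged.
    obj = _build_base_settings("mqtt", settings_dict)
    assert obj == settings_dict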
flowcept/commons/task_data_preprocess.py ADDED
@@ -0,0 +1,200 @@
+"""
+The base of this code was generated using ChatGPT.
+
+Prompt:
+
+Here I have a list containing one real task.
+
+<Paste one real task here>
+
+I want to create a list of summarized task data, per task, containing:
+- activity_id
+- task_id
+- used
+- generated
+- task_duration (ended_at - started_at)
+- hostname
+- cpu_info
+- disk_info
+- mem_info
+- network_info
+<Consider adding GPU info too, if you have gpu in your task data>
+
+Where info about cpu, disk, mem, and network must consider telemetry_at_end and telemetry_at_start.
+
+We will use this summarized data as input for LLM questions to find patterns in the resource usage and how they relate
+to input (used) and output (generated) of each task.
+"""
+
+from typing import Dict, List
+
+
+def summarize_telemetry(task: Dict) -> Dict:
+    """
+    Extract and compute the telemetry summary for a task based on start and end telemetry snapshots.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing telemetry_at_start and telemetry_at_end.
+
+    Returns
+    -------
+    dict
+        A summary of telemetry differences including CPU, disk, memory, and network metrics, and task duration.
+    """
+
+    def extract_cpu_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "percent_all_diff": end["percent_all"] - start["percent_all"],
+            "user_time_diff": end["times_avg"]["user"] - start["times_avg"]["user"],
+            "system_time_diff": end["times_avg"]["system"] - start["times_avg"]["system"],
+            "idle_time_diff": end["times_avg"]["idle"] - start["times_avg"]["idle"],
+        }
+
+    def extract_disk_info(start: Dict, end: Dict) -> Dict:
+        io_start = start["io_sum"]
+        io_end = end["io_sum"]
+        return {
+            "read_bytes_diff": io_end["read_bytes"] - io_start["read_bytes"],
+            "write_bytes_diff": io_end["write_bytes"] - io_start["write_bytes"],
+            "read_count_diff": io_end["read_count"] - io_start["read_count"],
+            "write_count_diff": io_end["write_count"] - io_start["write_count"],
+        }
+
+    def extract_mem_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "used_mem_diff": end["virtual"]["used"] - start["virtual"]["used"],
+            "percent_diff": end["virtual"]["percent"] - start["virtual"]["percent"],
+            "swap_used_diff": end["swap"]["used"] - start["swap"]["used"],
+        }
+
+    def extract_network_info(start: Dict, end: Dict) -> Dict:
+        net_start = start["netio_sum"]
+        net_end = end["netio_sum"]
+        return {
+            "bytes_sent_diff": net_end["bytes_sent"] - net_start["bytes_sent"],
+            "bytes_recv_diff": net_end["bytes_recv"] - net_start["bytes_recv"],
+            "packets_sent_diff": net_end["packets_sent"] - net_start["packets_sent"],
+            "packets_recv_diff": net_end["packets_recv"] - net_start["packets_recv"],
+        }
+
+    start_tele = task["telemetry_at_start"]
+    end_tele = task["telemetry_at_end"]
+
+    started_at = task["started_at"]
+    ended_at = task["ended_at"]
+    duration = ended_at - started_at
+
+    telemetry_summary = {
+        "duration_sec": duration,
+        "cpu_info": extract_cpu_info(start_tele["cpu"], end_tele["cpu"]),
+        "disk_info": extract_disk_info(start_tele["disk"], end_tele["disk"]),
+        "mem_info": extract_mem_info(start_tele["memory"], end_tele["memory"]),
+        "network_info": extract_network_info(start_tele["network"], end_tele["network"]),
+    }
+
+    return telemetry_summary
+
+
+def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
+    """
+    Summarize key metadata and telemetry for a task, optionally tagging critical conditions.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing metadata and telemetry snapshots.
+    thresholds : dict, optional
+        Threshold values used to tag abnormal resource usage.
+
+    Returns
+    -------
+    dict
+        Summary of the task including identifiers, telemetry summary, and optional critical tags.
+    """
+    task_summary = {
+        "workflow_id": task.get("workflow_id"),
+        "task_id": task.get("task_id"),
+        "activity_id": task.get("activity_id"),
+        "used": task.get("used"),
+        "generated": task.get("generated"),
+        "hostname": task.get("hostname"),
+        "status": task.get("status"),
+    }
+
+    try:
+        telemetry_summary = summarize_telemetry(task)
+        tags = tag_critical_task(
+            generated=task.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=thresholds
+        )
+        if tags:
+            task_summary["tags"] = tags
+        task_summary["telemetry_summary"] = telemetry_summary
+    except Exception as e:
+        if logger:
+            logger.exception(e)
+        else:
+            print(e)
+
+    return task_summary
+
+
+def tag_critical_task(
+    generated: Dict, telemetry_summary: Dict, generated_keywords: List[str] = ["result"], thresholds: Dict = None
+) -> List[str]:
+    """
+    Tag a task with labels indicating abnormal or noteworthy resource usage or result anomalies.
+
+    Parameters
+    ----------
+    generated : dict
+        Dictionary of generated output values (e.g., results).
+    telemetry_summary : dict
+        Telemetry summary produced from summarize_telemetry().
+    generated_keywords : list of str, optional
+        List of keys in the generated output to check for anomalies.
+    thresholds : dict, optional
+        Custom thresholds for tagging high CPU, memory, disk, etc.
+
+    Returns
+    -------
+    list of str
+        Tags indicating abnormal patterns (e.g., "high_cpu", "low_output").
+    """
+    if thresholds is None:
+        thresholds = {
+            "high_cpu": 80,
+            "high_mem": 1e9,
+            "high_disk": 1e8,
+            "long_duration": 0.8,
+            "low_output": 0.1,
+            "high_output": 0.9,
+        }
+
+    cpu = abs(telemetry_summary["cpu_info"].get("percent_all_diff", 0))
+    mem = telemetry_summary["mem_info"].get("used_mem_diff", 0)
+    disk = telemetry_summary["disk_info"].get("read_bytes_diff", 0) + telemetry_summary["disk_info"].get(
+        "write_bytes_diff", 0
+    )
+    duration = telemetry_summary["duration_sec"]
+
+    tags = []
+
+    if cpu > thresholds["high_cpu"]:
+        tags.append("high_cpu")
+    if mem > thresholds["high_mem"]:
+        tags.append("high_mem")
+    if disk > thresholds["high_disk"]:
+        tags.append("high_disk")
+    if duration > thresholds["long_duration"]:
+        tags.append("long_duration")
+
+    for key in generated_keywords:
+        value = generated.get(key, 0)
+        if value < thresholds["low_output"]:
+            tags.append("low_output")
+        if value > thresholds["high_output"]:
+            tags.append("high_output")
+
+    return tags
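Note: a quick usage sketch of the new `task_data_preprocess` module. The task below is fabricated with just the fields the summarizer reads; real task dicts come from Flowcept's capture pipeline:

    from flowcept.commons.task_data_preprocess import summarize_task

    def snapshot(n):
        # Fabricated telemetry snapshot with the keys summarize_telemetry expects.
        return {
            "cpu": {"percent_all": 10.0 * n, "times_avg": {"user": n, "system": n, "idle": n}},
            "disk": {"io_sum": {"read_bytes": n, "write_bytes": n, "read_count": n, "write_count": n}},
            "memory": {"virtual": {"used": n, "percent": n}, "swap": {"used": n}},
            "network": {"netio_sum": {"bytes_sent": n, "bytes_recv": n, "packets_sent": n, "packets_recv": n}},
        }

    task = {
        "workflow_id": "wf1", "task_id": "t1", "activity_id": "square",
        "used": {"x": 2}, "generated": {"result": 4},
        "hostname": "node01", "status": "FINISHED",
        "started_at": 0.0, "ended_at": 1.5,
        "telemetry_at_start": snapshot(1), "telemetry_at_end": snapshot(2),
    }

    summary = summarize_task(task)
    # duration 1.5 > 0.8 and result 4 > 0.9, so with the default thresholds
    # summary["tags"] == ["long_duration", "high_output"]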
flowcept/commons/utils.py CHANGED
@@ -26,7 +26,7 @@ def get_utc_now() -> float:

 def get_utc_now_str() -> str:
     """Get UTC string."""
-    format_string = "%Y-%m-%dT%H:%M:%S.%f"
+    format_string = "%Y-%m-%dT%H:%M:%S.%f%z"
     now = datetime.now(timezone.utc)
     return now.strftime(format_string)

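Note: the `%z` suffix adds the UTC offset to the serialized timestamp, making the string unambiguous when parsed later. A small sketch of the effect:

    from datetime import datetime, timezone

    now = datetime.now(timezone.utc)
    print(now.strftime("%Y-%m-%dT%H:%M:%S.%f"))    # 0.8.10 style: 2025-07-01T12:00:00.000000
    print(now.strftime("%Y-%m-%dT%H:%M:%S.%f%z"))  # 0.8.11 style: 2025-07-01T12:00:00.000000+0000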
flowcept/configs.py CHANGED
@@ -69,10 +69,11 @@ FLOWCEPT_USER = settings["experiment"].get("user", "blank_user")
 MQ_INSTANCES = settings["mq"].get("instances", None)
 MQ_SETTINGS = settings["mq"]
 MQ_TYPE = os.getenv("MQ_TYPE", settings["mq"].get("type", "redis"))
-MQ_CHANNEL = settings["mq"].get("channel", "interception")
+MQ_CHANNEL = os.getenv("MQ_CHANNEL", settings["mq"].get("channel", "interception"))
 MQ_PASSWORD = settings["mq"].get("password", None)
 MQ_HOST = os.getenv("MQ_HOST", settings["mq"].get("host", "localhost"))
 MQ_PORT = int(os.getenv("MQ_PORT", settings["mq"].get("port", "6379")))
+MQ_URI = os.getenv("MQ_URI", settings["mq"].get("uri", None))
 MQ_BUFFER_SIZE = settings["mq"].get("buffer_size", None)
 MQ_INSERTION_BUFFER_TIME = settings["mq"].get("insertion_buffer_time_secs", None)
 MQ_TIMING = settings["mq"].get("timing", False)
@@ -86,6 +87,7 @@ KVDB_PASSWORD = settings["kv_db"].get("password", None)
 KVDB_HOST = os.getenv("KVDB_HOST", settings["kv_db"].get("host", "localhost"))
 KVDB_PORT = int(os.getenv("KVDB_PORT", settings["kv_db"].get("port", "6379")))
 KVDB_URI = os.getenv("KVDB_URI", settings["kv_db"].get("uri", None))
+KVDB_ENABLED = settings["kv_db"].get("enabled", True)


 DATABASES = settings.get("databases", {})
@@ -118,9 +120,9 @@ if LMDB_SETTINGS:
 else:
     LMDB_ENABLED = LMDB_SETTINGS.get("enabled", False)

-if not LMDB_ENABLED and not MONGO_ENABLED:
-    # At least one of these variables need to be enabled.
-    LMDB_ENABLED = True
+# if not LMDB_ENABLED and not MONGO_ENABLED:
+#     # At least one of these variables need to be enabled.
+#     LMDB_ENABLED = True

 ##########################
 # DB Buffer Settings #
@@ -219,6 +221,8 @@ ANALYTICS = settings.get("analytics", None)
 INSTRUMENTATION = settings.get("instrumentation", {})
 INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", False)

+AGENT = settings.get("agent", {})
+
 ####################
 # Enabled ADAPTERS #
 ####################
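Note: `MQ_CHANNEL` and the new `MQ_URI` now honor environment-variable overrides, matching `MQ_HOST`/`MQ_PORT`. Since `flowcept.configs` reads `os.getenv` at module import time, overrides must be set before the first import. A sketch with illustrative values:

    import os

    # Must run before flowcept.configs is first imported.
    os.environ["MQ_CHANNEL"] = "my_channel"        # overrides mq.channel from the settings file
    os.environ["MQ_URI"] = "redis://broker:6379"   # overrides mq.uri (illustrative URI)

    from flowcept import configs
    assert configs.MQ_CHANNEL == "my_channel"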
flowcept/flowcept_api/flowcept_controller.py CHANGED
@@ -9,7 +9,14 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
 )
 from flowcept.commons.flowcept_logger import FlowceptLogger
 from flowcept.commons.utils import ClassProperty
-from flowcept.configs import MQ_INSTANCES, INSTRUMENTATION_ENABLED, MONGO_ENABLED, SETTINGS_PATH
+from flowcept.configs import (
+    MQ_INSTANCES,
+    INSTRUMENTATION_ENABLED,
+    MONGO_ENABLED,
+    SETTINGS_PATH,
+    LMDB_ENABLED,
+    KVDB_ENABLED,
+)
 from flowcept.flowceptor.adapters.base_interceptor import BaseInterceptor


@@ -39,6 +46,7 @@ class Flowcept(object):
         workflow_name: str = None,
         workflow_args: str = None,
         start_persistence=True,
+        check_safe_stops=True,  # TODO add to docstring
         save_workflow=True,
         *args,
         **kwargs,
@@ -86,6 +94,7 @@ class Flowcept(object):
         self.logger.debug(f"Using settings file: {SETTINGS_PATH}")
         self._enable_persistence = start_persistence
         self._db_inserters: List = []
+        self._check_safe_stops = check_safe_stops
         if bundle_exec_id is None:
             self._bundle_exec_id = id(self)
         else:
@@ -140,7 +149,7 @@
             Flowcept.current_workflow_id = self.current_workflow_id

             interceptor_inst = BaseInterceptor.build(interceptor)
-            interceptor_inst.start(bundle_exec_id=self._bundle_exec_id)
+            interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
             self._interceptor_instances.append(interceptor_inst)

             if self._should_save_workflow and not self._workflow_saved:
@@ -190,19 +199,20 @@
         else:
             raise Exception("You must provide the argument `dask_client` so we can correctly link the workflow.")

-        interceptor_instance._mq_dao.set_campaign_id(Flowcept.campaign_id)
+        if KVDB_ENABLED:
+            interceptor_instance._mq_dao.set_campaign_id(Flowcept.campaign_id)
         interceptor_instance.send_workflow_message(wf_obj)
         self._workflow_saved = True

     def _init_persistence(self, mq_host=None, mq_port=None):
+        if not LMDB_ENABLED and not MONGO_ENABLED:
+            return
+
         from flowcept.flowceptor.consumers.document_inserter import DocumentInserter

-        self._db_inserters.append(
-            DocumentInserter(
-                check_safe_stops=True,
-                bundle_exec_id=self._bundle_exec_id,
-            ).start()
-        )
+        doc_inserter = DocumentInserter(check_safe_stops=self._check_safe_stops, bundle_exec_id=self._bundle_exec_id)
+        doc_inserter.start()
+        self._db_inserters.append(doc_inserter)

     def stop(self):
         """Stop it."""
@@ -214,7 +224,7 @@
         for interceptor in self._interceptor_instances:
             if interceptor is None:
                 continue
-            interceptor.stop()
+            interceptor.stop(check_safe_stops=self._check_safe_stops)

         if len(self._db_inserters):
             self.logger.info("Stopping DB Inserters...")
@@ -259,17 +269,24 @@
         ...     print("One or more services are not ready.")
         """
         logger = FlowceptLogger()
-        if not MQDao.build().liveness_test():
+        mq = MQDao.build()
+        if not mq.liveness_test():
             logger.error("MQ Not Ready!")
             return False

+        if KVDB_ENABLED:
+            if not mq._keyvalue_dao.liveness_test():
+                logger.error("KVBD is enabled but is not ready!")
+                return False
+
+        logger.info("MQ is alive!")
         if MONGO_ENABLED:
             from flowcept.commons.daos.docdb_dao.mongodb_dao import MongoDBDAO

             if not MongoDBDAO(create_indices=False).liveness_test():
-                logger.error("DocDB Not Ready!")
+                logger.error("MongoDB is enabled but DocDB is not Ready!")
                 return False
-        logger.info("MQ and DocDB are alive!")
+            logger.info("DocDB is alive!")
         return True

     @staticmethod
flowcept/flowceptor/adapters/agents/__init__.py ADDED
@@ -0,0 +1 @@
+"""Agent adapter subpackage."""
flowcept/flowceptor/adapters/agents/agents_utils.py ADDED
@@ -0,0 +1,89 @@
+from typing import List, Union
+
+from langchain_community.llms.sambanova import SambaStudio
+from mcp.server.fastmcp.prompts import base
+from langchain_core.language_models import LLM
+from langchain_core.messages import HumanMessage, AIMessage
+
+from flowcept.configs import AGENT
+
+
+def build_llm_model() -> LLM:
+    """
+    Build and return an LLM instance using agent configuration.
+
+    This function retrieves the model name and keyword arguments from the AGENT configuration,
+    constructs a SambaStudio LLM instance, and returns it.
+
+    Returns
+    -------
+    LLM
+        An initialized LLM object configured using the `AGENT` settings.
+    """
+    model_kwargs = AGENT.get("model_kwargs").copy()
+    model_kwargs["model"] = AGENT.get("model")
+    llm = SambaStudio(model_kwargs=model_kwargs)
+
+    return llm
+
+
+def convert_mcp_messages_to_plain_text(messages: list[base.Message]) -> str:
+    """
+    Convert a list of MCP base.Message objects into a plain text dialogue.
+
+    Parameters
+    ----------
+    messages : list of BaseMessage
+        The list of messages, typically from HumanMessage, AIMessage, SystemMessage, etc.
+
+    Returns
+    -------
+    str
+        A plain text version of the conversation, with roles labeled.
+    """
+    lines = []
+    for message in messages:
+        role = message.role.capitalize()  # e.g., "human" → "Human"
+        line = f"{role}: {message.content.text}"
+        lines.append(line)
+    return "\n".join(lines)
+
+
+def convert_mcp_to_langchain(messages: list[base.Message]) -> List[Union[HumanMessage, AIMessage]]:
+    """
+    Convert a list of MCP-style messages to LangChain-compatible message objects.
+
+    Parameters
+    ----------
+    messages : list of base.Message
+        A list of messages in the MCP message format, each with a `role` and `content`.
+
+    Returns
+    -------
+    list of Union[HumanMessage, AIMessage]
+        A list of LangChain message objects, converted from the original MCP format.
+
+    Raises
+    ------
+    ValueError
+        If a message has a role that is not 'user' or 'assistant'.
+
+    Notes
+    -----
+    This function extracts the `text` attribute from message content if present, falling back to `str(content)`
+    otherwise. It maps MCP 'user' roles to LangChain `HumanMessage` and 'assistant' roles to `AIMessage`.
+    """
+    converted = []
+    for m in messages:
+        if hasattr(m.content, "text"):
+            content = m.content.text
+        else:
+            content = str(m.content)  # fallback if it's already a string
+
+        if m.role == "user":
+            converted.append(HumanMessage(content=content))
+        elif m.role == "assistant":
+            converted.append(AIMessage(content=content))
+        else:
+            raise ValueError(f"Unsupported role: {m.role}")
+    return converted
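Note: a usage sketch for the new message-conversion helpers. It assumes `base.UserMessage`/`base.AssistantMessage` wrap plain strings into text content blocks, as in the MCP Python SDK:

    from mcp.server.fastmcp.prompts import base

    from flowcept.flowceptor.adapters.agents.agents_utils import (
        convert_mcp_messages_to_plain_text,
        convert_mcp_to_langchain,
    )

    # Hypothetical two-turn dialogue.
    messages = [
        base.UserMessage("Which tasks show high CPU usage?"),
        base.AssistantMessage("Tasks t1 and t7 are tagged high_cpu."),
    ]

    print(convert_mcp_messages_to_plain_text(messages))  # "User: ...\nAssistant: ..."
    lc_messages = convert_mcp_to_langchain(messages)     # [HumanMessage, AIMessage]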