flowcept 0.9.7__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -176,6 +176,7 @@ QUERY_GUIDELINES = """
176
176
  -To select the first (or earliest) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"started_at": 'min'}}).sort_values(by='started_at', ascending=True).head(N)['workflow_id']` - utilize `started_at` to sort!
177
177
  -To select the last (or latest or most recent) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"ended_at": 'max'}}).sort_values(by='ended_at', ascending=False).head(N)['workflow_id']` - utilize `ended_at` to sort!
178
178
 
179
+ -If the user does not ask for a specific workflow run, do not use `workflow_id` in your query.
179
180
  -To select the first or earliest or initial tasks, use or adapt the following: `df.sort_values(by='started_at', ascending=True)`
180
181
  -To select the last or final or most recent tasks, use or adapt the following: `df.sort_values(by='ended_at', ascending=False)`
181
182
 
@@ -70,6 +70,7 @@ class MQDaoRedis(MQDao):
70
70
  except Exception as e:
71
71
  self.logger.error(f"Failed to process message {message}")
72
72
  self.logger.exception(e)
73
+ continue
73
74
 
74
75
  current_trials = 0
75
76
  except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
@@ -78,7 +79,7 @@ class MQDaoRedis(MQDao):
78
79
  sleep(3)
79
80
  except Exception as e:
80
81
  self.logger.exception(e)
81
- break
82
+ continue
82
83
 
83
84
  def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps):
84
85
  """Send the message."""
@@ -1,7 +1,6 @@
1
1
  """Controller module."""
2
2
 
3
- import os.path
4
- from typing import List, Dict
3
+ from typing import List, Dict, Any
5
4
  from uuid import uuid4
6
5
 
7
6
  from flowcept.commons.autoflush_buffer import AutoflushBuffer
@@ -175,25 +174,31 @@ class Flowcept(object):
175
174
  self._interceptor_instances[0]._mq_dao.bulk_publish(self.buffer)
176
175
 
177
176
  @staticmethod
178
- def read_messages_file(file_path: str = None) -> List[Dict]:
177
+ def read_messages_file(file_path: str | None = None, return_df: bool = False):
179
178
  """
180
179
  Read a JSON Lines (JSONL) file containing captured Flowcept messages.
181
180
 
182
181
  This function loads a file where each line is a serialized JSON object.
183
182
  It joins the lines into a single JSON array and parses them efficiently
184
- with ``orjson``.
183
+ with ``orjson``. If ``return_df`` is True, it returns a pandas DataFrame
184
+ created via ``pandas.json_normalize(..., sep='.')`` so nested fields become
185
+ dot-separated columns (for example, ``generated.attention``).
185
186
 
186
187
  Parameters
187
188
  ----------
188
189
  file_path : str, optional
189
- Path to the messages file. If not provided, defaults to the
190
- value of ``DUMP_BUFFER_PATH`` from the configuration.
191
- If neither is provided, an assertion error is raised.
190
+ Path to the messages file. If not provided, defaults to the value of
191
+ ``DUMP_BUFFER_PATH`` from the configuration. If neither is provided,
192
+ an assertion error is raised.
193
+ return_df : bool, default False
194
+ If True, return a normalized pandas DataFrame. If False, return the
195
+ parsed list of dictionaries.
192
196
 
193
197
  Returns
194
198
  -------
195
- List[dict]
196
- A list of message objects (dictionaries) parsed from the file.
199
+ list of dict or pandas.DataFrame
200
+ A list of message objects when ``return_df`` is False,
201
+ otherwise a normalized DataFrame with dot-separated columns.
197
202
 
198
203
  Raises
199
204
  ------
@@ -203,35 +208,45 @@ class Flowcept(object):
203
208
  If the specified file does not exist.
204
209
  orjson.JSONDecodeError
205
210
  If the file contents cannot be parsed as valid JSON.
211
+ ModuleNotFoundError
212
+ If ``return_df`` is True but pandas is not installed.
206
213
 
207
214
  Examples
208
215
  --------
209
- Read messages from a file explicitly:
216
+ Read messages as a list:
210
217
 
211
218
  >>> msgs = read_messages_file("offline_buffer.jsonl")
212
- >>> print(len(msgs))
213
- 128
219
+ >>> len(msgs) > 0
220
+ True
214
221
 
215
- Use the default dump buffer path from config:
222
+ Read messages as a normalized DataFrame:
216
223
 
217
- >>> msgs = read_messages_file()
218
- >>> for m in msgs[:2]:
219
- ... print(m["type"], m.get("workflow_id"))
220
- task_start wf_123
221
- task_end wf_123
224
+ >>> df = read_messages_file("offline_buffer.jsonl", return_df=True)
225
+ >>> "generated.attention" in df.columns
226
+ True
222
227
  """
228
+ import os
223
229
  import orjson
224
230
 
225
- _buffer = []
226
231
  if file_path is None:
227
232
  file_path = DUMP_BUFFER_PATH
228
233
  assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
229
234
  if not os.path.exists(file_path):
230
- raise f"File {file_path} has not been created. It will only be created if you run in fully offline mode."
235
+ raise FileNotFoundError(f"File '{file_path}' was not found. It is created only in fully offline mode.")
236
+
231
237
  with open(file_path, "rb") as f:
232
238
  lines = [ln for ln in f.read().splitlines() if ln]
233
- _buffer = orjson.loads(b"[" + b",".join(lines) + b"]")
234
- return _buffer
239
+
240
+ buffer: List[Dict[str, Any]] = orjson.loads(b"[" + b",".join(lines) + b"]")
241
+
242
+ if return_df:
243
+ try:
244
+ import pandas as pd
245
+ except ModuleNotFoundError as e:
246
+ raise ModuleNotFoundError("pandas is required when return_df=True. Please install pandas.") from e
247
+ return pd.json_normalize(buffer, sep=".")
248
+
249
+ return buffer
235
250
 
236
251
  def save_workflow(self, interceptor: str, interceptor_instance: BaseInterceptor):
237
252
  """
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
4
4
  # The expected format is: <Major>.<Minor>.<Patch>
5
5
  # This file is supposed to be automatically modified by the CI Bot.
6
6
  # See .github/workflows/version_bumper.py
7
- __version__ = "0.9.7"
7
+ __version__ = "0.9.8"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowcept
3
- Version: 0.9.7
3
+ Version: 0.9.8
4
4
  Summary: Capture and query workflow provenance data using data observability
5
5
  Author: Oak Ridge National Laboratory
6
6
  License-Expression: MIT
@@ -27,6 +27,7 @@ Requires-Dist: jupyterlab; extra == 'all'
27
27
  Requires-Dist: langchain-community; extra == 'all'
28
28
  Requires-Dist: langchain-openai; extra == 'all'
29
29
  Requires-Dist: lmdb; extra == 'all'
30
+ Requires-Dist: matplotlib; extra == 'all'
30
31
  Requires-Dist: mcp[cli]; extra == 'all'
31
32
  Requires-Dist: mlflow-skinny; extra == 'all'
32
33
  Requires-Dist: nbmake; extra == 'all'
@@ -88,6 +89,7 @@ Requires-Dist: confluent-kafka<=2.8.0; extra == 'kafka'
88
89
  Provides-Extra: llm-agent
89
90
  Requires-Dist: langchain-community; extra == 'llm-agent'
90
91
  Requires-Dist: langchain-openai; extra == 'llm-agent'
92
+ Requires-Dist: matplotlib; extra == 'llm-agent'
91
93
  Requires-Dist: mcp[cli]; extra == 'llm-agent'
92
94
  Requires-Dist: pymupdf; extra == 'llm-agent'
93
95
  Requires-Dist: streamlit; extra == 'llm-agent'
@@ -95,6 +97,7 @@ Provides-Extra: llm-agent-audio
95
97
  Requires-Dist: gtts; extra == 'llm-agent-audio'
96
98
  Requires-Dist: langchain-community; extra == 'llm-agent-audio'
97
99
  Requires-Dist: langchain-openai; extra == 'llm-agent-audio'
100
+ Requires-Dist: matplotlib; extra == 'llm-agent-audio'
98
101
  Requires-Dist: mcp[cli]; extra == 'llm-agent-audio'
99
102
  Requires-Dist: pydub; extra == 'llm-agent-audio'
100
103
  Requires-Dist: pymupdf; extra == 'llm-agent-audio'
@@ -105,6 +108,7 @@ Provides-Extra: llm-google
105
108
  Requires-Dist: google-genai; extra == 'llm-google'
106
109
  Requires-Dist: langchain-community; extra == 'llm-google'
107
110
  Requires-Dist: langchain-openai; extra == 'llm-google'
111
+ Requires-Dist: matplotlib; extra == 'llm-google'
108
112
  Requires-Dist: mcp[cli]; extra == 'llm-google'
109
113
  Requires-Dist: pymupdf; extra == 'llm-google'
110
114
  Requires-Dist: streamlit; extra == 'llm-google'
@@ -1,7 +1,7 @@
1
1
  flowcept/__init__.py,sha256=urpwIEJeikV0P6ORXKsM5Lq4o6wCwhySS9A487BYGy4,2241
2
2
  flowcept/cli.py,sha256=eVnUrmZtVhZ1ldRMGB1QsqBzNC1Pf2CX33efnlaZ4gs,22842
3
3
  flowcept/configs.py,sha256=aXgBkBpTs4_4MpvAe76aQ5lXl1gTmgk92bFiNqMQXPM,8382
4
- flowcept/version.py,sha256=82O8Nb-xrNpDOjABeIhhxyF9hYDKXZ1hFjdTCo4zE2Y,306
4
+ flowcept/version.py,sha256=zH7JKitqQGm2p8zaw6JClXGAc-kbLbhXB70bFMI-zhU,306
5
5
  flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
6
6
  flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
7
7
  flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
@@ -17,7 +17,7 @@ flowcept/agents/llms/claude_gcp.py,sha256=fzz7235DgzVueuFj5odsr93jWtYHpYlXkSGW1k
17
17
  flowcept/agents/llms/gemini25.py,sha256=VARrjb3tITIh3_Wppmocp_ocSKVZNon0o0GeFEwTnTI,4229
18
18
  flowcept/agents/prompts/__init__.py,sha256=7ICsNhLYzvPS1esG3Vg519s51b1c4yN0WegJUb6Qvww,26
19
19
  flowcept/agents/prompts/general_prompts.py,sha256=Mj6dMdrnJfq-bibi1XQVNZ8zx5MZUwxTvYY_qijPfoI,3894
20
- flowcept/agents/prompts/in_memory_query_prompts.py,sha256=j5lZLuQdGzKFRiYaNrNThMwHslPUWwDARdufmFxMoTQ,19562
20
+ flowcept/agents/prompts/in_memory_query_prompts.py,sha256=0u6hIV1v-Fhk3dQVvbEW0qggi0KZbEBopMvJtgCNIVc,19664
21
21
  flowcept/agents/tools/__init__.py,sha256=Xqz2E4-LL_7DDcm1XYJFx2f5RdAsjeTpOJb_DPC7xyc,27
22
22
  flowcept/agents/tools/general_tools.py,sha256=KS7ZTf1UbTxg0yQ6zCxh1g3NzcliYKWdurMArhPowxs,3248
23
23
  flowcept/agents/tools/in_memory_queries/__init__.py,sha256=K8-JI_lXUgquKkgga8Nef8AntGg_logQtjjQjaEE7yI,39
@@ -46,7 +46,7 @@ flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFm
46
46
  flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=EL8eQedvNLsVLMz4oHemBAsR1S6xFZiezM8dIqKmmCA,9696
47
47
  flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=kjZqPLIu5PaNeM4IDvOxkDRVGTd5UWwq3zhDvVirqW8,5067
48
48
  flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=tRdMGYDzdeIJxad-B4-DE6u8Wzs61eTzOW4ojZrnTxs,4057
49
- flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=WKPoMPBSce4shqbBkgsnuqJAJoZZ4U_hdebhyFqtejQ,5535
49
+ flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=ejBMxImA-h2KuMEAk3l7aU0chCcObCbUXEOXM6L4Zhc,5571
50
50
  flowcept/commons/flowcept_dataclasses/__init__.py,sha256=8KkiJh0WSRAB50waVluxCSI8Tb9X1L9nup4c8RN3ulc,30
51
51
  flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py,sha256=Cjw2PGYtZDfnwecz6G3S42Ncmxj7AIZVEBx05bsxRUo,399
52
52
  flowcept/commons/flowcept_dataclasses/task_object.py,sha256=XLFD8YTWsyDLSRcgZc5qK2a9yk97XnqZoUAL4T6HNPE,8110
@@ -54,7 +54,7 @@ flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfI
54
54
  flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
55
55
  flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
56
56
  flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
57
- flowcept/flowcept_api/flowcept_controller.py,sha256=JcUQXJfEjmg-KQsolIN5Ul7vbSxZUg8QTWaGAahZKTE,15251
57
+ flowcept/flowcept_api/flowcept_controller.py,sha256=jfssXUvG55RVXJBziq-lXekt7Dog3mAalo5Zsp_7_to,16060
58
58
  flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
59
59
  flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
60
60
  flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
@@ -94,9 +94,9 @@ flowcept/instrumentation/flowcept_loop.py,sha256=jea_hYPuXg5_nOWf-nNb4vx8A__OBM4
94
94
  flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
95
95
  flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
96
96
  flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
97
- resources/sample_settings.yaml,sha256=bOyHdzQe-CJ7nmpqstlDfk9fuQMHerS95Oy2BGdMLds,6779
98
- flowcept-0.9.7.dist-info/METADATA,sha256=gy3Ul7p8x8MSFq17wOuUtuLMzZay4no0-yFmfEl_Ni0,32246
99
- flowcept-0.9.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
100
- flowcept-0.9.7.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
101
- flowcept-0.9.7.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
102
- flowcept-0.9.7.dist-info/RECORD,,
97
+ resources/sample_settings.yaml,sha256=ufx-07gm7u0UMJa_HPutD3w1VrZKaPBht5H1xFUbIWU,6779
98
+ flowcept-0.9.8.dist-info/METADATA,sha256=-a_76ZRJ8DAu_cwGtwiW4OIUdil-orVS7TC5heM-Yco,32439
99
+ flowcept-0.9.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
100
+ flowcept-0.9.8.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
101
+ flowcept-0.9.8.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
102
+ flowcept-0.9.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
- flowcept_version: 0.9.7 # Version of the Flowcept package. This setting file is compatible with this version.
1
+ flowcept_version: 0.9.8 # Version of the Flowcept package. This setting file is compatible with this version.
2
2
 
3
3
  project:
4
4
  debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.