flowcept 0.9.6__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,76 +1,87 @@
  import streamlit as st
+
  from flowcept.agents.gui import AI, PAGE_TITLE
+ from flowcept.agents.gui.audio_utils import get_audio_text
  from flowcept.agents.gui.gui_utils import (
      query_agent,
      display_ai_msg,
      display_ai_msg_from_tool,
      display_df_tool_response,
+     resolve_logo_path,
+     render_title_with_logo,
  )
-
  from flowcept.agents.tools.in_memory_queries.in_memory_queries_tools import (
      generate_result_df,
      generate_plot_code,
      run_df_code,
  )
+ from flowcept.configs import AGENT_AUDIO

+ # ---- Page setup & header with logo ----
  st.set_page_config(page_title=PAGE_TITLE, page_icon=AI)
- st.title(PAGE_TITLE)
+
+ LOGO_PATH = resolve_logo_path(package="flowcept", resource="docs/img/flowcept-logo.png")
+ render_title_with_logo(PAGE_TITLE, LOGO_PATH, logo_width=150, add_to_sidebar=False, debug=False)

  GREETING = (
-     "Hi, there! I'm a **Workflow Provenance Specialist**.\n\n"
+     "Hi, there! I'm your **Workflow Provenance Assistant**.\n\n"
      "I am tracking workflow executions and I can:\n"
-     "- 🔍 Analyze running workflows\n"
+     "- 🔍 Query running workflows\n"
      "- 📊 Plot graphs\n"
      "- 🤖 Answer general questions about provenance data\n\n"
      "How can I help you today?"
  )
-
-
  display_ai_msg(GREETING)

- # if "chat_history" not in st.session_state:
- # st.session_state.chat_history = [{"role": "system", "content":GREETING}]
- #
- # for msg in st.session_state.chat_history:
- # with st.chat_message(msg["role"], avatar=AI):
- # st.markdown(msg["content"])
-

  def main():
-     """Main Streamlit Function."""
+     """Main Agent GUI function."""
+     st.caption(
+         "💡 Tip: Ask about workflow metrics, generate plots, or summarize data. "
+         "Inputs are mapped to `used` and outputs to `generated` fields. "
+         "Use @record <your query guidance> if you have custom guidance."
+     )
+
      user_input = st.chat_input("Send a message")
-     st.caption("💡 Tip: Ask about workflow metrics, generate plots, or summarize data.")

      if user_input:
-         # st.session_state.chat_history.append({"role": "human", "content": user_input})
+         st.session_state["speak_reply"] = False
+
+         if AGENT_AUDIO:
+             user_input = get_audio_text(user_input)

+     if user_input:
          with st.chat_message("human"):
              st.markdown(user_input)

          try:
              with st.spinner("🤖 Thinking..."):
                  tool_result = query_agent(user_input)
-             print(tool_result)

              if tool_result.result_is_str():
                  display_ai_msg_from_tool(tool_result)
+
              elif tool_result.is_success_dict():
                  tool_name = tool_result.tool_name
-                 if tool_name in [generate_result_df.__name__, generate_plot_code.__name__, run_df_code.__name__]:
+                 if tool_name in (
+                     generate_result_df.__name__,
+                     generate_plot_code.__name__,
+                     run_df_code.__name__,
+                 ):
                      display_df_tool_response(tool_result)
                  else:
                      display_ai_msg(f"⚠️ Received unexpected response from agent: {tool_result}")
                      st.stop()
              else:
                  display_df_tool_response(tool_result)
-                 # display_ai_msg(f"⚠️ Received unexpected response from agent: {tool_result}")
                  st.stop()

          except Exception as e:
              display_ai_msg(f"❌ Error talking to MCP agent:\n\n```text\n{e}\n```")
              st.stop()

-     # st.session_state.chat_history.append({"role": "system", "content": agent_reply})

+ if "speak_reply" not in st.session_state:
+     st.session_state["speak_reply"] = False

  main()
@@ -0,0 +1,129 @@
+ import re
+ import tempfile
+ from io import BytesIO
+ import base64
+
+ import streamlit as st
+ from gtts import gTTS
+ from streamlit_mic_recorder import mic_recorder
+ import speech_recognition as sr
+ from pydub import AudioSegment  # needs ffmpeg installed
+
+
+ def _normalize_mic_output(out) -> bytes | None:
+     """Handle different return shapes from streamlit-mic-recorder."""
+     if not isinstance(out, dict):
+         return None
+     if out.get("wav"):
+         return out["wav"]
+     if out.get("bytes"):
+         return out["bytes"]
+     if out.get("b64"):
+         return base64.b64decode(out["b64"])
+     return None
+
+
+ def _is_wav_pcm(blob: bytes) -> bool:
+     """Quick RIFF/WAVE header check."""
+     h = blob[:12]
+     return h.startswith(b"RIFF") and h[8:12] == b"WAVE"
+
+
+ def _to_pcm_wav_16k(blob: bytes) -> bytes:
+     """
+     Convert arbitrary audio bytes (webm/ogg/mp3/…) to 16-bit PCM WAV mono @16k.
+     Requires ffmpeg via pydub.
+     """
+     if _is_wav_pcm(blob):
+         return blob
+     seg = AudioSegment.from_file(BytesIO(blob))  # ffmpeg does the heavy lifting
+     seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
+     buf = BytesIO()
+     seg.export(buf, format="wav")
+     return buf.getvalue()
+
+
+ def get_audio_text(user_input: str) -> str:
+     """
+     User Audio Getter.
+     """
+     # Voice input expander
+     with st.expander("🎤 Voice input", expanded=False):
+         st.caption("Click **Speak**, talk, then **Stop**. Allow mic permission in your browser.")
+         out = mic_recorder(
+             start_prompt="🎙️ Speak",
+             stop_prompt="⏹️ Stop",
+             key="mic_rec_1",
+             use_container_width=True,
+         )
+
+     # Normalize outputs from the component
+     raw_audio = _normalize_mic_output(out)
+
+     if raw_audio:
+         try:
+             wav_bytes = _to_pcm_wav_16k(raw_audio)
+         except Exception as e:
+             st.error(f"Could not convert audio to WAV (need ffmpeg/ffprobe?): {e}")
+             wav_bytes = None
+
+         if wav_bytes:
+             st.audio(wav_bytes, format="audio/wav")
+
+             # Transcribe with SpeechRecognition
+             r = sr.Recognizer()
+             try:
+                 with sr.AudioFile(BytesIO(wav_bytes)) as source:
+                     audio = r.record(source)
+                 voice_text = r.recognize_google(audio)  # type: ignore[attr-defined]
+                 st.success(f"You said: {voice_text}")
+                 if not user_input:
+                     user_input = voice_text
+                 st.session_state["speak_reply"] = True  # speak back only when voice was used
+                 print(f"Setting session state to {st.session_state['speak_reply']}")
+             except Exception as e:
+                 st.warning(f"Transcription failed: {e}")

+     return user_input
+
+
+ def speech_to_text():
+     """Record from mic, return transcribed text or None."""
+     rec = mic_recorder(
+         start_prompt="🎙️ Speak",
+         stop_prompt="⏹️ Stop",
+         key="mic",
+         use_container_width=True,
+     )
+     if rec and "wav" in rec:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+             tmp.write(rec["wav"])
+             tmp.flush()
+             r = sr.Recognizer()
+             with sr.AudioFile(tmp.name) as source:
+                 audio = r.record(source)
+             try:
+                 return r.recognize_google(audio)
+             except Exception as e:
+                 st.warning(f"Speech recognition failed: {e}")
+     return None
+
+
+ def speak(text: str):
+     """Synthesize speech for the agent reply and play it."""
+     if not text:
+         return
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+             gTTS(text).save(tmp.name)
+             st.audio(tmp.name, format="audio/mp3")
+     except Exception as e:
+         st.warning(f"TTS failed: {e}")
+
+
+ def _md_to_plain_text(s: str) -> str:
+     """Very light Markdown cleanup for TTS."""
+     s = re.sub(r"```.*?```", lambda m: m.group(0).replace("```", ""), s, flags=re.S)  # drop fences
+     s = s.replace("`", "")  # inline code ticks
+     s = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", s)  # links: [text](url) -> text
+     return s.strip()
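A condensed sketch of how these helpers are wired into the GUI (mirroring the agent_gui.py and gui_utils.py changes elsewhere in this diff), assuming the optional audio dependencies (gtts, pydub, speechrecognition, streamlit-mic-recorder, and a system ffmpeg) are installed; the reply string is only a placeholder:

    import streamlit as st
    from flowcept.agents.gui.audio_utils import _md_to_plain_text, get_audio_text, speak
    from flowcept.configs import AGENT_AUDIO

    user_input = st.chat_input("Send a message")
    if user_input:
        st.session_state["speak_reply"] = False
        if AGENT_AUDIO:
            user_input = get_audio_text(user_input)  # renders the mic expander, may transcribe voice

    if user_input:
        reply = "**Done!** The workflow has 3 tasks."  # placeholder for the agent's answer
        st.markdown(reply)
        if st.session_state.get("speak_reply"):  # True only when voice input was used this turn
            speak(_md_to_plain_text(reply))  # strip Markdown before gTTS synthesis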
@@ -2,14 +2,20 @@ import base64
  import ast
  import io
  import json
+ from pathlib import Path
+ from importlib.resources import files as pkg_files
+
+ import pandas as pd

  import streamlit as st
+ from flowcept.agents.gui import AI
  from flowcept.agents import prompt_handler
  from flowcept.agents.agent_client import run_tool
  from flowcept.agents.agents_utils import ToolResult
- import pandas as pd

- from flowcept.agents.gui import AI
+
+ from flowcept.agents.gui.audio_utils import _md_to_plain_text, speak
+ from flowcept.configs import AGENT_AUDIO


  def query_agent(user_input: str) -> ToolResult:
@@ -125,6 +131,8 @@ def display_ai_msg_from_tool(tool_result: ToolResult):


  def _sniff_mime(b: bytes) -> str:
+     if b.startswith(b"%PDF-"):
+         return "application/pdf"
      if b.startswith(b"\x89PNG\r\n\x1a\n"):
          return "image/png"
      if b.startswith(b"\xff\xd8\xff"):
@@ -138,23 +146,72 @@ def _sniff_mime(b: bytes) -> str:
      return "application/octet-stream"


+ def _pdf_first_page_to_png(pdf_bytes: bytes, zoom: float = 2.0) -> bytes:
+     """
+     Convert the first page of a PDF to PNG bytes using PyMuPDF (fitz).
+     zoom ~2.0 gives a good thumbnail; increase for higher resolution.
+     """
+     try:
+         import fitz  # PyMuPDF
+     except Exception as e:
+         # PyMuPDF not installed; caller can decide how to handle
+         raise ImportError("PyMuPDF (fitz) is required to render PDF thumbnails") from e
+
+     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+     try:
+         page = doc.load_page(0)
+         pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
+         return pix.tobytes("png")
+     finally:
+         doc.close()
+
+
  def ensure_data_uri(val):
-     r"""Accepts bytes/bytearray/memoryview or a repr like \"b'\\x89PNG...'\" and returns a data URL."""
+     r"""Accept bytes/bytearray/memoryview or a repr like "b'\x89PNG...'", or a file path/URL.
+     Returns a data URL suitable for st.column_config.ImageColumn. If input is a PDF, converts
+     the first page to PNG (requires PyMuPDF).
+     """
+     # Already a data URI?
      if isinstance(val, str) and val.startswith("data:"):
          return val
+
+     # Bytes repr string? -> real bytes
      if isinstance(val, str) and val.startswith("b'"):
          try:
-             val = ast.literal_eval(val)  # turn repr into bytes
+             val = ast.literal_eval(val)
          except Exception:
              return None
+
+     # Paths that point to a PDF: convert
+     if isinstance(val, str) and val.lower().endswith(".pdf") and Path(val).is_file():
+         try:
+             pdf_bytes = Path(val).read_bytes()
+             png_bytes = _pdf_first_page_to_png(pdf_bytes)
+             return f"data:image/png;base64,{base64.b64encode(png_bytes).decode('ascii')}"
+         except Exception:
+             # Fallback: no preview; caller will show blank cell
+             return None
+
+     # Normalize to bytes if memoryview/bytearray
      if isinstance(val, memoryview):
          val = val.tobytes()
      if isinstance(val, bytearray):
          val = bytes(val)
+
+     # Raw bytes? detect and convert if PDF
      if isinstance(val, bytes):
          mime = _sniff_mime(val)
+         if mime == "application/pdf":
+             try:
+                 png_bytes = _pdf_first_page_to_png(val)
+                 return f"data:image/png;base64,{base64.b64encode(png_bytes).decode('ascii')}"
+             except Exception:
+                 return None
+         # Regular image bytes -> data URI
          return f"data:{mime};base64,{base64.b64encode(val).decode('ascii')}"
-     return val  # path/URL, etc.
+
+     # Otherwise (URL/path to an image) let Streamlit try; PDFs won’t render as images
+     return val


  def _render_df(df: pd.DataFrame, image_width: int = 90, row_height: int = 90):
@@ -242,6 +299,17 @@ def display_df_tool_response(tool_result: ToolResult):
          st.markdown("📝 Summary:")
          print(f"THIS IS THE SUMMARY\n{summary}")
          st.markdown(summary)
+
+         if AGENT_AUDIO:
+             # 🔊 Speak only if user spoke to us this turn
+             print(f"This is the session state nowww: {st.session_state['speak_reply']}")
+             if st.session_state.get("speak_reply"):
+                 try:
+                     plain_text = _md_to_plain_text(summary)
+                     print(f"Trying to speak plain text {plain_text}")
+                     speak(plain_text)  # uses your existing gTTS-based speak()
+                 except Exception as e:
+                     st.warning(f"TTS failed: {e}")
      elif summary_error:
          st.markdown(f"⚠️ Encountered this error when summarizing the result dataframe:\n```text\n{summary_error}")

@@ -288,3 +356,87 @@ def exec_st_plot_code(code, result_df, st_module):
          code,
          {"result": result_df, "st": st_module, "plt": __import__("matplotlib.pyplot"), "alt": __import__("altair")},
      )
+
+
+ def _resolve_logo() -> str | None:
+     # Try package resource
+     try:
+         p = pkg_files("flowcept").joinpath("docs/img/flowcept-logo.png")
+         if p.is_file():
+             return str(p)
+     except Exception:
+         pass
+     # Fallbacks for dev checkouts
+     here = Path(__file__).resolve()
+     candidates = [
+         here.parents[3] / "docs/img/flowcept-logo.png",
+         here.parents[2] / "docs/img/flowcept-logo.png",
+         here.parents[1] / "docs/img/flowcept-logo.png",
+         Path("flowcept/docs/img/flowcept-logo.png"),
+     ]
+     for c in candidates:
+         if c.is_file():
+             return str(c)
+         print(str(c))
+     return None
+
+
+ def resolve_logo_path(package: str = "flowcept", resource: str = "docs/img/flowcept-logo.png") -> str | None:
+     """
+     Resolve the Flowcept logo whether running from an installed package or a src/ layout repo.
+     Returns an absolute string path or None if not found.
+     """
+     # 1) Try packaged resource (works if docs/img is included in the wheel/sdist)
+     try:
+         p = pkg_files(package).joinpath(resource)
+         if hasattr(p, "is_file") and p.is_file():
+             return str(p)
+     except Exception:
+         pass
+
+     here = Path(__file__).resolve()
+
+     # 2) src/ layout repo: .../<repo>/flowcept/src/flowcept/agents/gui/gui_utils.py
+     #    Find the nearest 'src' ancestor, then go to repo root (src/..), then docs/img/...
+     try:
+         src_dir = next(p for p in here.parents if p.name == "src")
+         repo_root = src_dir.parent  # <repo>/flowcept
+         cand = repo_root / "docs" / "img" / "flowcept-logo.png"
+         if cand.is_file():
+             return str(cand)
+     except StopIteration:
+         pass
+
+     # 3) Editable install package dir: .../src/flowcept (package root)
+     pkg_dir = here.parents[2]  # .../src/flowcept
+     cand = pkg_dir / "docs" / "img" / "flowcept-logo.png"
+     if cand.is_file():
+         return str(cand)
+
+     # 4) CWD fallback
+     cand = Path.cwd() / "flowcept" / "docs" / "img" / "flowcept-logo.png"
+     if cand.is_file():
+         return str(cand)
+
+     return None
+
+
+ def render_title_with_logo(
+     page_title: str, logo_path: str | None, logo_width: int = 150, add_to_sidebar: bool = True, debug: bool = False
+ ):
+     """
+     Render a header row with an optional logo next to the title; optionally mirror it in the sidebar.
+     """
+     if debug:
+         st.caption(f"Logo path resolved to: {logo_path or 'NOT FOUND'}")
+
+     if logo_path and Path(logo_path).is_file():
+         col_logo, col_title = st.columns([1, 6])
+         with col_logo:
+             st.image(logo_path, width=logo_width)
+         with col_title:
+             st.title(page_title)
+         if add_to_sidebar:
+             st.sidebar.image(logo_path, width=logo_width)
+     else:
+         st.title(page_title)
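With the PDF branch added above, ensure_data_uri can feed PDF artifacts straight into a Streamlit image column. A small usage sketch, assuming pymupdf is installed; the file name and DataFrame are illustrative:

    import pandas as pd
    import streamlit as st
    from flowcept.agents.gui.gui_utils import ensure_data_uri

    # Cells may hold PNG/JPEG bytes, a bytes repr string, an image path/URL, or PDF bytes.
    df = pd.DataFrame({"name": ["attention plot"], "image": [open("figure.pdf", "rb").read()]})
    df["image"] = df["image"].map(ensure_data_uri)  # PDFs become PNG data URIs via PyMuPDF
    st.dataframe(df, column_config={"image": st.column_config.ImageColumn("image")})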
@@ -24,9 +24,9 @@ ROUTING_PROMPT = (
      "Given the following user message, classify it into one of the following routes:\n"
      "- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
      "- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
-     #"- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
+     # "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
      "- in_context_query: if the user is querying the provenance data questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
-     #"- historical_prov_query: if the user wants to query historical provenance data\n"
+     # "- historical_prov_query: if the user wants to query historical provenance data\n"
      "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
      "- unknown: if you don't know.\n"
      "Respond with only the route label."
@@ -176,6 +176,7 @@ QUERY_GUIDELINES = """
  -To select the first (or earliest) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"started_at": 'min'}}).sort_values(by='started_at', ascending=True).head(N)['workflow_id']` - utilize `started_at` to sort!
  -To select the last (or latest or most recent) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"ended_at": 'max'}}).sort_values(by='ended_at', ascending=False).head(N)['workflow_id']` - utilize `ended_at` to sort!

+ -If the user does not ask for a specific workflow run, do not use `workflow_id` in your query.
  -To select the first or earliest or initial tasks, use or adapt the following: `df.sort_values(by='started_at', ascending=True)`
  -To select the last or final or most recent tasks, use or adapt the following: `df.sort_values(by='ended_at', ascending=False)`

@@ -226,10 +227,11 @@ OUTPUT_FORMATTING = """
  def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
      if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
          concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
-         custom_user_guidance_prompt = (f"You MUST consider the following guidance from the user:\n"
-                                        f"{concatenated_guidance}"
-                                        "------------------------------------------------------"
-                                        )
+         custom_user_guidance_prompt = (
+             f"You MUST consider the following guidance from the user:\n"
+             f"{concatenated_guidance}"
+             "------------------------------------------------------"
+         )
      else:
          custom_user_guidance_prompt = ""
      prompt = (
@@ -62,6 +62,9 @@ def check_llm() -> str:

  @mcp_flowcept.tool()
  def record_guidance(message: str) -> ToolResult:
+     """
+     Record guidance tool.
+     """
      ctx = mcp_flowcept.get_context()
      message = message.replace("@record", "")
      custom_guidance: List = ctx.request_context.lifespan_context.custom_guidance
@@ -222,7 +222,9 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df, custom_us


  @mcp_flowcept.tool()
- def generate_result_df(llm, query: str, dynamic_schema, example_values, df, custom_user_guidance=None, attempt_fix=True, summarize=True):
+ def generate_result_df(
+     llm, query: str, dynamic_schema, example_values, df, custom_user_guidance=None, attempt_fix=True, summarize=True
+ ):
      """
      Generate a result DataFrame from a natural language query using an LLM.

@@ -70,6 +70,7 @@ class MQDaoRedis(MQDao):
                  except Exception as e:
                      self.logger.error(f"Failed to process message {message}")
                      self.logger.exception(e)
+                     continue

                  current_trials = 0
              except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
@@ -78,7 +79,7 @@ class MQDaoRedis(MQDao):
                  sleep(3)
              except Exception as e:
                  self.logger.exception(e)
-                 break
+                 continue

      def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps):
          """Send the message."""
@@ -149,7 +149,8 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
      # a provenance task.
      if "data" in task:
          if "custom_metadata" in task:
-             if "image" in task["custom_metadata"].get("mime_type", ""):
+             mime_type = task["custom_metadata"].get("mime_type", "")
+             if "image" in mime_type or "application/pdf" in mime_type:
                  task_summary["image"] = task["data"]

      # Special handling for timestamp field
flowcept/configs.py CHANGED
@@ -235,6 +235,13 @@ INSTRUMENTATION = settings.get("instrumentation", {})
  INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)

  AGENT = settings.get("agent", {})
+ AGENT_AUDIO = os.getenv("AGENT_AUDIO", str(settings["agent"].get("audio_enabled", "false"))).strip().lower() in {
+     "1",
+     "true",
+     "yes",
+     "y",
+     "t",
+ }
  AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
  AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))

@@ -1,7 +1,6 @@
  """Controller module."""

- import os.path
- from typing import List, Dict
+ from typing import List, Dict, Any
  from uuid import uuid4

  from flowcept.commons.autoflush_buffer import AutoflushBuffer
@@ -175,25 +174,31 @@ class Flowcept(object):
          self._interceptor_instances[0]._mq_dao.bulk_publish(self.buffer)

      @staticmethod
-     def read_messages_file(file_path: str = None) -> List[Dict]:
+     def read_messages_file(file_path: str | None = None, return_df: bool = False):
          """
          Read a JSON Lines (JSONL) file containing captured Flowcept messages.

          This function loads a file where each line is a serialized JSON object.
          It joins the lines into a single JSON array and parses them efficiently
-         with ``orjson``.
+         with ``orjson``. If ``return_df`` is True, it returns a pandas DataFrame
+         created via ``pandas.json_normalize(..., sep='.')`` so nested fields become
+         dot-separated columns (for example, ``generated.attention``).

          Parameters
          ----------
          file_path : str, optional
-             Path to the messages file. If not provided, defaults to the
-             value of ``DUMP_BUFFER_PATH`` from the configuration.
-             If neither is provided, an assertion error is raised.
+             Path to the messages file. If not provided, defaults to the value of
+             ``DUMP_BUFFER_PATH`` from the configuration. If neither is provided,
+             an assertion error is raised.
+         return_df : bool, default False
+             If True, return a normalized pandas DataFrame. If False, return the
+             parsed list of dictionaries.

          Returns
          -------
-         List[dict]
-             A list of message objects (dictionaries) parsed from the file.
+         list of dict or pandas.DataFrame
+             A list of message objects when ``return_df`` is False,
+             otherwise a normalized DataFrame with dot-separated columns.

          Raises
          ------
@@ -203,35 +208,45 @@ class Flowcept(object):
              If the specified file does not exist.
          orjson.JSONDecodeError
              If the file contents cannot be parsed as valid JSON.
+         ModuleNotFoundError
+             If ``return_df`` is True but pandas is not installed.

          Examples
          --------
-         Read messages from a file explicitly:
+         Read messages as a list:

          >>> msgs = read_messages_file("offline_buffer.jsonl")
-         >>> print(len(msgs))
-         128
+         >>> len(msgs) > 0
+         True

-         Use the default dump buffer path from config:
+         Read messages as a normalized DataFrame:

-         >>> msgs = read_messages_file()
-         >>> for m in msgs[:2]:
-         ...     print(m["type"], m.get("workflow_id"))
-         task_start wf_123
-         task_end wf_123
+         >>> df = read_messages_file("offline_buffer.jsonl", return_df=True)
+         >>> "generated.attention" in df.columns
+         True
          """
+         import os
          import orjson

-         _buffer = []
          if file_path is None:
              file_path = DUMP_BUFFER_PATH
          assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
          if not os.path.exists(file_path):
-             raise f"File {file_path} has not been created. It will only be created if you run in fully offline mode."
+             raise FileNotFoundError(f"File '{file_path}' was not found. It is created only in fully offline mode.")
+
          with open(file_path, "rb") as f:
              lines = [ln for ln in f.read().splitlines() if ln]
-             _buffer = orjson.loads(b"[" + b",".join(lines) + b"]")
-         return _buffer
+
+         buffer: List[Dict[str, Any]] = orjson.loads(b"[" + b",".join(lines) + b"]")
+
+         if return_df:
+             try:
+                 import pandas as pd
+             except ModuleNotFoundError as e:
+                 raise ModuleNotFoundError("pandas is required when return_df=True. Please install pandas.") from e
+             return pd.json_normalize(buffer, sep=".")
+
+         return buffer

      def save_workflow(self, interceptor: str, interceptor_instance: BaseInterceptor):
          """
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
  # The expected format is: <Major>.<Minor>.<Patch>
  # This file is supposed to be automatically modified by the CI Bot.
  # See .github/workflows/version_bumper.py
- __version__ = "0.9.6"
+ __version__ = "0.9.8"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: flowcept
- Version: 0.9.6
+ Version: 0.9.8
  Summary: Capture and query workflow provenance data using data observability
  Author: Oak Ridge National Laboratory
  License-Expression: MIT
@@ -27,6 +27,7 @@ Requires-Dist: jupyterlab; extra == 'all'
  Requires-Dist: langchain-community; extra == 'all'
  Requires-Dist: langchain-openai; extra == 'all'
  Requires-Dist: lmdb; extra == 'all'
+ Requires-Dist: matplotlib; extra == 'all'
  Requires-Dist: mcp[cli]; extra == 'all'
  Requires-Dist: mlflow-skinny; extra == 'all'
  Requires-Dist: nbmake; extra == 'all'
@@ -38,6 +39,7 @@ Requires-Dist: psutil>=6.1.1; extra == 'all'
  Requires-Dist: py-cpuinfo; extra == 'all'
  Requires-Dist: pyarrow; extra == 'all'
  Requires-Dist: pymongo; extra == 'all'
+ Requires-Dist: pymupdf; extra == 'all'
  Requires-Dist: pytest; extra == 'all'
  Requires-Dist: pyyaml; extra == 'all'
  Requires-Dist: redis; extra == 'all'
@@ -87,13 +89,28 @@ Requires-Dist: confluent-kafka<=2.8.0; extra == 'kafka'
  Provides-Extra: llm-agent
  Requires-Dist: langchain-community; extra == 'llm-agent'
  Requires-Dist: langchain-openai; extra == 'llm-agent'
+ Requires-Dist: matplotlib; extra == 'llm-agent'
  Requires-Dist: mcp[cli]; extra == 'llm-agent'
+ Requires-Dist: pymupdf; extra == 'llm-agent'
  Requires-Dist: streamlit; extra == 'llm-agent'
+ Provides-Extra: llm-agent-audio
+ Requires-Dist: gtts; extra == 'llm-agent-audio'
+ Requires-Dist: langchain-community; extra == 'llm-agent-audio'
+ Requires-Dist: langchain-openai; extra == 'llm-agent-audio'
+ Requires-Dist: matplotlib; extra == 'llm-agent-audio'
+ Requires-Dist: mcp[cli]; extra == 'llm-agent-audio'
+ Requires-Dist: pydub; extra == 'llm-agent-audio'
+ Requires-Dist: pymupdf; extra == 'llm-agent-audio'
+ Requires-Dist: speechrecognition; extra == 'llm-agent-audio'
+ Requires-Dist: streamlit; extra == 'llm-agent-audio'
+ Requires-Dist: streamlit-mic-recorder; extra == 'llm-agent-audio'
  Provides-Extra: llm-google
  Requires-Dist: google-genai; extra == 'llm-google'
  Requires-Dist: langchain-community; extra == 'llm-google'
  Requires-Dist: langchain-openai; extra == 'llm-google'
+ Requires-Dist: matplotlib; extra == 'llm-google'
  Requires-Dist: mcp[cli]; extra == 'llm-google'
+ Requires-Dist: pymupdf; extra == 'llm-google'
  Requires-Dist: streamlit; extra == 'llm-google'
  Provides-Extra: lmdb
  Requires-Dist: lmdb; extra == 'lmdb'
@@ -1,7 +1,7 @@
  flowcept/__init__.py,sha256=urpwIEJeikV0P6ORXKsM5Lq4o6wCwhySS9A487BYGy4,2241
  flowcept/cli.py,sha256=eVnUrmZtVhZ1ldRMGB1QsqBzNC1Pf2CX33efnlaZ4gs,22842
- flowcept/configs.py,sha256=xw9cdk-bDkR4_bV2jBkDCe9__na9LKJW5tUG32by-m4,8216
- flowcept/version.py,sha256=52f8jJknjzSRjyruDcKgGzkV7OsLh2SvZl5sAIsExvU,306
+ flowcept/configs.py,sha256=aXgBkBpTs4_4MpvAe76aQ5lXl1gTmgk92bFiNqMQXPM,8382
+ flowcept/version.py,sha256=zH7JKitqQGm2p8zaw6JClXGAc-kbLbhXB70bFMI-zhU,306
  flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
  flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
  flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
@@ -9,18 +9,19 @@ flowcept/agents/dynamic_schema_tracker.py,sha256=TsmXRRkyUkqB-0bEgmeqSms8xj1tMMJ
  flowcept/agents/flowcept_agent.py,sha256=1sidjnNMdG0S6lUKBvml7ZfIb6o3u7zc6HNogsJbl9g,871
  flowcept/agents/flowcept_ctx_manager.py,sha256=-WmkddzzFY2dnU9LbZaoY4-5RcSAQH4FziEJgcC5LEI,7083
  flowcept/agents/gui/__init__.py,sha256=Qw9YKbAzgZqBjMQGnF7XWmfUo0fivtkDISQRK3LA3gU,113
- flowcept/agents/gui/agent_gui.py,sha256=8sTG3MjWBi6oc4tnfHa-duTBXWEE6RBxBE5uHooGkzI,2501
- flowcept/agents/gui/gui_utils.py,sha256=61JpFKu-yd7luWVBW6HQYd3feOmupR01tYsZxl804No,9517
+ flowcept/agents/gui/agent_gui.py,sha256=jsKPxJbXL2C2tXyNKpJnuVhSFktc0IpXyccW158rSWU,2752
+ flowcept/agents/gui/audio_utils.py,sha256=piA_dc36io1sYqLF6QArS4AMl-cfDa001jGhYz5LkB4,4279
+ flowcept/agents/gui/gui_utils.py,sha256=cQVhOgnfxJNUVZyXyO8f40nB1yaKAKVtBrwQmJjL0B0,14933
  flowcept/agents/llms/__init__.py,sha256=kzOaJic5VhMBnGvy_Fr5C6sRKVrRntH1ZnYz7f5_4-s,23
  flowcept/agents/llms/claude_gcp.py,sha256=fzz7235DgzVueuFj5odsr93jWtYHpYlXkSGW1kmmJwU,4915
  flowcept/agents/llms/gemini25.py,sha256=VARrjb3tITIh3_Wppmocp_ocSKVZNon0o0GeFEwTnTI,4229
  flowcept/agents/prompts/__init__.py,sha256=7ICsNhLYzvPS1esG3Vg519s51b1c4yN0WegJUb6Qvww,26
- flowcept/agents/prompts/general_prompts.py,sha256=OWVyToJL3w16zjycA0U0oRIx3XQRuklg0wqiUOny_1U,3892
- flowcept/agents/prompts/in_memory_query_prompts.py,sha256=70f4u3iFP9u1-CBM8yZR2cBu4qvxBe6FiKXLhRK8RCs,19634
+ flowcept/agents/prompts/general_prompts.py,sha256=Mj6dMdrnJfq-bibi1XQVNZ8zx5MZUwxTvYY_qijPfoI,3894
+ flowcept/agents/prompts/in_memory_query_prompts.py,sha256=0u6hIV1v-Fhk3dQVvbEW0qggi0KZbEBopMvJtgCNIVc,19664
  flowcept/agents/tools/__init__.py,sha256=Xqz2E4-LL_7DDcm1XYJFx2f5RdAsjeTpOJb_DPC7xyc,27
- flowcept/agents/tools/general_tools.py,sha256=_c8NCMU32u2HOvEDMTSDptmHZYMMh48WRkZWBayZGaY,3206
+ flowcept/agents/tools/general_tools.py,sha256=KS7ZTf1UbTxg0yQ6zCxh1g3NzcliYKWdurMArhPowxs,3248
  flowcept/agents/tools/in_memory_queries/__init__.py,sha256=K8-JI_lXUgquKkgga8Nef8AntGg_logQtjjQjaEE7yI,39
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=2kDmjz2cTN7q3eMjoTo4iruoyRTS0i370hSBYq2FZgA,25978
+ flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=GcfAiUBhQ1DU3QKk0kAy9TSq8XmZw691Xs0beZoO76A,25984
  flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py,sha256=xyrZupR86qoUptnnQ7PeF0LTzSOquEK2cjc0ghT1KBs,9018
  flowcept/analytics/__init__.py,sha256=46q-7vsHq_ddPNrzNnDgEOiRgvlx-5Ggu2ocyROMV0w,641
  flowcept/analytics/analytics_utils.py,sha256=FRJdBtQa7Hrk2oR_FFhmhmMf3X6YyZ4nbH5RIYh7KL4,8753
@@ -31,7 +32,7 @@ flowcept/commons/autoflush_buffer.py,sha256=Ohy_RNbq6BXn0_R83OL5iaTgGPmV8cT1moIR
  flowcept/commons/flowcept_logger.py,sha256=0asRucrDMeRXvsdhuCmH6lWO7lAt_Z5o5uW7rrQhcjc,1857
  flowcept/commons/query_utils.py,sha256=3tyK5VYA10iDtmtzNwa8OQGn93DBxsu6rTjHDphftSc,2208
  flowcept/commons/settings_factory.py,sha256=bMTjgXRfb5HsL2lPnLfem-9trqELbNWE04Ie7lSlxYM,1731
- flowcept/commons/task_data_preprocess.py,sha256=bJed8Jbo4Mxk6aRVt3sCn4_KxfV5jWXwAIQWwuqHm3U,13846
+ flowcept/commons/task_data_preprocess.py,sha256=-ceLexv2ZfZOAYF43DPagGwQPgt_L_lNKuK8ZCpnzXs,13914
  flowcept/commons/utils.py,sha256=gF6ENWlTpR2ZSw3yVNPNBTVzSpcgy-WuzYzwWSXXsug,9252
  flowcept/commons/vocabulary.py,sha256=_GzHJ1wSYJlLsu_uu1Am6N3zvc59S4FCuT5yp7lynPw,713
  flowcept/commons/daos/__init__.py,sha256=RO51svfHOg9naN676zuQwbj_RQ6IFHu-RALeefvtwwk,23
@@ -45,7 +46,7 @@ flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFm
  flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=EL8eQedvNLsVLMz4oHemBAsR1S6xFZiezM8dIqKmmCA,9696
  flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=kjZqPLIu5PaNeM4IDvOxkDRVGTd5UWwq3zhDvVirqW8,5067
  flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=tRdMGYDzdeIJxad-B4-DE6u8Wzs61eTzOW4ojZrnTxs,4057
- flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=WKPoMPBSce4shqbBkgsnuqJAJoZZ4U_hdebhyFqtejQ,5535
+ flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=ejBMxImA-h2KuMEAk3l7aU0chCcObCbUXEOXM6L4Zhc,5571
  flowcept/commons/flowcept_dataclasses/__init__.py,sha256=8KkiJh0WSRAB50waVluxCSI8Tb9X1L9nup4c8RN3ulc,30
  flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py,sha256=Cjw2PGYtZDfnwecz6G3S42Ncmxj7AIZVEBx05bsxRUo,399
  flowcept/commons/flowcept_dataclasses/task_object.py,sha256=XLFD8YTWsyDLSRcgZc5qK2a9yk97XnqZoUAL4T6HNPE,8110
@@ -53,7 +54,7 @@ flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfI
  flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
  flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
  flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
- flowcept/flowcept_api/flowcept_controller.py,sha256=JcUQXJfEjmg-KQsolIN5Ul7vbSxZUg8QTWaGAahZKTE,15251
+ flowcept/flowcept_api/flowcept_controller.py,sha256=jfssXUvG55RVXJBziq-lXekt7Dog3mAalo5Zsp_7_to,16060
  flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
  flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
  flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
@@ -93,9 +94,9 @@ flowcept/instrumentation/flowcept_loop.py,sha256=jea_hYPuXg5_nOWf-nNb4vx8A__OBM4
  flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
  flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
  flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
- resources/sample_settings.yaml,sha256=WSwpz8vmyx3oEsO6skV1KbFkYMDz-yIVQC6xlbUMDXs,6756
- flowcept-0.9.6.dist-info/METADATA,sha256=Bv9ZnCip57dtrn0Hv0GaT_i8CX3DfEGn6Ngclb7P-9Y,31581
- flowcept-0.9.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- flowcept-0.9.6.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
- flowcept-0.9.6.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
- flowcept-0.9.6.dist-info/RECORD,,
+ resources/sample_settings.yaml,sha256=ufx-07gm7u0UMJa_HPutD3w1VrZKaPBht5H1xFUbIWU,6779
+ flowcept-0.9.8.dist-info/METADATA,sha256=-a_76ZRJ8DAu_cwGtwiW4OIUdil-orVS7TC5heM-Yco,32439
+ flowcept-0.9.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ flowcept-0.9.8.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
+ flowcept-0.9.8.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
+ flowcept-0.9.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
- flowcept_version: 0.9.6 # Version of the Flowcept package. This setting file is compatible with this version.
+ flowcept_version: 0.9.8 # Version of the Flowcept package. This setting file is compatible with this version.

  project:
    debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
@@ -91,6 +91,7 @@ agent:
    model: '?'
    service_provider: '?'
    model_kwargs: {}
+   audio_enabled: false

  databases:
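Taken together with the configs.py change above, audio support stays off by default and can be enabled either by setting audio_enabled: true under the agent section of the settings file or through the AGENT_AUDIO environment variable. A sketch of the environment-variable route (the variable must be set before flowcept.configs is imported):

    import os

    # Any of "1", "true", "yes", "y", "t" (case-insensitive) enables audio.
    os.environ["AGENT_AUDIO"] = "true"

    from flowcept.configs import AGENT_AUDIO

    assert AGENT_AUDIO is True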