flowcept 0.9.6__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,76 +1,87 @@
  import streamlit as st
+
  from flowcept.agents.gui import AI, PAGE_TITLE
+ from flowcept.agents.gui.audio_utils import get_audio_text
  from flowcept.agents.gui.gui_utils import (
      query_agent,
      display_ai_msg,
      display_ai_msg_from_tool,
      display_df_tool_response,
+     resolve_logo_path,
+     render_title_with_logo,
  )
-
  from flowcept.agents.tools.in_memory_queries.in_memory_queries_tools import (
      generate_result_df,
      generate_plot_code,
      run_df_code,
  )
+ from flowcept.configs import AGENT_AUDIO

+ # ---- Page setup & header with logo ----
  st.set_page_config(page_title=PAGE_TITLE, page_icon=AI)
- st.title(PAGE_TITLE)
+
+ LOGO_PATH = resolve_logo_path(package="flowcept", resource="docs/img/flowcept-logo.png")
+ render_title_with_logo(PAGE_TITLE, LOGO_PATH, logo_width=150, add_to_sidebar=False, debug=False)

  GREETING = (
-     "Hi, there! I'm a **Workflow Provenance Specialist**.\n\n"
+     "Hi, there! I'm your **Workflow Provenance Assistant**.\n\n"
      "I am tracking workflow executions and I can:\n"
-     "- 🔍 Analyze running workflows\n"
+     "- 🔍 Query running workflows\n"
      "- 📊 Plot graphs\n"
      "- 🤖 Answer general questions about provenance data\n\n"
      "How can I help you today?"
  )
-
-
  display_ai_msg(GREETING)

- # if "chat_history" not in st.session_state:
- # st.session_state.chat_history = [{"role": "system", "content":GREETING}]
- #
- # for msg in st.session_state.chat_history:
- # with st.chat_message(msg["role"], avatar=AI):
- # st.markdown(msg["content"])
-

  def main():
-     """Main Streamlit Function."""
+     """Main Agent GUI function."""
+     st.caption(
+         "💡 Tip: Ask about workflow metrics, generate plots, or summarize data. "
+         "Inputs are mapped to `used` and outputs to `generated` fields. "
+         "Use @record <your query guidance> if you have custom guidance."
+     )
+
      user_input = st.chat_input("Send a message")
-     st.caption("💡 Tip: Ask about workflow metrics, generate plots, or summarize data.")

      if user_input:
-         # st.session_state.chat_history.append({"role": "human", "content": user_input})
+         st.session_state["speak_reply"] = False
+
+         if AGENT_AUDIO:
+             user_input = get_audio_text(user_input)

+     if user_input:
          with st.chat_message("human"):
              st.markdown(user_input)

          try:
              with st.spinner("🤖 Thinking..."):
                  tool_result = query_agent(user_input)
-             print(tool_result)

              if tool_result.result_is_str():
                  display_ai_msg_from_tool(tool_result)
+
              elif tool_result.is_success_dict():
                  tool_name = tool_result.tool_name
-                 if tool_name in [generate_result_df.__name__, generate_plot_code.__name__, run_df_code.__name__]:
+                 if tool_name in (
+                     generate_result_df.__name__,
+                     generate_plot_code.__name__,
+                     run_df_code.__name__,
+                 ):
                      display_df_tool_response(tool_result)
                  else:
                      display_ai_msg(f"⚠️ Received unexpected response from agent: {tool_result}")
                      st.stop()
              else:
                  display_df_tool_response(tool_result)
-                 # display_ai_msg(f"⚠️ Received unexpected response from agent: {tool_result}")
                  st.stop()

          except Exception as e:
              display_ai_msg(f"❌ Error talking to MCP agent:\n\n```text\n{e}\n```")
              st.stop()

-     # st.session_state.chat_history.append({"role": "system", "content": agent_reply})

+ if "speak_reply" not in st.session_state:
+     st.session_state["speak_reply"] = False

  main()
@@ -0,0 +1,129 @@
+ import re
+ import tempfile
+ from io import BytesIO
+ import base64
+
+ import streamlit as st
+ from gtts import gTTS
+ from streamlit_mic_recorder import mic_recorder
+ import speech_recognition as sr
+ from pydub import AudioSegment  # needs ffmpeg installed
+
+
+ def _normalize_mic_output(out) -> bytes | None:
+     """Handle different return shapes from streamlit-mic-recorder."""
+     if not isinstance(out, dict):
+         return None
+     if out.get("wav"):
+         return out["wav"]
+     if out.get("bytes"):
+         return out["bytes"]
+     if out.get("b64"):
+         return base64.b64decode(out["b64"])
+     return None
+
+
+ def _is_wav_pcm(blob: bytes) -> bool:
+     """Quick RIFF/WAVE header check."""
+     h = blob[:12]
+     return h.startswith(b"RIFF") and h[8:12] == b"WAVE"
+
+
+ def _to_pcm_wav_16k(blob: bytes) -> bytes:
+     """
+     Convert arbitrary audio bytes (webm/ogg/mp3/…) to 16-bit PCM WAV mono @16k.
+     Requires ffmpeg via pydub.
+     """
+     if _is_wav_pcm(blob):
+         return blob
+     seg = AudioSegment.from_file(BytesIO(blob))  # ffmpeg does the heavy lifting
+     seg = seg.set_channels(1).set_frame_rate(16000).set_sample_width(2)
+     buf = BytesIO()
+     seg.export(buf, format="wav")
+     return buf.getvalue()
+
+
+ def get_audio_text(user_input: str) -> str:
+     """
+     User Audio Getter.
+     """
+     # Voice input expander
+     with st.expander("🎤 Voice input", expanded=False):
+         st.caption("Click **Speak**, talk, then **Stop**. Allow mic permission in your browser.")
+         out = mic_recorder(
+             start_prompt="🎙️ Speak",
+             stop_prompt="⏹️ Stop",
+             key="mic_rec_1",
+             use_container_width=True,
+         )
+
+     # Normalize outputs from the component
+     raw_audio = _normalize_mic_output(out)
+
+     if raw_audio:
+         try:
+             wav_bytes = _to_pcm_wav_16k(raw_audio)
+         except Exception as e:
+             st.error(f"Could not convert audio to WAV (need ffmpeg/ffprobe?): {e}")
+             wav_bytes = None
+
+         if wav_bytes:
+             st.audio(wav_bytes, format="audio/wav")
+
+             # Transcribe with SpeechRecognition
+             r = sr.Recognizer()
+             try:
+                 with sr.AudioFile(BytesIO(wav_bytes)) as source:
+                     audio = r.record(source)
+                 voice_text = r.recognize_google(audio)  # type: ignore[attr-defined]
+                 st.success(f"You said: {voice_text}")
+                 if not user_input:
+                     user_input = voice_text
+                 st.session_state["speak_reply"] = True  # speak back only when voice was used
+                 print(f"Setting session state to {st.session_state['speak_reply']}")
+             except Exception as e:
+                 st.warning(f"Transcription failed: {e}")

+     return user_input
+
+
+ def speech_to_text():
+     """Record from mic, return transcribed text or None."""
+     rec = mic_recorder(
+         start_prompt="🎙️ Speak",
+         stop_prompt="⏹️ Stop",
+         key="mic",
+         use_container_width=True,
+     )
+     if rec and "wav" in rec:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+             tmp.write(rec["wav"])
+             tmp.flush()
+             r = sr.Recognizer()
+             with sr.AudioFile(tmp.name) as source:
+                 audio = r.record(source)
+             try:
+                 return r.recognize_google(audio)
+             except Exception as e:
+                 st.warning(f"Speech recognition failed: {e}")
+     return None
+
+
+ def speak(text: str):
+     """Synthesize speech for the agent reply and play it."""
+     if not text:
+         return
+     try:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
+             gTTS(text).save(tmp.name)
+             st.audio(tmp.name, format="audio/mp3")
+     except Exception as e:
+         st.warning(f"TTS failed: {e}")
+
+
+ def _md_to_plain_text(s: str) -> str:
+     """Very light Markdown cleanup for TTS."""
+     s = re.sub(r"```.*?```", lambda m: m.group(0).replace("```", ""), s, flags=re.S)  # drop fences
+     s = s.replace("`", "")  # inline code ticks
+     s = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", s)  # links: [text](url) -> text
+     return s.strip()
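A condensed sketch of how these helpers are wired into the GUI (mirroring the agent_gui.py and gui_utils.py changes elsewhere in this diff), assuming the optional audio dependencies (gtts, pydub, speechrecognition, streamlit-mic-recorder, and a system ffmpeg) are installed; the reply string is only a placeholder:

    import streamlit as st
    from flowcept.agents.gui.audio_utils import _md_to_plain_text, get_audio_text, speak
    from flowcept.configs import AGENT_AUDIO

    user_input = st.chat_input("Send a message")
    if user_input:
        st.session_state["speak_reply"] = False
        if AGENT_AUDIO:
            user_input = get_audio_text(user_input)  # renders the mic expander, may transcribe voice

    if user_input:
        reply = "**Done!** The workflow has 3 tasks."  # placeholder for the agent's answer
        st.markdown(reply)
        if st.session_state.get("speak_reply"):  # True only when voice input was used this turn
            speak(_md_to_plain_text(reply))  # strip Markdown before gTTS synthesis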
@@ -2,14 +2,20 @@ import base64
  import ast
  import io
  import json
+ from pathlib import Path
+ from importlib.resources import files as pkg_files
+
+ import pandas as pd

  import streamlit as st
+ from flowcept.agents.gui import AI
  from flowcept.agents import prompt_handler
  from flowcept.agents.agent_client import run_tool
  from flowcept.agents.agents_utils import ToolResult
- import pandas as pd

- from flowcept.agents.gui import AI
+
+ from flowcept.agents.gui.audio_utils import _md_to_plain_text, speak
+ from flowcept.configs import AGENT_AUDIO


  def query_agent(user_input: str) -> ToolResult:
@@ -125,6 +131,8 @@ def display_ai_msg_from_tool(tool_result: ToolResult):


  def _sniff_mime(b: bytes) -> str:
+     if b.startswith(b"%PDF-"):
+         return "application/pdf"
      if b.startswith(b"\x89PNG\r\n\x1a\n"):
          return "image/png"
      if b.startswith(b"\xff\xd8\xff"):
@@ -138,23 +146,72 @@ def _sniff_mime(b: bytes) -> str:
      return "application/octet-stream"


+ def _pdf_first_page_to_png(pdf_bytes: bytes, zoom: float = 2.0) -> bytes:
+     """
+     Convert the first page of a PDF to PNG bytes using PyMuPDF (fitz).
+     zoom ~2.0 gives a good thumbnail; increase for higher resolution.
+     """
+     try:
+         import fitz  # PyMuPDF
+     except Exception as e:
+         # PyMuPDF not installed; caller can decide how to handle
+         raise ImportError("PyMuPDF (fitz) is required to render PDF thumbnails") from e
+
+     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+     try:
+         page = doc.load_page(0)
+         pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
+         return pix.tobytes("png")
+     finally:
+         doc.close()
+
+
  def ensure_data_uri(val):
-     r"""Accepts bytes/bytearray/memoryview or a repr like \"b'\\x89PNG...'\" and returns a data URL."""
+     r"""Accept bytes/bytearray/memoryview or a repr like "b'\x89PNG...'", or a file path/URL.
+     Returns a data URL suitable for st.column_config.ImageColumn. If input is a PDF, converts
+     the first page to PNG (requires PyMuPDF).
+     """
+     # Already a data URI?
      if isinstance(val, str) and val.startswith("data:"):
          return val
+
+     # Bytes repr string? -> real bytes
      if isinstance(val, str) and val.startswith("b'"):
          try:
-             val = ast.literal_eval(val)  # turn repr into bytes
+             val = ast.literal_eval(val)
          except Exception:
              return None
+
+     # Paths that point to a PDF: convert
+     if isinstance(val, str) and val.lower().endswith(".pdf") and Path(val).is_file():
+         try:
+             pdf_bytes = Path(val).read_bytes()
+             png_bytes = _pdf_first_page_to_png(pdf_bytes)
+             return f"data:image/png;base64,{base64.b64encode(png_bytes).decode('ascii')}"
+         except Exception:
+             # Fallback: no preview; caller will show blank cell
+             return None
+
+     # Normalize to bytes if memoryview/bytearray
      if isinstance(val, memoryview):
          val = val.tobytes()
      if isinstance(val, bytearray):
          val = bytes(val)
+
+     # Raw bytes? detect and convert if PDF
      if isinstance(val, bytes):
          mime = _sniff_mime(val)
+         if mime == "application/pdf":
+             try:
+                 png_bytes = _pdf_first_page_to_png(val)
+                 return f"data:image/png;base64,{base64.b64encode(png_bytes).decode('ascii')}"
+             except Exception:
+                 return None
+         # Regular image bytes -> data URI
          return f"data:{mime};base64,{base64.b64encode(val).decode('ascii')}"
-     return val  # path/URL, etc.
+
+     # Otherwise (URL/path to an image) let Streamlit try; PDFs won’t render as images
+     return val


  def _render_df(df: pd.DataFrame, image_width: int = 90, row_height: int = 90):
@@ -242,6 +299,17 @@ def display_df_tool_response(tool_result: ToolResult):
          st.markdown("📝 Summary:")
          print(f"THIS IS THE SUMMARY\n{summary}")
          st.markdown(summary)
+
+         if AGENT_AUDIO:
+             # 🔊 Speak only if user spoke to us this turn
+             print(f"This is the session state nowww: {st.session_state['speak_reply']}")
+             if st.session_state.get("speak_reply"):
+                 try:
+                     plain_text = _md_to_plain_text(summary)
+                     print(f"Trying to speak plain text {plain_text}")
+                     speak(plain_text)  # uses your existing gTTS-based speak()
+                 except Exception as e:
+                     st.warning(f"TTS failed: {e}")
      elif summary_error:
          st.markdown(f"⚠️ Encountered this error when summarizing the result dataframe:\n```text\n{summary_error}")

@@ -288,3 +356,87 @@ def exec_st_plot_code(code, result_df, st_module):
          code,
          {"result": result_df, "st": st_module, "plt": __import__("matplotlib.pyplot"), "alt": __import__("altair")},
      )
+
+
+ def _resolve_logo() -> str | None:
+     # Try package resource
+     try:
+         p = pkg_files("flowcept").joinpath("docs/img/flowcept-logo.png")
+         if p.is_file():
+             return str(p)
+     except Exception:
+         pass
+     # Fallbacks for dev checkouts
+     here = Path(__file__).resolve()
+     candidates = [
+         here.parents[3] / "docs/img/flowcept-logo.png",
+         here.parents[2] / "docs/img/flowcept-logo.png",
+         here.parents[1] / "docs/img/flowcept-logo.png",
+         Path("flowcept/docs/img/flowcept-logo.png"),
+     ]
+     for c in candidates:
+         if c.is_file():
+             return str(c)
+         print(str(c))
+     return None
+
+
+ def resolve_logo_path(package: str = "flowcept", resource: str = "docs/img/flowcept-logo.png") -> str | None:
+     """
+     Resolve the Flowcept logo whether running from an installed package or a src/ layout repo.
+     Returns an absolute string path or None if not found.
+     """
+     # 1) Try packaged resource (works if docs/img is included in the wheel/sdist)
+     try:
+         p = pkg_files(package).joinpath(resource)
+         if hasattr(p, "is_file") and p.is_file():
+             return str(p)
+     except Exception:
+         pass
+
+     here = Path(__file__).resolve()
+
+     # 2) src/ layout repo: .../<repo>/flowcept/src/flowcept/agents/gui/gui_utils.py
+     #    Find the nearest 'src' ancestor, then go to repo root (src/..), then docs/img/...
+     try:
+         src_dir = next(p for p in here.parents if p.name == "src")
+         repo_root = src_dir.parent  # <repo>/flowcept
+         cand = repo_root / "docs" / "img" / "flowcept-logo.png"
+         if cand.is_file():
+             return str(cand)
+     except StopIteration:
+         pass
+
+     # 3) Editable install package dir: .../src/flowcept (package root)
+     pkg_dir = here.parents[2]  # .../src/flowcept
+     cand = pkg_dir / "docs" / "img" / "flowcept-logo.png"
+     if cand.is_file():
+         return str(cand)
+
+     # 4) CWD fallback
+     cand = Path.cwd() / "flowcept" / "docs" / "img" / "flowcept-logo.png"
+     if cand.is_file():
+         return str(cand)
+
+     return None
+
+
+ def render_title_with_logo(
+     page_title: str, logo_path: str | None, logo_width: int = 150, add_to_sidebar: bool = True, debug: bool = False
+ ):
+     """
+     Render a header row with an optional logo next to the title; optionally mirror it in the sidebar.
+     """
+     if debug:
+         st.caption(f"Logo path resolved to: {logo_path or 'NOT FOUND'}")
+
+     if logo_path and Path(logo_path).is_file():
+         col_logo, col_title = st.columns([1, 6])
+         with col_logo:
+             st.image(logo_path, width=logo_width)
+         with col_title:
+             st.title(page_title)
+         if add_to_sidebar:
+             st.sidebar.image(logo_path, width=logo_width)
+     else:
+         st.title(page_title)
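With the PDF branch added above, ensure_data_uri can feed PDF artifacts straight into a Streamlit image column. A small usage sketch, assuming pymupdf is installed; the file name and DataFrame are illustrative:

    import pandas as pd
    import streamlit as st
    from flowcept.agents.gui.gui_utils import ensure_data_uri

    # Cells may hold PNG/JPEG bytes, a bytes repr string, an image path/URL, or PDF bytes.
    df = pd.DataFrame({"name": ["attention plot"], "image": [open("figure.pdf", "rb").read()]})
    df["image"] = df["image"].map(ensure_data_uri)  # PDFs become PNG data URIs via PyMuPDF
    st.dataframe(df, column_config={"image": st.column_config.ImageColumn("image")})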
@@ -24,9 +24,9 @@ ROUTING_PROMPT = (
      "Given the following user message, classify it into one of the following routes:\n"
      "- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
      "- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
-     #"- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
+     # "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
      "- in_context_query: if the user is querying the provenance data questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
-     #"- historical_prov_query: if the user wants to query historical provenance data\n"
+     # "- historical_prov_query: if the user wants to query historical provenance data\n"
      "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
      "- unknown: if you don't know.\n"
      "Respond with only the route label."
@@ -176,6 +176,7 @@ QUERY_GUIDELINES = """
  -To select the first (or earliest) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"started_at": 'min'}}).sort_values(by='started_at', ascending=True).head(N)['workflow_id']` - utilize `started_at` to sort!
  -To select the last (or latest or most recent) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"ended_at": 'max'}}).sort_values(by='ended_at', ascending=False).head(N)['workflow_id']` - utilize `ended_at` to sort!

+ -If the user does not ask for a specific workflow run, do not use `workflow_id` in your query.
  -To select the first or earliest or initial tasks, use or adapt the following: `df.sort_values(by='started_at', ascending=True)`
  -To select the last or final or most recent tasks, use or adapt the following: `df.sort_values(by='ended_at', ascending=False)`

@@ -226,10 +227,11 @@ OUTPUT_FORMATTING = """
  def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
      if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
          concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
-         custom_user_guidance_prompt = (f"You MUST consider the following guidance from the user:\n"
-                                        f"{concatenated_guidance}"
-                                        "------------------------------------------------------"
-                                        )
+         custom_user_guidance_prompt = (
+             f"You MUST consider the following guidance from the user:\n"
+             f"{concatenated_guidance}"
+             "------------------------------------------------------"
+         )
      else:
          custom_user_guidance_prompt = ""
      prompt = (
@@ -62,6 +62,9 @@ def check_llm() -> str:

  @mcp_flowcept.tool()
  def record_guidance(message: str) -> ToolResult:
+     """
+     Record guidance tool.
+     """
      ctx = mcp_flowcept.get_context()
      message = message.replace("@record", "")
      custom_guidance: List = ctx.request_context.lifespan_context.custom_guidance
@@ -222,7 +222,9 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df, custom_us


  @mcp_flowcept.tool()
- def generate_result_df(llm, query: str, dynamic_schema, example_values, df, custom_user_guidance=None, attempt_fix=True, summarize=True):
+ def generate_result_df(
+     llm, query: str, dynamic_schema, example_values, df, custom_user_guidance=None, attempt_fix=True, summarize=True
+ ):
      """
      Generate a result DataFrame from a natural language query using an LLM.

@@ -70,6 +70,7 @@ class MQDaoRedis(MQDao):
                  except Exception as e:
                      self.logger.error(f"Failed to process message {message}")
                      self.logger.exception(e)
+                     continue

                  current_trials = 0
              except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
@@ -78,7 +79,7 @@ class MQDaoRedis(MQDao):
                  sleep(3)
              except Exception as e:
                  self.logger.exception(e)
-                 break
+                 continue

      def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps):
          """Send the message."""
@@ -149,7 +149,8 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
      # a provenance task.
      if "data" in task:
          if "custom_metadata" in task:
-             if "image" in task["custom_metadata"].get("mime_type", ""):
+             mime_type = task["custom_metadata"].get("mime_type", "")
+             if "image" in mime_type or "application/pdf" in mime_type:
                  task_summary["image"] = task["data"]

      # Special handling for timestamp field
flowcept/configs.py CHANGED
@@ -235,6 +235,13 @@ INSTRUMENTATION = settings.get("instrumentation", {})
  INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)

  AGENT = settings.get("agent", {})
+ AGENT_AUDIO = os.getenv("AGENT_AUDIO", str(settings["agent"].get("audio_enabled", "false"))).strip().lower() in {
+     "1",
+     "true",
+     "yes",
+     "y",
+     "t",
+ }
  AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
  AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))

@@ -1,7 +1,6 @@
  """Controller module."""

- import os.path
- from typing import List, Dict
+ from typing import List, Dict, Any
  from uuid import uuid4

  from flowcept.commons.autoflush_buffer import AutoflushBuffer
@@ -175,25 +174,31 @@ class Flowcept(object):
          self._interceptor_instances[0]._mq_dao.bulk_publish(self.buffer)

      @staticmethod
-     def read_messages_file(file_path: str = None) -> List[Dict]:
+     def read_messages_file(file_path: str | None = None, return_df: bool = False):
          """
          Read a JSON Lines (JSONL) file containing captured Flowcept messages.

          This function loads a file where each line is a serialized JSON object.
          It joins the lines into a single JSON array and parses them efficiently
-         with ``orjson``.
+         with ``orjson``. If ``return_df`` is True, it returns a pandas DataFrame
+         created via ``pandas.json_normalize(..., sep='.')`` so nested fields become
+         dot-separated columns (for example, ``generated.attention``).

          Parameters
          ----------
          file_path : str, optional
-             Path to the messages file. If not provided, defaults to the
-             value of ``DUMP_BUFFER_PATH`` from the configuration.
-             If neither is provided, an assertion error is raised.
+             Path to the messages file. If not provided, defaults to the value of
+             ``DUMP_BUFFER_PATH`` from the configuration. If neither is provided,
+             an assertion error is raised.
+         return_df : bool, default False
+             If True, return a normalized pandas DataFrame. If False, return the
+             parsed list of dictionaries.

          Returns
          -------
-         List[dict]
-             A list of message objects (dictionaries) parsed from the file.
+         list of dict or pandas.DataFrame
+             A list of message objects when ``return_df`` is False,
+             otherwise a normalized DataFrame with dot-separated columns.

          Raises
          ------
@@ -203,35 +208,45 @@ class Flowcept(object):
              If the specified file does not exist.
          orjson.JSONDecodeError
              If the file contents cannot be parsed as valid JSON.
+         ModuleNotFoundError
+             If ``return_df`` is True but pandas is not installed.

          Examples
          --------
-         Read messages from a file explicitly:
+         Read messages as a list:

          >>> msgs = read_messages_file("offline_buffer.jsonl")
-         >>> print(len(msgs))
-         128
+         >>> len(msgs) > 0
+         True

-         Use the default dump buffer path from config:
+         Read messages as a normalized DataFrame:

-         >>> msgs = read_messages_file()
-         >>> for m in msgs[:2]:
-         ...     print(m["type"], m.get("workflow_id"))
-         task_start wf_123
-         task_end wf_123
+         >>> df = read_messages_file("offline_buffer.jsonl", return_df=True)
+         >>> "generated.attention" in df.columns
+         True
          """
+         import os
          import orjson

-         _buffer = []
          if file_path is None:
              file_path = DUMP_BUFFER_PATH
          assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
          if not os.path.exists(file_path):
-             raise f"File {file_path} has not been created. It will only be created if you run in fully offline mode."
+             raise FileNotFoundError(f"File '{file_path}' was not found. It is created only in fully offline mode.")
+
          with open(file_path, "rb") as f:
              lines = [ln for ln in f.read().splitlines() if ln]
-             _buffer = orjson.loads(b"[" + b",".join(lines) + b"]")
-         return _buffer
+
+         buffer: List[Dict[str, Any]] = orjson.loads(b"[" + b",".join(lines) + b"]")
+
+         if return_df:
+             try:
+                 import pandas as pd
+             except ModuleNotFoundError as e:
+                 raise ModuleNotFoundError("pandas is required when return_df=True. Please install pandas.") from e
+             return pd.json_normalize(buffer, sep=".")
+
+         return buffer

      def save_workflow(self, interceptor: str, interceptor_instance: BaseInterceptor):
          """
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
  # The expected format is: <Major>.<Minor>.<Patch>
  # This file is supposed to be automatically modified by the CI Bot.
  # See .github/workflows/version_bumper.py
- __version__ = "0.9.6"
+ __version__ = "0.9.8"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: flowcept
- Version: 0.9.6
+ Version: 0.9.8
  Summary: Capture and query workflow provenance data using data observability
  Author: Oak Ridge National Laboratory
  License-Expression: MIT
@@ -27,6 +27,7 @@ Requires-Dist: jupyterlab; extra == 'all'
  Requires-Dist: langchain-community; extra == 'all'
  Requires-Dist: langchain-openai; extra == 'all'
  Requires-Dist: lmdb; extra == 'all'
+ Requires-Dist: matplotlib; extra == 'all'
  Requires-Dist: mcp[cli]; extra == 'all'
  Requires-Dist: mlflow-skinny; extra == 'all'
  Requires-Dist: nbmake; extra == 'all'
@@ -38,6 +39,7 @@ Requires-Dist: psutil>=6.1.1; extra == 'all'
  Requires-Dist: py-cpuinfo; extra == 'all'
  Requires-Dist: pyarrow; extra == 'all'
  Requires-Dist: pymongo; extra == 'all'
+ Requires-Dist: pymupdf; extra == 'all'
  Requires-Dist: pytest; extra == 'all'
  Requires-Dist: pyyaml; extra == 'all'
  Requires-Dist: redis; extra == 'all'
@@ -87,13 +89,28 @@ Requires-Dist: confluent-kafka<=2.8.0; extra == 'kafka'
  Provides-Extra: llm-agent
  Requires-Dist: langchain-community; extra == 'llm-agent'
  Requires-Dist: langchain-openai; extra == 'llm-agent'
+ Requires-Dist: matplotlib; extra == 'llm-agent'
  Requires-Dist: mcp[cli]; extra == 'llm-agent'
+ Requires-Dist: pymupdf; extra == 'llm-agent'
  Requires-Dist: streamlit; extra == 'llm-agent'
+ Provides-Extra: llm-agent-audio
+ Requires-Dist: gtts; extra == 'llm-agent-audio'
+ Requires-Dist: langchain-community; extra == 'llm-agent-audio'
+ Requires-Dist: langchain-openai; extra == 'llm-agent-audio'
+ Requires-Dist: matplotlib; extra == 'llm-agent-audio'
+ Requires-Dist: mcp[cli]; extra == 'llm-agent-audio'
+ Requires-Dist: pydub; extra == 'llm-agent-audio'
+ Requires-Dist: pymupdf; extra == 'llm-agent-audio'
+ Requires-Dist: speechrecognition; extra == 'llm-agent-audio'
+ Requires-Dist: streamlit; extra == 'llm-agent-audio'
+ Requires-Dist: streamlit-mic-recorder; extra == 'llm-agent-audio'
  Provides-Extra: llm-google
  Requires-Dist: google-genai; extra == 'llm-google'
  Requires-Dist: langchain-community; extra == 'llm-google'
  Requires-Dist: langchain-openai; extra == 'llm-google'
+ Requires-Dist: matplotlib; extra == 'llm-google'
  Requires-Dist: mcp[cli]; extra == 'llm-google'
+ Requires-Dist: pymupdf; extra == 'llm-google'
  Requires-Dist: streamlit; extra == 'llm-google'
  Provides-Extra: lmdb
  Requires-Dist: lmdb; extra == 'lmdb'
@@ -1,7 +1,7 @@
  flowcept/__init__.py,sha256=urpwIEJeikV0P6ORXKsM5Lq4o6wCwhySS9A487BYGy4,2241
  flowcept/cli.py,sha256=eVnUrmZtVhZ1ldRMGB1QsqBzNC1Pf2CX33efnlaZ4gs,22842
- flowcept/configs.py,sha256=xw9cdk-bDkR4_bV2jBkDCe9__na9LKJW5tUG32by-m4,8216
- flowcept/version.py,sha256=52f8jJknjzSRjyruDcKgGzkV7OsLh2SvZl5sAIsExvU,306
+ flowcept/configs.py,sha256=aXgBkBpTs4_4MpvAe76aQ5lXl1gTmgk92bFiNqMQXPM,8382
+ flowcept/version.py,sha256=zH7JKitqQGm2p8zaw6JClXGAc-kbLbhXB70bFMI-zhU,306
  flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
  flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
  flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
@@ -9,18 +9,19 @@ flowcept/agents/dynamic_schema_tracker.py,sha256=TsmXRRkyUkqB-0bEgmeqSms8xj1tMMJ
  flowcept/agents/flowcept_agent.py,sha256=1sidjnNMdG0S6lUKBvml7ZfIb6o3u7zc6HNogsJbl9g,871
  flowcept/agents/flowcept_ctx_manager.py,sha256=-WmkddzzFY2dnU9LbZaoY4-5RcSAQH4FziEJgcC5LEI,7083
  flowcept/agents/gui/__init__.py,sha256=Qw9YKbAzgZqBjMQGnF7XWmfUo0fivtkDISQRK3LA3gU,113
- flowcept/agents/gui/agent_gui.py,sha256=8sTG3MjWBi6oc4tnfHa-duTBXWEE6RBxBE5uHooGkzI,2501
- flowcept/agents/gui/gui_utils.py,sha256=61JpFKu-yd7luWVBW6HQYd3feOmupR01tYsZxl804No,9517
+ flowcept/agents/gui/agent_gui.py,sha256=jsKPxJbXL2C2tXyNKpJnuVhSFktc0IpXyccW158rSWU,2752
+ flowcept/agents/gui/audio_utils.py,sha256=piA_dc36io1sYqLF6QArS4AMl-cfDa001jGhYz5LkB4,4279
+ flowcept/agents/gui/gui_utils.py,sha256=cQVhOgnfxJNUVZyXyO8f40nB1yaKAKVtBrwQmJjL0B0,14933
  flowcept/agents/llms/__init__.py,sha256=kzOaJic5VhMBnGvy_Fr5C6sRKVrRntH1ZnYz7f5_4-s,23
  flowcept/agents/llms/claude_gcp.py,sha256=fzz7235DgzVueuFj5odsr93jWtYHpYlXkSGW1kmmJwU,4915
  flowcept/agents/llms/gemini25.py,sha256=VARrjb3tITIh3_Wppmocp_ocSKVZNon0o0GeFEwTnTI,4229
  flowcept/agents/prompts/__init__.py,sha256=7ICsNhLYzvPS1esG3Vg519s51b1c4yN0WegJUb6Qvww,26
- flowcept/agents/prompts/general_prompts.py,sha256=OWVyToJL3w16zjycA0U0oRIx3XQRuklg0wqiUOny_1U,3892
- flowcept/agents/prompts/in_memory_query_prompts.py,sha256=70f4u3iFP9u1-CBM8yZR2cBu4qvxBe6FiKXLhRK8RCs,19634
+ flowcept/agents/prompts/general_prompts.py,sha256=Mj6dMdrnJfq-bibi1XQVNZ8zx5MZUwxTvYY_qijPfoI,3894
+ flowcept/agents/prompts/in_memory_query_prompts.py,sha256=0u6hIV1v-Fhk3dQVvbEW0qggi0KZbEBopMvJtgCNIVc,19664
  flowcept/agents/tools/__init__.py,sha256=Xqz2E4-LL_7DDcm1XYJFx2f5RdAsjeTpOJb_DPC7xyc,27
- flowcept/agents/tools/general_tools.py,sha256=_c8NCMU32u2HOvEDMTSDptmHZYMMh48WRkZWBayZGaY,3206
+ flowcept/agents/tools/general_tools.py,sha256=KS7ZTf1UbTxg0yQ6zCxh1g3NzcliYKWdurMArhPowxs,3248
  flowcept/agents/tools/in_memory_queries/__init__.py,sha256=K8-JI_lXUgquKkgga8Nef8AntGg_logQtjjQjaEE7yI,39
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=2kDmjz2cTN7q3eMjoTo4iruoyRTS0i370hSBYq2FZgA,25978
+ flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=GcfAiUBhQ1DU3QKk0kAy9TSq8XmZw691Xs0beZoO76A,25984
  flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py,sha256=xyrZupR86qoUptnnQ7PeF0LTzSOquEK2cjc0ghT1KBs,9018
  flowcept/analytics/__init__.py,sha256=46q-7vsHq_ddPNrzNnDgEOiRgvlx-5Ggu2ocyROMV0w,641
  flowcept/analytics/analytics_utils.py,sha256=FRJdBtQa7Hrk2oR_FFhmhmMf3X6YyZ4nbH5RIYh7KL4,8753
@@ -31,7 +32,7 @@ flowcept/commons/autoflush_buffer.py,sha256=Ohy_RNbq6BXn0_R83OL5iaTgGPmV8cT1moIR
  flowcept/commons/flowcept_logger.py,sha256=0asRucrDMeRXvsdhuCmH6lWO7lAt_Z5o5uW7rrQhcjc,1857
  flowcept/commons/query_utils.py,sha256=3tyK5VYA10iDtmtzNwa8OQGn93DBxsu6rTjHDphftSc,2208
  flowcept/commons/settings_factory.py,sha256=bMTjgXRfb5HsL2lPnLfem-9trqELbNWE04Ie7lSlxYM,1731
- flowcept/commons/task_data_preprocess.py,sha256=bJed8Jbo4Mxk6aRVt3sCn4_KxfV5jWXwAIQWwuqHm3U,13846
+ flowcept/commons/task_data_preprocess.py,sha256=-ceLexv2ZfZOAYF43DPagGwQPgt_L_lNKuK8ZCpnzXs,13914
  flowcept/commons/utils.py,sha256=gF6ENWlTpR2ZSw3yVNPNBTVzSpcgy-WuzYzwWSXXsug,9252
  flowcept/commons/vocabulary.py,sha256=_GzHJ1wSYJlLsu_uu1Am6N3zvc59S4FCuT5yp7lynPw,713
  flowcept/commons/daos/__init__.py,sha256=RO51svfHOg9naN676zuQwbj_RQ6IFHu-RALeefvtwwk,23
@@ -45,7 +46,7 @@ flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFm
  flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=EL8eQedvNLsVLMz4oHemBAsR1S6xFZiezM8dIqKmmCA,9696
  flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=kjZqPLIu5PaNeM4IDvOxkDRVGTd5UWwq3zhDvVirqW8,5067
  flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=tRdMGYDzdeIJxad-B4-DE6u8Wzs61eTzOW4ojZrnTxs,4057
- flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=WKPoMPBSce4shqbBkgsnuqJAJoZZ4U_hdebhyFqtejQ,5535
+ flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=ejBMxImA-h2KuMEAk3l7aU0chCcObCbUXEOXM6L4Zhc,5571
  flowcept/commons/flowcept_dataclasses/__init__.py,sha256=8KkiJh0WSRAB50waVluxCSI8Tb9X1L9nup4c8RN3ulc,30
  flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py,sha256=Cjw2PGYtZDfnwecz6G3S42Ncmxj7AIZVEBx05bsxRUo,399
  flowcept/commons/flowcept_dataclasses/task_object.py,sha256=XLFD8YTWsyDLSRcgZc5qK2a9yk97XnqZoUAL4T6HNPE,8110
@@ -53,7 +54,7 @@ flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfI
  flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
  flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
  flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
- flowcept/flowcept_api/flowcept_controller.py,sha256=JcUQXJfEjmg-KQsolIN5Ul7vbSxZUg8QTWaGAahZKTE,15251
+ flowcept/flowcept_api/flowcept_controller.py,sha256=jfssXUvG55RVXJBziq-lXekt7Dog3mAalo5Zsp_7_to,16060
  flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
  flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
  flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
@@ -93,9 +94,9 @@ flowcept/instrumentation/flowcept_loop.py,sha256=jea_hYPuXg5_nOWf-nNb4vx8A__OBM4
  flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
  flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
  flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
- resources/sample_settings.yaml,sha256=WSwpz8vmyx3oEsO6skV1KbFkYMDz-yIVQC6xlbUMDXs,6756
- flowcept-0.9.6.dist-info/METADATA,sha256=Bv9ZnCip57dtrn0Hv0GaT_i8CX3DfEGn6Ngclb7P-9Y,31581
- flowcept-0.9.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- flowcept-0.9.6.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
- flowcept-0.9.6.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
- flowcept-0.9.6.dist-info/RECORD,,
+ resources/sample_settings.yaml,sha256=ufx-07gm7u0UMJa_HPutD3w1VrZKaPBht5H1xFUbIWU,6779
+ flowcept-0.9.8.dist-info/METADATA,sha256=-a_76ZRJ8DAu_cwGtwiW4OIUdil-orVS7TC5heM-Yco,32439
+ flowcept-0.9.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ flowcept-0.9.8.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
+ flowcept-0.9.8.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
+ flowcept-0.9.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
- flowcept_version: 0.9.6 # Version of the Flowcept package. This setting file is compatible with this version.
+ flowcept_version: 0.9.8 # Version of the Flowcept package. This setting file is compatible with this version.

  project:
    debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
@@ -91,6 +91,7 @@ agent:
    model: '?'
    service_provider: '?'
    model_kwargs: {}
+   audio_enabled: false

  databases:
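Taken together with the configs.py change above, audio support stays off by default and can be enabled either by setting audio_enabled: true under the agent section of the settings file or through the AGENT_AUDIO environment variable. A sketch of the environment-variable route (the variable must be set before flowcept.configs is imported):

    import os

    # Any of "1", "true", "yes", "y", "t" (case-insensitive) enables audio.
    os.environ["AGENT_AUDIO"] = "true"

    from flowcept.configs import AGENT_AUDIO

    assert AGENT_AUDIO is True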