flowcept 0.8.9__py3-none-any.whl → 0.8.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/cli.py +460 -0
- flowcept/commons/daos/keyvalue_dao.py +19 -23
- flowcept/commons/daos/mq_dao/mq_dao_base.py +29 -29
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +4 -3
- flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
- flowcept/commons/daos/redis_conn.py +47 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +36 -8
- flowcept/commons/settings_factory.py +2 -4
- flowcept/commons/task_data_preprocess.py +200 -0
- flowcept/commons/utils.py +1 -1
- flowcept/configs.py +11 -9
- flowcept/flowcept_api/flowcept_controller.py +30 -13
- flowcept/flowceptor/adapters/agents/__init__.py +1 -0
- flowcept/flowceptor/adapters/agents/agents_utils.py +89 -0
- flowcept/flowceptor/adapters/agents/flowcept_agent.py +292 -0
- flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +186 -0
- flowcept/flowceptor/adapters/agents/prompts.py +51 -0
- flowcept/flowceptor/adapters/base_interceptor.py +17 -19
- flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
- flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
- flowcept/flowceptor/consumers/agent/__init__.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +101 -0
- flowcept/flowceptor/consumers/agent/client_agent.py +48 -0
- flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +145 -0
- flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +112 -0
- flowcept/flowceptor/consumers/base_consumer.py +90 -0
- flowcept/flowceptor/consumers/document_inserter.py +138 -53
- flowcept/flowceptor/telemetry_capture.py +1 -1
- flowcept/instrumentation/task_capture.py +19 -9
- flowcept/version.py +1 -1
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/METADATA +18 -6
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/RECORD +39 -25
- flowcept-0.8.11.dist-info/entry_points.txt +2 -0
- resources/sample_settings.yaml +44 -23
- flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
- flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
- flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/WHEEL +0 -0
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ flowcept/flowceptor/adapters/agents/flowcept_agent.py
@@ -0,0 +1,292 @@
+import json
+import os
+from typing import Dict, List
+
+import uvicorn
+from langchain.chains.retrieval_qa.base import RetrievalQA
+from mcp.server.fastmcp import FastMCP
+from mcp.server.fastmcp.prompts import base
+
+from flowcept.configs import AGENT
+from flowcept.flowcept_api.flowcept_controller import Flowcept
+from flowcept.flowceptor.adapters.agents.agents_utils import (
+    convert_mcp_to_langchain,
+    convert_mcp_messages_to_plain_text,
+)
+from flowcept.flowceptor.adapters.agents.flowcept_llm_prov_capture import (
+    invoke_llm,
+    invoke_qa_question,
+    add_preamble_to_response,
+)
+from flowcept.flowceptor.adapters.agents.prompts import (
+    get_question_prompt,
+    BASE_MULTITASK_PROMPT,
+    BASE_SINGLETASK_PROMPT,
+)
+from flowcept.flowceptor.consumers.agent.flowcept_agent_context_manager import FlowceptAgentContextManager
+from flowcept.flowceptor.consumers.agent.flowcept_qa_manager import FlowceptQAManager
+
+os.environ["SAMBASTUDIO_URL"] = AGENT.get("llm_server_url")
+os.environ["SAMBASTUDIO_API_KEY"] = AGENT.get("api_key")
+
+agent_controller = FlowceptAgentContextManager()
+mcp = FastMCP("FlowceptAgent", require_session=True, lifespan=agent_controller.lifespan)
+
+#################################################
+# PROMPTS
+#################################################
+
+
+@mcp.prompt()
+def single_task_used_generated_prompt(task_data: Dict, question: str) -> list[base.Message]:
+    """
+    Generate a prompt for analyzing a single task's provenance and resource usage.
+
+    Parameters
+    ----------
+    task_data : dict
+        The task object containing provenance and telemetry fields.
+    question : str
+        A specific question to ask about the task.
+
+    Returns
+    -------
+    list of base.Message
+        The structured prompt messages for LLM analysis.
+    """
+    msgs = BASE_SINGLETASK_PROMPT.copy()
+    msgs.append(get_question_prompt(question))
+    msgs.append(base.UserMessage(f"This is the task object I need you to focus on: \n {task_data}\n"))
+    return msgs
+
+
+@mcp.prompt()
+def multi_task_summary_prompt(task_list: List[Dict]) -> List[base.Message]:
+    """
+    Generate a prompt for analyzing multiple task objects in a workflow.
+
+    Parameters
+    ----------
+    task_list : list of dict
+        A list of task objects with provenance and telemetry data.
+
+    Returns
+    -------
+    list of base.Message
+        The structured prompt messages for the LLM.
+    """
+    messages = BASE_MULTITASK_PROMPT.copy()
+    pretty_tasks = json.dumps(task_list, indent=2, default=str)
+    messages.append(base.UserMessage(f"These are the tasks I need you to reason about:\n\n{pretty_tasks}\n\n"))
+    return messages
+
+
+@mcp.prompt()
+def multi_task_qa_prompt(question: str) -> List[base.Message]:
+    """
+    Generate a prompt for asking a specific question about multiple tasks.
+
+    Parameters
+    ----------
+    question : str
+        The user's query about task data.
+
+    Returns
+    -------
+    list of base.Message
+        Prompt messages structured for the LLM.
+    """
+    messages = BASE_MULTITASK_PROMPT.copy()
+    messages.append(get_question_prompt(question))
+    return messages
+
+
+#################################################
+# TOOLS
+#################################################
+
+
+@mcp.tool()
+def analyze_task_chunk() -> str:
+    """
+    Analyze a recent chunk of tasks using an LLM to detect patterns or anomalies.
+
+    Returns
+    -------
+    str
+        LLM-generated analysis of the selected task chunk.
+    """
+    LAST_K = 5  # TODO make this dynamic from config
+    ctx = mcp.get_context()
+    task_list = ctx.request_context.lifespan_context.task_summaries[:-LAST_K]
+    agent_controller.logger.debug(f"N Tasks = {len(task_list)}")
+    if not task_list:
+        return "No tasks available."
+
+    messages = multi_task_summary_prompt(task_list)
+    langchain_messages = convert_mcp_to_langchain(messages)
+    response = invoke_llm(langchain_messages)
+    result = add_preamble_to_response(response, mcp, task_data=None)
+    agent_controller.logger.debug(f"Result={result}")
+    return result
+
+
+@mcp.tool()
+def ask_about_tasks_buffer(question: str) -> str:
+    """
+    Use a QA chain to answer a question about the current task buffer.
+
+    Parameters
+    ----------
+    question : str
+        The question to ask about the buffered tasks.
+
+    Returns
+    -------
+    str
+        Answer from the QA chain or an error message.
+    """
+    ctx = mcp.get_context()
+    qa_chain = build_qa_chain_from_ctx(ctx)
+    if not qa_chain:
+        return "No tasks available."
+
+    messages = multi_task_qa_prompt(question)
+
+    try:
+        query_str = convert_mcp_messages_to_plain_text(messages)
+    except Exception as e:
+        agent_controller.logger.exception(e)
+        return f"An internal error happened: {e}"
+
+    response = invoke_qa_question(qa_chain, query_str=query_str)
+    agent_controller.logger.debug(f"Response={response}")
+    return response
+
+
+def build_qa_chain_from_ctx(ctx) -> RetrievalQA:
+    """
+    Build or retrieve a QA chain from the current request context.
+
+    Parameters
+    ----------
+    ctx : RequestContext
+        The current MCP request context.
+
+    Returns
+    -------
+    RetrievalQA or None
+        A QA chain built from vectorstore metadata, or None if unavailable.
+    """
+    qa_chain = ctx.request_context.lifespan_context.qa_chain
+    if not qa_chain:
+        vectorstore_path = ctx.request_context.lifespan_context.vectorstore_path
+        if not vectorstore_path:
+            return None
+        agent_controller.logger.debug(f"Path: {vectorstore_path}")
+        qa_chain = FlowceptQAManager.build_qa_chain_from_vectorstore_path(vectorstore_path)
+        if not qa_chain:
+            return None
+    return qa_chain
+
+
+@mcp.tool()
+def get_latest(n: int = None) -> str:
+    """
+    Return the most recent task(s) from the task buffer.
+
+    Parameters
+    ----------
+    n : int, optional
+        Number of most recent tasks to return. If None, return only the latest.
+
+    Returns
+    -------
+    str
+        JSON-encoded task(s).
+    """
+    ctx = mcp.get_context()
+    tasks = ctx.request_context.lifespan_context.tasks
+    if not tasks:
+        return "No tasks available."
+    if n is None:
+        return json.dumps(tasks[-1])
+    return json.dumps(tasks[-n])
+
+
+@mcp.tool()
+def check_liveness() -> str:
+    """
+    Confirm the agent is alive and responding.
+
+    Returns
+    -------
+    str
+        Liveness status string.
+    """
+    return f"I'm {mcp.name} and I'm ready!"
+
+
+@mcp.tool()
+def check_llm() -> str:
+    """
+    Check connectivity and response from the LLM backend.
+
+    Returns
+    -------
+    str
+        LLM response, formatted with MCP metadata.
+    """
+    messages = [base.UserMessage("Hi, are you working properly?")]
+
+    langchain_messages = convert_mcp_to_langchain(messages)
+    response = invoke_llm(langchain_messages)
+    result = add_preamble_to_response(response, mcp)
+
+    return result
+
+
+@mcp.tool()
+def ask_about_latest_task(question) -> str:
+    """
+    Ask a question specifically about the latest task in the buffer.
+
+    Parameters
+    ----------
+    question : str
+        A user-defined question to analyze the latest task.
+
+    Returns
+    -------
+    str
+        Response from the LLM based on the latest task.
+    """
+    ctx = mcp.get_context()
+    tasks = ctx.request_context.lifespan_context.task_summaries
+    if not tasks:
+        return "No tasks available."
+    task_data = tasks[-1]
+
+    messages = single_task_used_generated_prompt(task_data, question)
+
+    langchain_messages = convert_mcp_to_langchain(messages)
+
+    response = invoke_llm(langchain_messages)
+    result = add_preamble_to_response(response, mcp, task_data)
+    return result
+
+
+def main():
+    """
+    Start the MCP server.
+    """
+    f = Flowcept(start_persistence=False, save_workflow=False, check_safe_stops=False).start()
+    f.logger.info(f"This section's workflow_id={Flowcept.current_workflow_id}")
+    setattr(mcp, "workflow_id", f.current_workflow_id)
+    uvicorn.run(
+        mcp.streamable_http_app, host=AGENT.get("mcp_host", "0.0.0.0"), port=AGENT.get("mcp_port", 8000), lifespan="on"
+    )
+
+
+if __name__ == "__main__":
+    main()
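This new module exposes the Flowcept agent as a FastMCP server, served by uvicorn over streamable HTTP. The sketch below shows how the registered tools might be exercised from a client; it is a minimal sketch that assumes the MCP Python SDK's streamable-HTTP client, that `main()` is running locally with the default host/port shown above, and that the endpoint is mounted at `/mcp` (FastMCP's usual default) — none of which is stated in the diff itself.

```python
# Hedged sketch: calling the agent's tools from an MCP client.
# Assumptions: server started via main() on localhost:8000, endpoint at /mcp.
import asyncio

from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client


async def ping_agent():
    async with streamablehttp_client("http://localhost:8000/mcp") as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Tool names mirror the @mcp.tool() functions registered above.
            liveness = await session.call_tool("check_liveness", arguments={})
            analysis = await session.call_tool("analyze_task_chunk", arguments={})
            print(liveness, analysis)


asyncio.run(ping_agent())
```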
--- /dev/null
+++ flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py
@@ -0,0 +1,186 @@
+import inspect
+import json
+from typing import List, Union, Dict
+
+from langchain.chains.retrieval_qa.base import RetrievalQA
+from langchain_community.callbacks import get_openai_callback
+from langchain_core.language_models import LLM
+from langchain_core.messages import HumanMessage, AIMessage
+
+from flowcept.flowceptor.adapters.agents.agents_utils import build_llm_model
+from flowcept.instrumentation.task_capture import FlowceptTask
+
+
+def add_preamble_to_response(response, mcp, task_data=None):
+    """
+    Add workflow/task-related metadata as a preamble to the LLM response.
+
+    Parameters
+    ----------
+    response : str
+        The LLM response text.
+    mcp : Any
+        The agent or workflow object, expected to have an optional `workflow_id` attribute.
+    task_data : dict, optional
+        Dictionary containing task metadata such as `workflow_id` and `task_id`.
+
+    Returns
+    -------
+    str
+        The response string prefixed with workflow/task metadata.
+    """
+    preamb_obj = {}
+    if hasattr(mcp, "workflow_id"):
+        agent_id = getattr(mcp, "workflow_id")
+        preamb_obj["agent_id"] = agent_id
+    if task_data:
+        preamb_obj["workflow_id"] = task_data.get("workflow_id")
+        preamb_obj["task_id"] = task_data.get("task_id")
+    result = ""
+    if preamb_obj:
+        result = f"{json.dumps(preamb_obj)}\n\n"
+    result += f"Response:\n{response}"
+    return result
+
+
+def invoke_llm(messages: List[Union[HumanMessage, AIMessage]], llm: LLM = None, activity_id=None) -> str:
+    """
+    Invoke an LLM with a list of chat-style messages and return its response.
+
+    Parameters
+    ----------
+    messages : List[Union[HumanMessage, AIMessage]]
+        The list of messages forming the conversation history for the LLM.
+    llm : LLM, optional
+        An instance of a LangChain-compatible LLM. If None, a default model is built.
+    activity_id : str, optional
+        An optional identifier for the activity, used for Flowcept instrumentation.
+
+    Returns
+    -------
+    str
+        The LLM's text response.
+    """
+    if llm is None:
+        llm = build_llm_model()
+    if activity_id is None:
+        activity_id = inspect.stack()[1].function
+
+    used = {"messages": [{"role": msg.type, "content": msg.content} for msg in messages]}
+
+    llm_metadata = _extract_llm_metadata(llm)
+
+    with FlowceptTask(
+        activity_id=activity_id,
+        used=used,
+        custom_metadata={"llm_metadata": llm_metadata, "query_type": "llm_invoke"},
+        subtype="llm_query",
+    ) as t:
+        with get_openai_callback() as cb:
+            response = llm.invoke(messages)
+            generated = {
+                "text_response": response,
+                "total_tokens": cb.total_tokens,
+                "prompt_tokens": cb.prompt_tokens,
+                "completion_tokens": cb.completion_tokens,
+                "cost": cb.total_cost,
+            }
+            t.end(generated)
+            return response
+
+
+def invoke_qa_question(qa_chain: RetrievalQA, query_str: str, activity_id=None) -> str:
+    """
+    Query a RetrievalQA chain with a given question and return the response.
+
+    Parameters
+    ----------
+    qa_chain : RetrievalQA
+        The QA chain object to invoke.
+    query_str : str
+        The question to ask the QA chain.
+    activity_id : str, optional
+        An optional identifier for the activity, used for Flowcept instrumentation.
+
+    Returns
+    -------
+    str
+        The textual result from the QA chain.
+    """
+    used = {"message": query_str}
+    qa_chain_metadata = _extract_qa_chain_metadata(qa_chain)
+    with FlowceptTask(
+        activity_id=activity_id,
+        used=used,
+        subtype="llm_query",
+        custom_metadata={"qa_chain_metadata": qa_chain_metadata, "query_type": "qa_chain"},
+    ) as t:
+        with get_openai_callback() as cb:
+            response = dict(qa_chain({"query": f"{query_str}"}))  # TODO bug?
+            text_response = response.pop("result")
+            generated = {
+                "response": response,
+                "text_response": text_response,
+                "total_tokens": cb.total_tokens,
+                "prompt_tokens": cb.prompt_tokens,
+                "completion_tokens": cb.completion_tokens,
+                "cost": cb.total_cost,
+            }
+            t.end(generated)
+            return text_response
+
+
+def _extract_llm_metadata(llm: LLM) -> Dict:
+    """
+    Extract metadata from a LangChain LLM instance.
+
+    Parameters
+    ----------
+    llm : LLM
+        The language model instance.
+
+    Returns
+    -------
+    dict
+        Dictionary containing class name, module, model name, and configuration if available.
+    """
+    llm_metadata = {
+        "class_name": llm.__class__.__name__,
+        "module": llm.__class__.__module__,
+        "config": llm.dict() if hasattr(llm, "dict") else {},
+    }
+    return llm_metadata
+
+
+def _extract_qa_chain_metadata(qa_chain: RetrievalQA) -> Dict:
+    """
+    Extract metadata from a RetrievalQA chain, including LLM and retriever details.
+
+    Parameters
+    ----------
+    qa_chain : RetrievalQA
+        The QA chain to extract metadata from.
+
+    Returns
+    -------
+    dict
+        Metadata dictionary including QA chain class name, retriever details, and optionally LLM metadata.
+    """
+    retriever = getattr(qa_chain, "retriever", None)
+    retriever_metadata = {
+        "class_name": retriever.__class__.__name__ if retriever else None,
+        "module": retriever.__class__.__module__ if retriever else None,
+        "vectorstore_type": getattr(retriever, "vectorstore", None).__class__.__name__
+        if hasattr(retriever, "vectorstore")
+        else None,
+        "retriever_config": retriever.__dict__ if retriever else {},
+    }
+    metadata = {
+        "qa_chain_class": qa_chain.__class__.__name__,
+        "retriever": retriever_metadata,
+    }
+    llm = getattr(qa_chain, "llm", None)
+    if llm:
+        metadata["llm"] = _extract_llm_metadata(llm)
+
+    return metadata
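The functions above wrap every LLM call in a `FlowceptTask`, so prompts, responses, token counts, and cost are recorded as provenance. A hypothetical usage sketch, assuming an LLM backend reachable through the `AGENT` settings (llm_server_url / api_key):

```python
# Hypothetical usage of invoke_llm; a configured, reachable LLM backend is assumed.
from langchain_core.messages import HumanMessage

from flowcept.flowceptor.adapters.agents.flowcept_llm_prov_capture import invoke_llm

messages = [HumanMessage(content="Summarize resource usage across the last workflow run.")]
# The call is captured as a FlowceptTask (subtype "llm_query"); token counts and
# cost reported by get_openai_callback() land in the task's `generated` field.
answer = invoke_llm(messages, activity_id="manual_llm_check")
print(answer)
```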
--- /dev/null
+++ flowcept/flowceptor/adapters/agents/prompts.py
@@ -0,0 +1,51 @@
+from mcp.server.fastmcp.prompts import base
+
+BASE_ROLE = (
+    "You are a helpful assistant analyzing provenance data from a large-scale workflow composed of multiple tasks."
+)
+
+DATA_SCHEMA_PROMPT = (
+    "A task object has its provenance: input data is stored in the 'used' field, output in the 'generated' field. "
+    "Tasks sharing the same 'workflow_id' belong to the same workflow execution trace. "
+    "Pay attention to the 'tags' field, as it may indicate critical tasks. "
+    "The 'telemetry_summary' field reports CPU, disk, memory, and network usage, along with 'duration_sec'. "
+    "Task placement is stored in the 'hostname' field."
+)
+
+QUESTION_PROMPT = "I am particularly more interested in the following question: %QUESTION%."
+
+
+def get_question_prompt(question: str):
+    """Generates a user prompt with the given question filled in."""
+    return base.UserMessage(QUESTION_PROMPT.replace("%QUESTION%", question))
+
+
+SINGLE_TASK_PROMPT = {
+    "role": f"{BASE_ROLE} You are focusing now on a particular task object which I will provide below.",
+    "data_schema": DATA_SCHEMA_PROMPT,
+    "job": (
+        "Your job is to analyze this single task. Find any anomalies, relationships, or correlations between input,"
+        " output, resource usage metrics, task duration, and task placement. "
+        "Correlations involving 'used' vs 'generated' data are especially important. "
+        "So are relationships between (used or generated) data and resource metrics. "
+        "Highlight outliers or critical information and give actionable insights or recommendations. "
+        "Explain what this task may be doing, using the data provided."
+    ),
+}
+
+MULTITASK_PROMPTS = {
+    "role": BASE_ROLE,
+    "data_schema": DATA_SCHEMA_PROMPT,
+    "job": (
+        "Your job is to analyze a list of task objects to identify patterns across tasks, anomalies, relationships,"
+        " or correlations between inputs, outputs, resource usage, duration, and task placement. "
+        "Correlations involving 'used' vs 'generated' data are especially important. "
+        "So are relationships between (used or generated) data and resource metrics. "
+        "Try to infer the purpose of the workflow. "
+        "Highlight outliers or critical tasks and give actionable insights or recommendations. "
+        "Use the data provided to justify your analysis."
+    ),
+}
+
+BASE_SINGLETASK_PROMPT = [base.UserMessage(SINGLE_TASK_PROMPT[k]) for k in ("role", "data_schema", "job")]
+BASE_MULTITASK_PROMPT = [base.UserMessage(MULTITASK_PROMPTS[k]) for k in ("role", "data_schema", "job")]
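These prompt fragments are composed into message lists by the agent module above. An illustrative composition (the question string is made up for the example):

```python
from flowcept.flowceptor.adapters.agents.prompts import (
    BASE_MULTITASK_PROMPT,
    get_question_prompt,
)

# Copy the base messages (role, data schema, job) and append a concrete question.
msgs = BASE_MULTITASK_PROMPT.copy()
msgs.append(get_question_prompt("Which tasks have the highest duration_sec?"))
print(len(msgs), "prompt messages")  # 4: role, data schema, job, plus the question
```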
--- flowcept/flowceptor/adapters/base_interceptor.py
+++ flowcept/flowceptor/adapters/base_interceptor.py
@@ -9,7 +9,6 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
 )
 from flowcept.configs import (
     ENRICH_MESSAGES,
-    INSTRUMENTATION,
 )
 from flowcept.commons.flowcept_logger import FlowceptLogger
 from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
@@ -29,8 +28,6 @@ from flowcept.flowceptor.telemetry_capture import TelemetryCapture
 class BaseInterceptor(object):
     """Base interceptor class."""
 
-    # KINDS_TO_NOT_EXPLICITLY_CONTROL = {"dask"}
-
     @staticmethod
     def build(kind: str) -> "BaseInterceptor":
         """Build the Interceptor."""
@@ -43,6 +40,11 @@ class BaseInterceptor(object):
             from flowcept.flowceptor.adapters.tensorboard.tensorboard_interceptor import TensorboardInterceptor
 
             return TensorboardInterceptor()
+
+        elif kind == "broker_mqtt":
+            from flowcept.flowceptor.adapters.brokers.mqtt_interceptor import MQTTBrokerInterceptor
+
+            return MQTTBrokerInterceptor()
         elif kind == "dask_worker":
             from flowcept.flowceptor.adapters.dask.dask_interceptor import DaskWorkerInterceptor
 
@@ -50,23 +52,15 @@ class BaseInterceptor(object):
         elif kind in "dask":
             # This is dask's client interceptor. We essentially use it to store the dask workflow.
             # That's why we don't need another special interceptor and we can reuse the instrumentation one.
-
-        elif kind == "instrumentation":
-            return BaseInterceptor._build_instrumentation_interceptor()
-        else:
-            raise NotImplementedError
+            from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
 
-
-
-        # By using singleton, we lose the thread safety for the Interceptor, particularly, its MQ buffer.
-        # Since some use cases need threads, this allows disabling the singleton for more thread safety.
-        is_singleton = INSTRUMENTATION.get("singleton", True)
-        if is_singleton:
+            return InstrumentationInterceptor.get_instance()
+        elif kind == "instrumentation":
             from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
 
             return InstrumentationInterceptor.get_instance()
         else:
-
+            raise NotImplementedError
 
     def __init__(self, plugin_key=None, kind=None):
         self.logger = FlowceptLogger()
@@ -89,17 +83,21 @@ class BaseInterceptor(object):
         """Prepare a task."""
         raise NotImplementedError()
 
-    def start(self, bundle_exec_id) -> "BaseInterceptor":
+    def start(self, bundle_exec_id, check_safe_stops: bool = True) -> "BaseInterceptor":
         """Start an interceptor."""
         if not self.started:
             self._bundle_exec_id = bundle_exec_id
-            self._mq_dao.init_buffer(self._interceptor_instance_id, bundle_exec_id)
+            self._mq_dao.init_buffer(self._interceptor_instance_id, bundle_exec_id, check_safe_stops)
             self.started = True
         return self
 
-    def stop(self):
+    def stop(self, check_safe_stops: bool = True):
         """Stop an interceptor."""
-        self._mq_dao.stop(
+        self._mq_dao.stop(
+            interceptor_instance_id=self._interceptor_instance_id,
+            check_safe_stops=check_safe_stops,
+            bundle_exec_id=self._bundle_exec_id,
+        )
         self.started = False
 
     def observe(self, *args, **kwargs):
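This hunk teaches the interceptor factory about the new MQTT broker adapter and threads a `check_safe_stops` flag through `start()`/`stop()`. A hedged lifecycle sketch, assuming the MQTT adapter is configured in the settings and using an arbitrary placeholder for `bundle_exec_id`:

```python
from flowcept.flowceptor.adapters.base_interceptor import BaseInterceptor

# build() now recognizes "broker_mqtt"; connection details come from the adapter settings.
interceptor = BaseInterceptor.build("broker_mqtt")
interceptor.start(bundle_exec_id="example-bundle", check_safe_stops=False)
# ... intercepted broker messages flow into the MQ buffer while started ...
interceptor.stop(check_safe_stops=False)
```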
--- /dev/null
+++ flowcept/flowceptor/adapters/brokers/__init__.py
@@ -0,0 +1 @@
+"""Brokers' adapters subpackage."""