flowcept 0.9.17__py3-none-any.whl → 0.9.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. flowcept/agents/agent_client.py +10 -4
  2. flowcept/agents/agents_utils.py +54 -19
  3. flowcept/agents/flowcept_agent.py +116 -12
  4. flowcept/agents/flowcept_ctx_manager.py +116 -46
  5. flowcept/agents/gui/gui_utils.py +21 -3
  6. flowcept/agents/prompts/general_prompts.py +1 -1
  7. flowcept/agents/prompts/in_memory_query_prompts.py +158 -45
  8. flowcept/agents/tools/general_tools.py +20 -3
  9. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +14 -31
  10. flowcept/commons/daos/docdb_dao/lmdb_dao.py +48 -0
  11. flowcept/commons/daos/keyvalue_dao.py +12 -3
  12. flowcept/commons/daos/mq_dao/mq_dao_base.py +37 -20
  13. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +2 -2
  14. flowcept/commons/daos/mq_dao/mq_dao_redis.py +33 -2
  15. flowcept/commons/flowcept_dataclasses/task_object.py +4 -1
  16. flowcept/configs.py +17 -3
  17. flowcept/flowcept_api/flowcept_controller.py +5 -1
  18. flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +33 -2
  19. flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +18 -4
  20. flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +1 -0
  21. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +9 -10
  22. flowcept/flowceptor/consumers/base_consumer.py +22 -4
  23. flowcept/flowceptor/consumers/document_inserter.py +22 -1
  24. flowcept/instrumentation/flowcept_task.py +147 -51
  25. flowcept/instrumentation/task_capture.py +10 -1
  26. flowcept/version.py +1 -1
  27. {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/METADATA +8 -1
  28. {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/RECORD +32 -32
  29. {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/WHEEL +1 -1
  30. resources/sample_settings.yaml +2 -1
  31. {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/entry_points.txt +0 -0
  32. {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/licenses/LICENSE +0 -0
flowcept/agents/prompts/in_memory_query_prompts.py
@@ -1,32 +1,117 @@
  # flake8: noqa: E501
  # flake8: noqa: D103
 
- COMMON_TASK_FIELDS = """
- | Column | Data Type | Description |
- |-------------------------------|-------------|
- | `workflow_id` | string | Workflow the task belongs to. Use this field when the query is asking about workflow execution |
- | `task_id` | string | Task identifier. |
- | `parent_task_id` | string | A task may be directly linked to others. Use this field when the query asks for a task informed by (or associated with or linked to) other task. |
- | `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for "task type" queries. One activity_id is linked to multiple task_ids. |
- | `campaign_id` | string | A group of workflows. |
- | `hostname` | string | Compute node name. |
- | `agent_id` | string | Set if executed by an agent. |
- | `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query is has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |
- | `ended_at` | datetime64[ns, UTC] | End time of a task. |
- | `subtype` | string | Subtype of a task. |
- | `tags` | List[str] | List of descriptive tags. |
- | `image` | blob | Raw binary data related to an image. |
- | `telemetry_summary.duration_sec` | float | Task duration (seconds). |
- | `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start.|
- | `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time ( seconds ) between task start and end times.|
- | `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution.|
- | `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start.|
- ---
- For any queries involving CPU, use fields that begin with telemetry_summary.cpu
+
+ def generate_common_task_fields(current_fields):
+     # TODO: make this better
+     common_task_fields = """
+ | Column | Data Type | Description |
+ |-------------------------------|-------------|
  """
+     common_task_fields += (
+         "| `workflow_id` | string | Workflow the task belongs to. Use this field when the query is asking about workflow execution |\n"
+         if "workflow_id" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `task_id` | string | Task identifier. |\n" if "task_id" in current_fields else ""
+     )
+     common_task_fields += (
+         "| `parent_task_id` | string | A task may be directly linked to others. Use this field when the query asks for a task informed by (or associated with or linked to) other task. |\n"
+         if "parent_task_id" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for \"task type\" queries. One activity_id is linked to multiple task_ids. |\n"
+         if "activity_id" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `campaign_id` | string | A group of workflows. |\n"
+         if "campaign_id" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `hostname` | string | Compute node name. |\n" if "hostname" in current_fields else ""
+     )
+     common_task_fields += (
+         "| `agent_id` | string | Set if executed by an agent. |\n"
+         if "agent_id" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |\n"
+         if "started_at" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `ended_at` | datetime64[ns, UTC] | End time of a task. |\n"
+         if "ended_at" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `subtype` | string | Subtype of a task. |\n" if "subtype" in current_fields else ""
+     )
+     common_task_fields += (
+         "| `tags` | List[str] | List of descriptive tags. |\n"
+         if "tags" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `image` | blob | Raw binary data related to an image. |\n"
+         if "image" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `telemetry_summary.duration_sec` | float | Task duration (seconds). |\n"
+         if "telemetry_summary.duration_sec" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start. |\n"
+         if "telemetry_summary.cpu.percent_all_diff" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time (seconds) between task start and end times. |\n"
+         if "telemetry_summary.cpu.user_time_diff" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution. |\n"
+         if "telemetry_summary.cpu.system_time_diff" in current_fields
+         else ""
+     )
+     common_task_fields += (
+         "| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start. |\n"
+         if "telemetry_summary.cpu.idle_time_diff" in current_fields
+         else ""
+     )
+
+     common_task_fields += "\n For any queries involving CPU, use fields that begin with telemetry_summary.cpu"
+
+     return common_task_fields
+
 
  DF_FORM = "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."
 
+ CURRENT_DF_COLUMNS_PROMPT = """
+ ### ABSOLUTE FIELD CONSTRAINT -- THIS IS CRITICAL
+
+ The following list is the ONLY valid field names in df. Treat this as the schema:
+
+ ALLOWED_FIELDS = [COLS]
+
+ You MUST treat this list as authoritative.
+
+ - You may only use fields names that appear EXACTLY (string match) in ALLOWED_FIELDS.
+ - You are NOT allowed to create new field names by:
+   - adding or removing prefixes like "used." or "generated."
+   - combining words
+   - guessing.
+ - If a field name is not in ALLOWED_FIELDS, you MUST NOT use it.
+ """
+
 
  def get_example_values_prompt(example_values):
      values_prompt = f"""
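Later hunks pass `list(df.columns)` as `current_fields`, so the rewritten helper presumably emits only the schema rows whose columns exist in the live DataFrame. A minimal sketch (the column names here are invented for illustration):

```python
import pandas as pd

# Hypothetical df flattened via pd.json_normalize, as the prompts assume.
df = pd.DataFrame(columns=["workflow_id", "task_id", "started_at", "telemetry_summary.duration_sec"])

# Emits only the four matching rows of the Markdown schema table.
print(generate_common_task_fields(list(df.columns)))
```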
@@ -39,7 +124,7 @@ def get_example_values_prompt(example_values):
      return values_prompt
 
 
- def get_df_schema_prompt(dynamic_schema, example_values):
+ def get_df_schema_prompt(dynamic_schema, example_values, current_fields):
      schema_prompt = f"""
  ## DATAFRAME STRUCTURE
 
@@ -53,14 +138,19 @@ def get_df_schema_prompt(dynamic_schema, example_values):
  The schema for these fields is defined in the dictionary below.
  It maps each activity ID to its inputs (i) and outputs (o), using flattened field names that include `used.` or `generated.` prefixes to indicate the role the field played in the task. These names match the columns in the dataframe `df`.
 
- ```python
  {dynamic_schema}
- ```
  Use this schema and fields to understand what inputs and outputs are valid for each activity.
+
+ IMPORTANT: The user might say used for outputs or generated for inputs, which might confuse you. Do not get tricked by the user.
+ Ignore the natural-language words "used" and "generated".
+ - The English phrase "used in the calculation" does NOT mean you must use a `used.` column.
+ - The English word "generated" in the question does NOT force you to use a `generated.` column either.
+
+ ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding. THIS IS CRITICAL.
 
  ### 2. Additional fields for tasks:
 
- {COMMON_TASK_FIELDS}
+ {generate_common_task_fields(current_fields)}
  ---
  """
 
@@ -70,12 +160,12 @@ def get_df_schema_prompt(dynamic_schema, example_values):
      return prompt
 
 
- def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
+ def generate_plot_code_prompt(query, dynamic_schema, example_values, current_fields) -> str:
      PLOT_PROMPT = f"""
  You are a Streamlit chart expert.
  {DF_FORM}
 
- {get_df_schema_prompt(dynamic_schema, example_values)}
+ {get_df_schema_prompt(dynamic_schema, example_values, current_fields)}
 
  ### 3. Guidelines
 
@@ -121,10 +211,14 @@ def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
  "plot_code": "import matplotlib.pyplot as plt\nplt.hist(result['n_controls'])\nst.pyplot(plt)"
  }}
 
+ Your response must be only the raw Python code in the format:
+ result = ...
+ Except for the `result` variable, YOU MUST NEVER CREATE ANY OTHER VARIABLE. NEVER!
+
  User request:
  {query}
 
- THE OUTPUT MUST BE A VALID JSON ONLY. DO NOT SAY ANYTHING ELSE.
+
 
  """
      return PLOT_PROMPT
@@ -139,7 +233,7 @@ QUERY_GUIDELINES = """
 
  - Use `df` as the base DataFrame.
  - Use `activity_id` to filter by task type (valid values = schema keys).
- - Use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
+ - ONLY IF the ALLOWED_FIELDS list allow, use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
  - Use `telemetry_summary.duration_sec` for performance-related questions.
  - Use `hostname` when user mentions *where* a task ran.
  - Use `agent_id` when the user refers to agents (non-null means task was agent-run).
@@ -153,7 +247,7 @@ QUERY_GUIDELINES = """
  **THE COLUMN 'used' DOES NOT EXIST**
  **THE COLUMN 'generated' DOES NOT EXIST**
  - **When filtering by `activity_id`, only select columns that belong to that activity’s schema.**
- - Use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
+ - Always observing the ALLOWED_FIELDS list, use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
  - Explicitly list the selected columns — **never return all columns**
  - **Only include telemetry columns if used in the query logic.**
  -THERE IS NOT A FIELD NAMED `telemetry_summary.start_time` or `telemetry_summary.end_time` or `used.start_time` or `used.end_time`. Use `started_at` and `ended_at` instead when you want to find the duration of a task, activity, or workflow execution.
@@ -187,6 +281,17 @@ QUERY_GUIDELINES = """
  -**Do NOT use any of those: df[df['started_at'].idxmax()], df[df['started_at'].idxmin()], df[df['ended_at'].idxmin()], df[df['ended_at'].idxmax()]. Those are not valid Pandas Code.**
  - When the query mentions "each task", or "each activity", or "each workflow", make sure you show (project) the correct id column in the results (i.e., respectively: `task_id`, `activity_id`, `workflow_id`) to identify those in the results.
  - Use df[<role>.field_name] == True or df[<role>.field_name] == False when user queries boolean fields, where <role> is either used or generated, depending on the field name. Make sure field_name is a valid field in the DataFrame.
+
+ If the query asks you to report which values appear in one or more columns
+ (for example “which X were used”, “list all Y”, “what X and Y were generated”), then:
+
+ For each relevant column, select that column from df.
+ Call .dropna() on that column to remove missing values.
+ After dropping NaNs, apply .unique(), .value_counts(), or any other aggregation as needed.
+ Select that column.
+ Call .dropna() on it.
+ Then call .unique(), .value_counts(), or any other aggregation.
+
 
  - **Do not include metadata columns unless explicitly required by the user query.**
  """
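The dropna-then-aggregate pattern this new guideline prescribes, as a concrete sketch (`hostname` is one of the schema columns listed earlier):

```python
# "Which hostnames were used?": select the column, drop missing values, aggregate.
result = df["hostname"].dropna().unique()

# Counted variant of the same pattern.
result = df["hostname"].dropna().value_counts()
```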
@@ -200,15 +305,16 @@ FEW_SHOTS = """
  # Q: How many tasks for each activity?
  result = df['activity_id'].value_counts()
 
- # Q: What is the average loss across all tasks?
- result = df['generated.loss'].mean()
-
- # Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
- result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
-
- # Q: Show duration and generated scores for 'simulate_layer' tasks
- result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
  """
+ # # Q: What is the average loss across all tasks?
+ # result = df['generated.loss'].mean()
+ #
+ # # Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
+ # result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][
+ #     ['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
+ #
+ # # Q: Show duration and generated scores for 'simulate_layer' tasks
+ # result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
 
  OUTPUT_FORMATTING = """
  6. Final Instructions
@@ -226,7 +332,7 @@ OUTPUT_FORMATTING = """
  """
 
 
- def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
+ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances, current_fields):
      if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
          concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
          custom_user_guidance_prompt = (
@@ -236,11 +342,14 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
          )
      else:
          custom_user_guidance_prompt = ""
+
+     curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
      prompt = (
          f"{ROLE}"
          f"{JOB}"
          f"{DF_FORM}"
-         f"{get_df_schema_prompt(dynamic_schema, example_values)}"  # main tester
+         f"{curr_cols}"
+         f"{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}"  # main tester
          f"{QUERY_GUIDELINES}"  # main tester
          f"{FEW_SHOTS}"  # main tester
          f"{custom_user_guidance_prompt}"
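For reference, the `[COLS]` placeholder in `CURRENT_DF_COLUMNS_PROMPT` is filled by plain string substitution; a minimal sketch with an invented column list (in the package this is `list(df.columns)`):

```python
# Hypothetical columns, for illustration only.
current_fields = ["workflow_id", "used.lr", "generated.loss"]
curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
# The prompt now contains: ALLOWED_FIELDS = ['workflow_id', 'used.lr', 'generated.loss']
```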
@@ -251,7 +360,7 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
      return prompt
 
 
- def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query) -> str:
+ def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query, current_fields) -> str:
      job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."
 
      if "image" in reduced_df.columns:
@@ -272,7 +381,7 @@ def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_value
  {reduced_df}
 
  **Original df (before reduction) had this schema:
- {get_df_schema_prompt(dynamic_schema, example_values)}
+ {get_df_schema_prompt(dynamic_schema, example_values, current_fields)}
 
  Your task is to find a concise and direct answer as an English sentence to the user query.
 
@@ -310,7 +419,7 @@ def extract_or_fix_json_code_prompt(raw_text) -> str:
      return prompt
 
 
- def extract_or_fix_python_code_prompt(raw_text):
+ def extract_or_fix_python_code_prompt(raw_text, current_fields):
      prompt = f"""
  You are a Pandas DataFrame code extractor and fixer. Pandas is a well-known data science Python library for querying datasets.
  You are given a raw user message that may include explanations, markdown fences, or partial DataFrame code that queries a DataFrame `df`.
@@ -319,9 +428,13 @@ def extract_or_fix_python_code_prompt(raw_text):
  1. Check if the message contains a valid DataFrame code.
  2. If it does, extract the code.
  3. If there are any syntax errors, fix them.
- 4. Return only the corrected DataFrame query code no explanations, no comments, no markdown.
+ 4. Carefully analyze the list of columns in the query. The query must only use fields in this list:
+ ALLOWED_FIELDS = {current_fields}.
+ If there are fields not in this list, replace the fields to match according to the ALLOWED_FIELDS list.
+ 5. Return only the corrected DataFrame query code — no explanations, no comments, no markdown.
 
  The output must be valid Python code, and must not include any other text.
+ Your output can only contain fields in the ALLOWED_FIELDS list.
  This output will be parsed by another program.
 
  ONCE AGAIN, ONLY PRODUCE THE PYTHON CODE. DO NOT SAY ANYTHING ELSE!
flowcept/agents/tools/general_tools.py
@@ -1,7 +1,7 @@
  import json
  from typing import List
 
- from flowcept.agents.agents_utils import build_llm_model, ToolResult
+ from flowcept.agents.agents_utils import build_llm_model, ToolResult, normalize_message
  from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
  from flowcept.agents.prompts.general_prompts import ROUTING_PROMPT, SMALL_TALK_PROMPT
 
@@ -105,6 +105,19 @@ def reset_records() -> ToolResult:
          return ToolResult(code=499, result=str(e))
 
 
+ @mcp_flowcept.tool()
+ def reset_context() -> ToolResult:
+     """
+     Resets all context.
+     """
+     try:
+         ctx = mcp_flowcept.get_context()
+         ctx.request_context.lifespan_context.reset_context()
+         return ToolResult(code=201, result="Context reset.")
+     except Exception as e:
+         return ToolResult(code=499, result=str(e))
+
+
  @mcp_flowcept.tool()
  def prompt_handler(message: str) -> ToolResult:
      """
@@ -120,20 +133,24 @@ def prompt_handler(message: str) -> ToolResult:
      TextContent
          The AI response or routing feedback.
      """
-     df_key_words = ["df", "save", "result = df", "reset context"]
+     df_key_words = ["df", "save", "result = df"]
      for key in df_key_words:
          if key in message:
              return run_df_query(llm=None, query=message, plot=False)
 
+     if "reset context" in message:
+         return reset_context()
      if "@record" in message:
          return record_guidance(message)
      if "@show records" in message:
          return show_records()
      if "@reset records" in message:
-         return reset_records(message)
+         return reset_records()
 
      llm = build_llm_model()
 
+     message = normalize_message(message)
+
      prompt = ROUTING_PROMPT + message
      route = llm.invoke(prompt)
 
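With this reordering, a plain "reset context" message bypasses `run_df_query` and the LLM entirely. A sketch of the expected happy-path behavior (return values per the `reset_context` hunk above):

```python
# "reset context" matches none of ["df", "save", "result = df"], so the
# dedicated branch dispatches to the new tool before any LLM is built.
res = prompt_handler("reset context")
assert res.code == 201 and res.result == "Context reset."
```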
flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py
@@ -63,11 +63,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
 
      Examples
      --------
-     Reset the context:
-
-     >>> run_df_query(llm, "reset context")
-     ToolResult(code=201, result="Context Reset!")
-
      Save the current DataFrame:
 
      >>> run_df_query(llm, "save")
@@ -90,10 +85,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
      custom_user_guidance = ctx.request_context.lifespan_context.custom_guidance
      if df is None or not len(df):
          return ToolResult(code=404, result="Current df is empty or null.")
-
-     if "reset context" in query:
-         ctx.request_context.lifespan_context.df = pd.DataFrame()
-         return ToolResult(code=201, result="Context Reset!")
      elif "save" in query:
          return save_df(df, schema, value_examples)
      elif "result = df" in query:
@@ -173,7 +164,7 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df, custom_us
      >>> print(result.result["plot_code"])
      plt.bar(result_df["region"], result_df["total_sales"])
      """
-     plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples)
+     plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples, list(df.columns))
      try:
          response = llm(plot_prompt)
      except Exception as e:
@@ -300,7 +291,9 @@ def generate_result_df(
      if llm is None:
          llm = build_llm_model()
      try:
-         prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values, custom_user_guidance)
+         prompt = generate_pandas_code_prompt(
+             query, dynamic_schema, example_values, custom_user_guidance, list(df.columns)
+         )
          response = llm(prompt)
      except Exception as e:
          return ToolResult(code=400, result=str(e), extra=prompt)
@@ -317,9 +310,10 @@ def generate_result_df(
              extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
          )
      else:
-         tool_result = extract_or_fix_python_code(llm, result_code)
+         tool_result = extract_or_fix_python_code(llm, result_code, list(df.columns))
          if tool_result.code == 201:
              new_result_code = tool_result.result
+             result_code = new_result_code
              try:
                  result_df = safe_execute(df, new_result_code)
              except Exception as e:
@@ -357,12 +351,7 @@ def generate_result_df(
      if summarize:
          try:
              tool_result = summarize_result(
-                 llm,
-                 result_code,
-                 result_df,
-                 query,
-                 dynamic_schema,
-                 example_values,
+                 llm, result_code, result_df, query, dynamic_schema, example_values, list(df.columns)
              )
              if tool_result.is_success():
                  return_code = 301
@@ -377,7 +366,7 @@ def generate_result_df(
              return_code = 303
 
      try:
-         result_df = format_result_df(result_df)
+         result_df_str = format_result_df(result_df)
      except Exception as e:
          return ToolResult(
              code=405,
@@ -387,7 +376,8 @@ def generate_result_df(
 
      this_result = {
          "result_code": result_code,
-         "result_df": result_df,
+         "result_df": result_df_str,
+         "result_df_markdown": result_df.to_markdown(index=False),
          "summary": summary,
          "summary_error": summary_error,
      }
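`DataFrame.to_markdown` delegates to the optional `tabulate` package, so the new `result_df_markdown` entry assumes that dependency is installed. A quick illustration (data invented):

```python
import pandas as pd

result_df = pd.DataFrame({"activity_id": ["train", "eval"], "count": [3, 1]})
print(result_df.to_markdown(index=False))  # pipe-style Markdown table; needs `tabulate`
```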
@@ -473,7 +463,7 @@ def run_df_code(user_code: str, df):
 
 
  @mcp_flowcept.tool()
- def extract_or_fix_python_code(llm, raw_text):
+ def extract_or_fix_python_code(llm, raw_text, current_fields):
      """
      Extract or repair JSON code from raw text using an LLM.
 
@@ -523,7 +513,7 @@ def extract_or_fix_python_code(llm, raw_text):
      >>> print(res)
      ToolResult(code=499, result='LLM service unavailable')
      """
-     prompt = extract_or_fix_python_code_prompt(raw_text)
+     prompt = extract_or_fix_python_code_prompt(raw_text, current_fields)
      try:
          response = llm(prompt)
          return ToolResult(code=201, result=response)
@@ -582,14 +572,7 @@ def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
 
 
  @mcp_flowcept.tool()
- def summarize_result(
-     llm,
-     code,
-     result,
-     query: str,
-     dynamic_schema,
-     example_values,
- ) -> ToolResult:
+ def summarize_result(llm, code, result, query: str, dynamic_schema, example_values, current_fields) -> ToolResult:
      """
      Summarize the pandas result with local reduction for large DataFrames.
      - For wide DataFrames, selects top columns based on variance and uniqueness.
@@ -597,7 +580,7 @@ def summarize_result(
      - Constructs a detailed prompt for the LLM with original column context.
      """
      summarized_df = summarize_df(result, code)
-     prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query)
+     prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query, current_fields)
      try:
          response = llm(prompt)
          return ToolResult(code=201, result=response)
flowcept/commons/daos/docdb_dao/lmdb_dao.py
@@ -115,6 +115,54 @@ class LMDBDAO(DocumentDBDAO):
              self.logger.exception(e)
              return False
 
+     def delete_task_keys(self, key_name, keys_list: List[str]) -> bool:
+         """Delete task documents by a key value list.
+
+         When deleting by task_id, deletes keys directly. Otherwise, scans
+         tasks and deletes matching entries.
+         """
+         if self._is_closed:
+             self._open()
+         if type(keys_list) is not list:
+             keys_list = [keys_list]
+         try:
+             with self._env.begin(write=True, db=self._tasks_db) as txn:
+                 if key_name == "task_id":
+                     for key in keys_list:
+                         if key is None:
+                             continue
+                         txn.delete(str(key).encode())
+                 else:
+                     cursor = txn.cursor()
+                     for key, value in cursor:
+                         entry = json.loads(value.decode())
+                         if entry.get(key_name) in keys_list:
+                             cursor.delete()
+             return True
+         except Exception as e:
+             self.logger.exception(e)
+             return False
+
+     def count_tasks(self) -> int:
+         """Count number of docs in tasks collection."""
+         if self._is_closed:
+             self._open()
+         try:
+             return self._env.stat(db=self._tasks_db).get("entries", 0)
+         except Exception as e:
+             self.logger.exception(e)
+             return -1
+
+     def count_workflows(self) -> int:
+         """Count number of docs in workflows collection."""
+         if self._is_closed:
+             self._open()
+         try:
+             return self._env.stat(db=self._workflows_db).get("entries", 0)
+         except Exception as e:
+             self.logger.exception(e)
+             return -1
+
      @staticmethod
      def _match_filter(entry, filter):
          """
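A hedged usage sketch for the three new helpers (DAO construction elided; assumes a default-configured `LMDBDAO` instance):

```python
dao = LMDBDAO()  # assumes default settings resolve the LMDB path

# Fast path: task_id is the storage key, so deletion is direct.
dao.delete_task_keys("task_id", ["t-1", "t-2"])

# Any other key name triggers a full scan of the tasks collection.
dao.delete_task_keys("workflow_id", ["wf-123"])

print(dao.count_tasks(), dao.count_workflows())  # -1 signals a logged error
```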
flowcept/commons/daos/keyvalue_dao.py
@@ -1,7 +1,5 @@
  """Key value module."""
 
- from flowcept.commons.daos.redis_conn import RedisConn
-
  from flowcept.commons.flowcept_logger import FlowceptLogger
  from flowcept.configs import (
      KVDB_HOST,
@@ -26,12 +24,23 @@ class KeyValueDAO:
 
      def __init__(self):
          if not hasattr(self, "_initialized"):
-             self._initialized = True
              self.logger = FlowceptLogger()
+             from flowcept.commons.daos.redis_conn import RedisConn
+
              self.redis_conn = RedisConn.build_redis_conn_pool(
                  host=KVDB_HOST, port=KVDB_PORT, password=KVDB_PASSWORD, uri=KVDB_URI
              )
 
+             self._initialized = True
+
+     @staticmethod
+     def get_set_name(set_id: str, exec_bundle_id=None) -> str:
+         """Return a consistent set name for KVDB sets."""
+         set_name = set_id
+         if exec_bundle_id is not None:
+             set_name += "_" + str(exec_bundle_id)
+         return set_name
+
      def delete_set(self, set_name: str):
          """Delete it."""
          self.redis_conn.delete(set_name)
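The new static helper simply encodes the `<set_id>_<exec_bundle_id>` naming rule:

```python
KeyValueDAO.get_set_name("my_set")        # -> "my_set"
KeyValueDAO.get_set_name("my_set", 42)    # -> "my_set_42"
```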