flowcept-0.9.17-py3-none-any.whl → flowcept-0.9.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/agents/agents_utils.py +42 -0
- flowcept/agents/flowcept_agent.py +4 -1
- flowcept/agents/flowcept_ctx_manager.py +99 -36
- flowcept/agents/gui/gui_utils.py +21 -3
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/agents/prompts/in_memory_query_prompts.py +158 -45
- flowcept/agents/tools/general_tools.py +20 -3
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +14 -31
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +48 -0
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +2 -2
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +33 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +4 -1
- flowcept/configs.py +4 -1
- flowcept/flowcept_api/flowcept_controller.py +5 -1
- flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +33 -2
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +18 -4
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +7 -8
- flowcept/instrumentation/flowcept_task.py +147 -51
- flowcept/instrumentation/task_capture.py +10 -1
- flowcept/version.py +1 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/METADATA +8 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/RECORD +27 -27
- {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/WHEEL +1 -1
- resources/sample_settings.yaml +2 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.17.dist-info → flowcept-0.9.18.dist-info}/licenses/LICENSE +0 -0
flowcept/agents/agents_utils.py
CHANGED

@@ -1,4 +1,6 @@
 import os
+import re
+import unicodedata
 from typing import Union, Dict

 from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager

@@ -194,3 +196,43 @@ def build_llm_model(
     if tool_task:
         llm.parent_task_id = tool_task.task_id
     return llm
+
+
+def normalize_message(user_msg: str) -> str:
+    """
+    Normalize a user message into a canonical, comparison-friendly form.
+
+    The function standardizes text by trimming whitespace, applying Unicode
+    normalization, normalizing dash characters, collapsing repeated whitespace,
+    removing trailing punctuation that does not affect semantics, and converting
+    the result to lowercase.
+
+    Parameters
+    ----------
+    user_msg : str
+        Raw user input message.
+
+    Returns
+    -------
+    str
+        Normalized message suitable for matching, comparison, or hashing.
+    """
+    # 1) Strip leading/trailing whitespace
+    user_msg = user_msg.strip()
+
+    # 2) Unicode normalize to avoid weird characters (like fancy quotes, dashes)
+    user_msg = unicodedata.normalize("NFKC", user_msg)
+
+    # 3) Normalize dashes commonly used in chemistry (C–H, C—H, etc.)
+    user_msg = user_msg.replace("–", "-").replace("—", "-")
+
+    # 4) Collapse multiple spaces / newlines into a single space
+    user_msg = re.sub(r"\s+", " ", user_msg)
+
+    # 5) Remove trailing punctuation that doesn't change semantics
+    # e.g., "?", "!", "." at the VERY end
+    user_msg = re.sub(r"[?!.\s]+$", "", user_msg)
+
+    user_msg = user_msg.lower()
+
+    return user_msg
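For reference, a minimal sketch of what the new `normalize_message` produces (the input below is illustrative, not from the package's tests):

```python
from flowcept.agents.agents_utils import normalize_message

normalize_message("  What  is the C–H bond enthalpy?!  ")
# -> 'what is the c-h bond enthalpy'
```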
flowcept/agents/flowcept_agent.py
CHANGED

@@ -20,11 +20,14 @@ def main():
     def run():
         uvicorn.run(mcp_flowcept.streamable_http_app, host=AGENT_HOST, port=AGENT_PORT, lifespan="on")

-    Thread(target=run)
+    server_thread = Thread(target=run, daemon=False)
+    server_thread.start()
     sleep(2)
     # Wake up tool call
     print(run_tool(check_liveness, host=AGENT_HOST, port=AGENT_PORT)[0])

+    server_thread.join()
+

 if __name__ == "__main__":
     main()
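The original code constructed the `Thread` but never called `start()`, so the uvicorn server never actually ran. A minimal standalone sketch of the corrected pattern (names illustrative):

```python
from threading import Thread
from time import sleep

def run():
    print("server loop would run here")

server_thread = Thread(target=run, daemon=False)  # Thread(...) alone does nothing
server_thread.start()                             # the target only runs after start()
sleep(2)
server_thread.join()  # non-daemon + join keeps the process alive until the server exits
```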
flowcept/agents/flowcept_ctx_manager.py
CHANGED

@@ -1,6 +1,9 @@
 from flowcept.agents.dynamic_schema_tracker import DynamicSchemaTracker
 from flowcept.agents.tools.in_memory_queries.pandas_agent_utils import load_saved_df
 from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
+from flowcept.commons.flowcept_logger import FlowceptLogger
+from flowcept.commons.vocabulary import Status
+from flowcept.configs import AGENT
 from mcp.server.fastmcp import FastMCP

 import json
@@ -12,11 +15,12 @@ import pandas as pd

 from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager, BaseAppContext

-
-from flowcept.agents import agent_client
 from flowcept.commons.task_data_preprocess import summarize_task


+AGENT_DEBUG = AGENT.get("debug", False)
+
+
 @dataclass
 class FlowceptAppContext(BaseAppContext):
     """
@@ -39,6 +43,39 @@ class FlowceptAppContext(BaseAppContext):
     tracker_config: Dict | None
     custom_guidance: List[str] | None

+    def __init__(self):
+        self.logger = FlowceptLogger()
+        self.reset_context()
+
+    def reset_context(self):
+        """
+        Reset the agent's context to a clean state, initializing a new QA setup.
+        """
+        self.tasks = []
+        self.task_summaries = []
+        self.critical_tasks = []
+        self.df = pd.DataFrame()
+        self.tasks_schema = {}
+        self.value_examples = {}
+        self.custom_guidance = []
+        self.tracker_config = {}
+
+        if AGENT_DEBUG:
+            from flowcept.commons.flowcept_logger import FlowceptLogger
+
+            FlowceptLogger().warning("Running agent in DEBUG mode!")
+            df_path = "/tmp/current_agent_df.csv"
+            if os.path.exists(df_path):
+                self.logger.warning("Going to load df into context")
+                df = load_saved_df(df_path)
+                self.df = df
+            if os.path.exists("/tmp/current_tasks_schema.json"):
+                with open("/tmp/current_tasks_schema.json") as f:
+                    self.tasks_schema = json.load(f)
+            if os.path.exists("/tmp/value_examples.json"):
+                with open("/tmp/value_examples.json") as f:
+                    self.value_examples = json.load(f)
+

 class FlowceptAgentContextManager(BaseAgentContextManager):
     """
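With this change, construction and reset share one code path: `__init__` simply delegates to `reset_context()`. A hedged usage sketch (the appended task dict is illustrative):

```python
ctx = FlowceptAppContext()            # starts clean via reset_context()
ctx.tasks.append({"task_id": "t1"})   # hypothetical task record
ctx.reset_context()                   # back to a clean state; with AGENT debug on,
                                      # it also reloads /tmp/current_agent_df.csv if present
assert ctx.tasks == []
```

Note that defining `__init__` by hand on a `@dataclass` replaces the generated initializer, which is presumably why the keyword-argument construction in the old `reset_context` (removed further below) was dropped.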
@@ -61,7 +98,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
     """

     def __init__(self):
-        self.context
+        self.context = FlowceptAppContext()
         self.tracker_config = dict(max_examples=3, max_str_len=50)
         self.schema_tracker = DynamicSchemaTracker(**self.tracker_config)
         self.msgs_counter = 0
@@ -82,7 +119,6 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
         bool
             True if the message was handled successfully.
         """
-        print("Received:", msg_obj)
         msg_type = msg_obj.get("type", None)
         if msg_type == "task":
             task_msg = TaskObject.from_dict(msg_obj)
@@ -90,8 +126,62 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
                 self.logger.info(f"Going to ignore our own LLM messages: {task_msg}")
                 return True

-            self.msgs_counter += 1
             self.logger.debug("Received task msg!")
+            if task_msg.subtype == "call_agent_task":
+                from flowcept.instrumentation.task_capture import FlowceptTask
+
+                if task_msg.activity_id == "reset_user_context":
+                    self.context.reset_context()
+                    self.msgs_counter = 0
+                    FlowceptTask(
+                        agent_id=self.agent_id,
+                        generated={"msg": "Provenance Agent reset context."},
+                        subtype="agent_task",
+                        activity_id="reset_user_context",
+                    ).send()
+                    return True
+                elif task_msg.activity_id == "provenance_query":
+                    self.logger.info("Received a prov query message!")
+                    query_text = task_msg.used.get("query")
+                    from flowcept.agents import ToolResult
+                    from flowcept.agents.tools.general_tools import prompt_handler
+                    from flowcept.agents.agent_client import run_tool
+
+                    resp = run_tool(tool_name=prompt_handler, kwargs={"message": query_text})[0]
+
+                    try:
+                        error = None
+                        status = Status.FINISHED
+                        tool_result = ToolResult(**json.loads(resp))
+                        if tool_result.result_is_str():
+                            generated = {"text": tool_result.result}
+                        else:
+                            generated = tool_result.result
+                    except Exception as e:
+                        status = Status.ERROR
+                        error = f"Could not convert the following into a ToolResult:\n{resp}\nException: {e}"
+                        generated = {"text": str(resp)}
+                    FlowceptTask(
+                        agent_id=self.agent_id,
+                        generated=generated,
+                        stderr=error,
+                        status=status,
+                        subtype="agent_task",
+                        activity_id="provenance_query_response",
+                    ).send()
+
+                    return True
+
+            elif (
+                task_msg.subtype == "agent_task"
+                and task_msg.agent_id is not None
+                and task_msg.agent_id == self.agent_id
+            ):
+                self.logger.info(f"Ignoring agent tasks from myself: {task_msg}")
+                return True
+
+            self.msgs_counter += 1
+
             self.context.tasks.append(msg_obj)

             task_summary = summarize_task(msg_obj, logger=self.logger)
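For orientation, these are the message shapes the handler now routes, reconstructed from the fields read in the diff above (the values are illustrative):

```python
# Resets the agent's context and acknowledges with an "agent_task" message.
reset_msg = {"type": "task", "subtype": "call_agent_task", "activity_id": "reset_user_context"}

# Runs prompt_handler on the query text and replies with a provenance_query_response.
query_msg = {
    "type": "task",
    "subtype": "call_agent_task",
    "activity_id": "provenance_query",
    "used": {"query": "how many tasks per activity?"},
}
```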
@@ -136,7 +226,9 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
         Perform LLM-based analysis on the current chunk of task messages and send the results.
         """
         self.logger.debug(f"Going to begin LLM job! {self.msgs_counter}")
-
+        from flowcept.agents.agent_client import run_tool
+
+        result = run_tool("analyze_task_chunk")
         if len(result):
             content = result[0].text
             if content != "Error executing tool":
@@ -146,36 +238,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
             else:
                 self.logger.error(content)

-    def reset_context(self):
-        """
-        Reset the agent's context to a clean state, initializing a new QA setup.
-        """
-        self.context = FlowceptAppContext(
-            tasks=[],
-            task_summaries=[],
-            critical_tasks=[],
-            df=pd.DataFrame(),
-            tasks_schema={},
-            value_examples={},
-            custom_guidance=[],
-            tracker_config=self.tracker_config,
-        )
-        DEBUG = True  # TODO debugging!
-        if DEBUG:
-            self.logger.warning("Running agent in DEBUG mode!")
-            df_path = "/tmp/current_agent_df.csv"
-            if os.path.exists(df_path):
-                self.logger.warning("Going to load df into context")
-                df = load_saved_df(df_path)
-                self.context.df = df
-            if os.path.exists("/tmp/current_tasks_schema.json"):
-                with open("/tmp/current_tasks_schema.json") as f:
-                    self.context.tasks_schema = json.load(f)
-            if os.path.exists("/tmp/value_examples.json"):
-                with open("/tmp/value_examples.json") as f:
-                    self.context.value_examples = json.load(f)
-

 # Exporting the ctx_manager and the mcp_flowcept
 ctx_manager = FlowceptAgentContextManager()
-mcp_flowcept = FastMCP("FlowceptAgent",
+mcp_flowcept = FastMCP("FlowceptAgent", lifespan=ctx_manager.lifespan, stateless_http=True)
flowcept/agents/gui/gui_utils.py
CHANGED

@@ -351,10 +351,28 @@ def exec_st_plot_code(code, result_df, st_module):
     >>> code = "st.line_chart(result)"
     >>> exec_st_plot_code(code, df, st)
     """
-
+    # 1) Make a copy of result_df and rename columns with dots
+    plot_df = result_df.copy()
+    col_map = {}
+
+    for col in plot_df.columns:
+        if "." in col:
+            new_col = col.replace(".", "_")
+            col_map[col] = new_col
+            plot_df.rename(columns={col: new_col}, inplace=True)
+
+    # 2) Rewrite the code so column names match the renamed columns
+    sanitized_code = code
+    for old, new in col_map.items():
+        # replace only inside quotes: 'generated.bd_enthalpy' → 'generated_bd_enthalpy'
+        sanitized_code = sanitized_code.replace(f"'{old}'", f"'{new}'")
+        sanitized_code = sanitized_code.replace(f'"{old}"', f'"{new}"')
+
+    print("SANITIZED CODE:\n", sanitized_code)
+    print(f"Renamed DF columms: {plot_df}")
     exec(
-
-        {"result":
+        sanitized_code,
+        {"result": plot_df, "st": st_module, "plt": __import__("matplotlib.pyplot"), "alt": __import__("altair")},
     )
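The sanitization step exists because `pd.json_normalize` produces dotted column names (e.g., `generated.bd_enthalpy`) that the LLM-generated plot code may quote literally. A self-contained sketch of the renaming logic, with assumed inputs:

```python
import pandas as pd

df = pd.DataFrame({"generated.bd_enthalpy": [1.2, 3.4]})  # hypothetical column
code = "st.line_chart(result['generated.bd_enthalpy'])"

col_map = {c: c.replace(".", "_") for c in df.columns if "." in c}
df = df.rename(columns=col_map)
for old, new in col_map.items():
    # rewrite only quoted occurrences, mirroring the diff above
    code = code.replace(f"'{old}'", f"'{new}'").replace(f'"{old}"', f'"{new}"')

print(code)  # st.line_chart(result['generated_bd_enthalpy'])
```

One caveat worth noting: `__import__("matplotlib.pyplot")` returns the top-level `matplotlib` package rather than the `pyplot` submodule, so the `plt` name exposed to the executed code may not behave like a plain `import matplotlib.pyplot as plt`.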
flowcept/agents/prompts/general_prompts.py
CHANGED

@@ -28,7 +28,7 @@ ROUTING_PROMPT = (
    # "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
    # "- historical_prov_query: if the user wants to query historical provenance data\n"
    "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
-    "-
+    "- in_context_query: if you don't know.\n"
    "Respond with only the route label."
    "User message is below:\n "
 )
flowcept/agents/prompts/in_memory_query_prompts.py
CHANGED

@@ -1,32 +1,117 @@
 # flake8: noqa: E501
 # flake8: noqa: D103

-
-
-
-
-
-
-| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for "task type" queries. One activity_id is linked to multiple task_ids. |
-| `campaign_id` | string | A group of workflows. |
-| `hostname` | string | Compute node name. |
-| `agent_id` | string | Set if executed by an agent. |
-| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query is has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |
-| `ended_at` | datetime64[ns, UTC] | End time of a task. |
-| `subtype` | string | Subtype of a task. |
-| `tags` | List[str] | List of descriptive tags. |
-| `image` | blob | Raw binary data related to an image. |
-| `telemetry_summary.duration_sec` | float | Task duration (seconds). |
-| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start.|
-| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time ( seconds ) between task start and end times.|
-| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution.|
-| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start.|
----
-For any queries involving CPU, use fields that begin with telemetry_summary.cpu
+
+def generate_common_task_fields(current_fields):
+    # TODO: make this better
+    common_task_fields = """
+    | Column | Data Type | Description |
+    |-------------------------------|-------------|
     """
+    common_task_fields += (
+        "| `workflow_id` | string | Workflow the task belongs to. Use this field when the query is asking about workflow execution |\n"
+        if "workflow_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `task_id` | string | Task identifier. |\n" if "task_id" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `parent_task_id` | string | A task may be directly linked to others. Use this field when the query asks for a task informed by (or associated with or linked to) other task. |\n"
+        if "parent_task_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for \"task type\" queries. One activity_id is linked to multiple task_ids. |\n"
+        if "activity_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `campaign_id` | string | A group of workflows. |\n"
+        if "campaign_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `hostname` | string | Compute node name. |\n" if "hostname" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `agent_id` | string | Set if executed by an agent. |\n"
+        if "agent_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |\n"
+        if "started_at" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `ended_at` | datetime64[ns, UTC] | End time of a task. |\n"
+        if "ended_at" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `subtype` | string | Subtype of a task. |\n" if "subtype" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `tags` | List[str] | List of descriptive tags. |\n"
+        if "tags" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `image` | blob | Raw binary data related to an image. |\n"
+        if "image" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.duration_sec` | float | Task duration (seconds). |\n"
+        if "telemetry_summary.duration_sec" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start. |\n"
+        if "telemetry_summary.cpu.percent_all_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time (seconds) between task start and end times. |\n"
+        if "telemetry_summary.cpu.user_time_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution. |\n"
+        if "telemetry_summary.cpu.system_time_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start. |\n"
+        if "telemetry_summary.cpu.idle_time_diff" in current_fields
+        else ""
+    )
+
+    common_task_fields += "\n For any queries involving CPU, use fields that begin with telemetry_summary.cpu"
+
+    return common_task_fields
+

 DF_FORM = "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."

+CURRENT_DF_COLUMNS_PROMPT = """
+### ABSOLUTE FIELD CONSTRAINT -- THIS IS CRITICAL
+
+The following list is the ONLY valid field names in df. Treat this as the schema:
+
+ALLOWED_FIELDS = [COLS]
+
+You MUST treat this list as authoritative.
+
+- You may only use fields names that appear EXACTLY (string match) in ALLOWED_FIELDS.
+- You are NOT allowed to create new field names by:
+  - adding or removing prefixes like "used." or "generated."
+  - combining words
+  - guessing.
+- If a field name is not in ALLOWED_FIELDS, you MUST NOT use it.
+"""
+

 def get_example_values_prompt(example_values):
     values_prompt = f"""
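In effect, the static field table was replaced by a generator that emits a row only for columns present in the live DataFrame. A hedged usage sketch:

```python
# Only the rows whose columns exist in current_fields appear in the prompt.
print(generate_common_task_fields(["workflow_id", "started_at"]))
# -> table header, the workflow_id and started_at rows, then the CPU-fields note
```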
@@ -39,7 +124,7 @@ def get_example_values_prompt(example_values):
     return values_prompt


-def get_df_schema_prompt(dynamic_schema, example_values):
+def get_df_schema_prompt(dynamic_schema, example_values, current_fields):
     schema_prompt = f"""
 ## DATAFRAME STRUCTURE

@@ -53,14 +138,19 @@ def get_df_schema_prompt(dynamic_schema, example_values):
 The schema for these fields is defined in the dictionary below.
 It maps each activity ID to its inputs (i) and outputs (o), using flattened field names that include `used.` or `generated.` prefixes to indicate the role the field played in the task. These names match the columns in the dataframe `df`.

-```python
 {dynamic_schema}
-```
 Use this schema and fields to understand what inputs and outputs are valid for each activity.
+
+IMPORTANT: The user might say used for outputs or generated for inputs, which might confuse you. Do not get tricked by the user.
+Ignore the natural-language words "used" and "generated".
+- The English phrase "used in the calculation" does NOT mean you must use a `used.` column.
+- The English word "generated" in the question does NOT force you to use a `generated.` column either.
+
+ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding. THIS IS CRITICAL.

 ### 2. Additional fields for tasks:

-{
+{generate_common_task_fields(current_fields)}
 ---
 """
@@ -70,12 +160,12 @@ def get_df_schema_prompt(dynamic_schema, example_values):
     return prompt


-def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
+def generate_plot_code_prompt(query, dynamic_schema, example_values, current_fields) -> str:
     PLOT_PROMPT = f"""
 You are a Streamlit chart expert.
 {DF_FORM}

-{get_df_schema_prompt(dynamic_schema, example_values)}
+{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}

 ### 3. Guidelines

@@ -121,10 +211,14 @@ def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
     "plot_code": "import matplotlib.pyplot as plt\nplt.hist(result['n_controls'])\nst.pyplot(plt)"
 }}

+Your response must be only the raw Python code in the format:
+result = ...
+Except for the `result` variable, YOU MUST NEVER CREATE ANY OTHER VARIABLE. NEVER!
+
 User request:
 {query}

-
+

 """
     return PLOT_PROMPT
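Per the added instructions, the only output shape the plot prompt now accepts is a single `result` assignment. An illustrative example of compliant generated code (the column names are assumed):

```python
result = df[df['activity_id'] == 'simulate_layer'][['started_at', 'generated.scores']]
# defining any variable other than `result` violates the prompt contract
```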
@@ -139,7 +233,7 @@ QUERY_GUIDELINES = """

 - Use `df` as the base DataFrame.
 - Use `activity_id` to filter by task type (valid values = schema keys).
--
+- ONLY IF the ALLOWED_FIELDS list allow, use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
 - Use `telemetry_summary.duration_sec` for performance-related questions.
 - Use `hostname` when user mentions *where* a task ran.
 - Use `agent_id` when the user refers to agents (non-null means task was agent-run).
@@ -153,7 +247,7 @@ QUERY_GUIDELINES = """
 **THE COLUMN 'used' DOES NOT EXIST**
 **THE COLUMN 'generated' DOES NOT EXIST**
 - **When filtering by `activity_id`, only select columns that belong to that activity’s schema.**
--
+- Always observing the ALLOWED_FIELDS list, use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
 - Explicitly list the selected columns — **never return all columns**
 - **Only include telemetry columns if used in the query logic.**
 -THERE IS NOT A FIELD NAMED `telemetry_summary.start_time` or `telemetry_summary.end_time` or `used.start_time` or `used.end_time`. Use `started_at` and `ended_at` instead when you want to find the duration of a task, activity, or workflow execution.
@@ -187,6 +281,17 @@ QUERY_GUIDELINES = """
 -**Do NOT use any of those: df[df['started_at'].idxmax()], df[df['started_at'].idxmin()], df[df['ended_at'].idxmin()], df[df['ended_at'].idxmax()]. Those are not valid Pandas Code.**
 - When the query mentions "each task", or "each activity", or "each workflow", make sure you show (project) the correct id column in the results (i.e., respectively: `task_id`, `activity_id`, `workflow_id`) to identify those in the results.
 - Use df[<role>.field_name] == True or df[<role>.field_name] == False when user queries boolean fields, where <role> is either used or generated, depending on the field name. Make sure field_name is a valid field in the DataFrame.
+
+If the query asks you to report which values appear in one or more columns
+(for example “which X were used”, “list all Y”, “what X and Y were generated”), then:
+
+For each relevant column, select that column from df.
+Call .dropna() on that column to remove missing values.
+After dropping NaNs, apply .unique(), .value_counts(), or any other aggregation as needed.
+Select that column.
+Call .dropna() on it.
+Then call .unique(), .value_counts(), or any other aggregation.
+

 - **Do not include metadata columns unless explicitly required by the user query.**
 """
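The new guideline describes a dropna-then-aggregate pattern; an illustrative instance (the column name is hypothetical):

```python
result = df['used.solvent'].dropna().unique()  # drop NaNs first, then aggregate
```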
@@ -200,15 +305,16 @@ FEW_SHOTS = """
 # Q: How many tasks for each activity?
 result = df['activity_id'].value_counts()

-# Q: What is the average loss across all tasks?
-result = df['generated.loss'].mean()
-
-# Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
-result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
-
-# Q: Show duration and generated scores for 'simulate_layer' tasks
-result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
 """
+# # Q: What is the average loss across all tasks?
+# result = df['generated.loss'].mean()
+#
+# # Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
+# result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][
+#     ['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
+#
+# # Q: Show duration and generated scores for 'simulate_layer' tasks
+# result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]

 OUTPUT_FORMATTING = """
 6. Final Instructions
@@ -226,7 +332,7 @@ OUTPUT_FORMATTING = """
 """


-def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
+def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances, current_fields):
     if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
         concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
         custom_user_guidance_prompt = (
@@ -236,11 +342,14 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
         )
     else:
         custom_user_guidance_prompt = ""
+
+    curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
     prompt = (
         f"{ROLE}"
         f"{JOB}"
         f"{DF_FORM}"
-        f"{
+        f"{curr_cols}"
+        f"{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}"  # main tester
         f"{QUERY_GUIDELINES}"  # main tester
         f"{FEW_SHOTS}"  # main tester
         f"{custom_user_guidance_prompt}"
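The `[COLS]` placeholder in `CURRENT_DF_COLUMNS_PROMPT` is filled with the stringified column list before the prompt pieces are concatenated. A minimal sketch with assumed columns:

```python
cols = ["task_id", "activity_id", "generated.loss"]
curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(cols))
# curr_cols now pins ALLOWED_FIELDS = ['task_id', 'activity_id', 'generated.loss']
```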
@@ -251,7 +360,7 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
     return prompt


-def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query) -> str:
+def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query, current_fields) -> str:
     job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."

     if "image" in reduced_df.columns:
@@ -272,7 +381,7 @@ def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_value
 {reduced_df}

 **Original df (before reduction) had this schema:
-{get_df_schema_prompt(dynamic_schema, example_values)}
+{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}

 Your task is to find a concise and direct answer as an English sentence to the user query.

@@ -310,7 +419,7 @@ def extract_or_fix_json_code_prompt(raw_text) -> str:
     return prompt


-def extract_or_fix_python_code_prompt(raw_text):
+def extract_or_fix_python_code_prompt(raw_text, current_fields):
     prompt = f"""
 You are a Pandas DataFrame code extractor and fixer. Pandas is a well-known data science Python library for querying datasets.
 You are given a raw user message that may include explanations, markdown fences, or partial DataFrame code that queries a DataFrame `df`.
@@ -319,9 +428,13 @@ def extract_or_fix_python_code_prompt(raw_text):
 1. Check if the message contains a valid DataFrame code.
 2. If it does, extract the code.
 3. If there are any syntax errors, fix them.
-4.
+4. Carefully analyze the list of columns in the query. The query must only use fields in this list:
+ALLOWED_FIELDS = {current_fields}.
+If there are fields not in this list, replace the fields to match according to the ALLOWED_FIELDS list.
+5. Return only the corrected DataFrame query code — no explanations, no comments, no markdown.

 The output must be valid Python code, and must not include any other text.
+Your output can only contain fields in the ALLOWED_FIELDS list.
 This output will be parsed by another program.

 ONCE AGAIN, ONLY PRODUCE THE PYTHON CODE. DO NOT SAY ANYTHING ELSE!
|