flowcept 0.9.17__py3-none-any.whl → 0.9.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/agents/agent_client.py +10 -4
- flowcept/agents/agents_utils.py +54 -19
- flowcept/agents/flowcept_agent.py +116 -12
- flowcept/agents/flowcept_ctx_manager.py +116 -46
- flowcept/agents/gui/gui_utils.py +21 -3
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/agents/prompts/in_memory_query_prompts.py +158 -45
- flowcept/agents/tools/general_tools.py +20 -3
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +14 -31
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +48 -0
- flowcept/commons/daos/keyvalue_dao.py +12 -3
- flowcept/commons/daos/mq_dao/mq_dao_base.py +37 -20
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +2 -2
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +33 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +4 -1
- flowcept/configs.py +17 -3
- flowcept/flowcept_api/flowcept_controller.py +5 -1
- flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +33 -2
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +18 -4
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +9 -10
- flowcept/flowceptor/consumers/base_consumer.py +22 -4
- flowcept/flowceptor/consumers/document_inserter.py +22 -1
- flowcept/instrumentation/flowcept_task.py +147 -51
- flowcept/instrumentation/task_capture.py +10 -1
- flowcept/version.py +1 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/METADATA +8 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/RECORD +32 -32
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/WHEEL +1 -1
- resources/sample_settings.yaml +2 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/licenses/LICENSE +0 -0
flowcept/agents/prompts/in_memory_query_prompts.py

@@ -1,32 +1,117 @@
 # flake8: noqa: E501
 # flake8: noqa: D103

-
-
-
-
-
-
-| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for "task type" queries. One activity_id is linked to multiple task_ids. |
-| `campaign_id` | string | A group of workflows. |
-| `hostname` | string | Compute node name. |
-| `agent_id` | string | Set if executed by an agent. |
-| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query is has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |
-| `ended_at` | datetime64[ns, UTC] | End time of a task. |
-| `subtype` | string | Subtype of a task. |
-| `tags` | List[str] | List of descriptive tags. |
-| `image` | blob | Raw binary data related to an image. |
-| `telemetry_summary.duration_sec` | float | Task duration (seconds). |
-| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start.|
-| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time ( seconds ) between task start and end times.|
-| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution.|
-| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start.|
----
-For any queries involving CPU, use fields that begin with telemetry_summary.cpu
+
+def generate_common_task_fields(current_fields):
+    # TODO: make this better
+    common_task_fields = """
+| Column | Data Type | Description |
+|-------------------------------|-------------|
 """
+    common_task_fields += (
+        "| `workflow_id` | string | Workflow the task belongs to. Use this field when the query is asking about workflow execution |\n"
+        if "workflow_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `task_id` | string | Task identifier. |\n" if "task_id" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `parent_task_id` | string | A task may be directly linked to others. Use this field when the query asks for a task informed by (or associated with or linked to) other task. |\n"
+        if "parent_task_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for \"task type\" queries. One activity_id is linked to multiple task_ids. |\n"
+        if "activity_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `campaign_id` | string | A group of workflows. |\n"
+        if "campaign_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `hostname` | string | Compute node name. |\n" if "hostname" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `agent_id` | string | Set if executed by an agent. |\n"
+        if "agent_id" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |\n"
+        if "started_at" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `ended_at` | datetime64[ns, UTC] | End time of a task. |\n"
+        if "ended_at" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `subtype` | string | Subtype of a task. |\n" if "subtype" in current_fields else ""
+    )
+    common_task_fields += (
+        "| `tags` | List[str] | List of descriptive tags. |\n"
+        if "tags" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `image` | blob | Raw binary data related to an image. |\n"
+        if "image" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.duration_sec` | float | Task duration (seconds). |\n"
+        if "telemetry_summary.duration_sec" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start. |\n"
+        if "telemetry_summary.cpu.percent_all_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time (seconds) between task start and end times. |\n"
+        if "telemetry_summary.cpu.user_time_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution. |\n"
+        if "telemetry_summary.cpu.system_time_diff" in current_fields
+        else ""
+    )
+    common_task_fields += (
+        "| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start. |\n"
+        if "telemetry_summary.cpu.idle_time_diff" in current_fields
+        else ""
+    )
+
+    common_task_fields += "\n For any queries involving CPU, use fields that begin with telemetry_summary.cpu"
+
+    return common_task_fields
+

 DF_FORM = "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."

+CURRENT_DF_COLUMNS_PROMPT = """
+### ABSOLUTE FIELD CONSTRAINT -- THIS IS CRITICAL
+
+The following list is the ONLY valid field names in df. Treat this as the schema:
+
+ALLOWED_FIELDS = [COLS]
+
+You MUST treat this list as authoritative.
+
+- You may only use fields names that appear EXACTLY (string match) in ALLOWED_FIELDS.
+- You are NOT allowed to create new field names by:
+  - adding or removing prefixes like "used." or "generated."
+  - combining words
+  - guessing.
+- If a field name is not in ALLOWED_FIELDS, you MUST NOT use it.
+"""
+

 def get_example_values_prompt(example_values):
     values_prompt = f"""
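The new `generate_common_task_fields` emits a schema-table row only for columns that actually exist in the current DataFrame. A minimal sketch of the intended behavior (module path taken from the file list above; the sample columns are made up for illustration):

```python
from flowcept.agents.prompts.in_memory_query_prompts import generate_common_task_fields

# Hypothetical column list, e.g., list(df.columns) for a small capture.
current_fields = ["task_id", "activity_id", "telemetry_summary.duration_sec"]

table = generate_common_task_fields(current_fields)
assert "`task_id`" in table          # present columns get a row
assert "`workflow_id`" not in table  # absent columns are skipped
```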
@@ -39,7 +124,7 @@ def get_example_values_prompt(example_values):
     return values_prompt


-def get_df_schema_prompt(dynamic_schema, example_values):
+def get_df_schema_prompt(dynamic_schema, example_values, current_fields):
     schema_prompt = f"""
 ## DATAFRAME STRUCTURE

@@ -53,14 +138,19 @@ def get_df_schema_prompt(dynamic_schema, example_values):
 The schema for these fields is defined in the dictionary below.
 It maps each activity ID to its inputs (i) and outputs (o), using flattened field names that include `used.` or `generated.` prefixes to indicate the role the field played in the task. These names match the columns in the dataframe `df`.

-```python
 {dynamic_schema}
-```
 Use this schema and fields to understand what inputs and outputs are valid for each activity.
+
+IMPORTANT: The user might say used for outputs or generated for inputs, which might confuse you. Do not get tricked by the user.
+Ignore the natural-language words "used" and "generated".
+- The English phrase "used in the calculation" does NOT mean you must use a `used.` column.
+- The English word "generated" in the question does NOT force you to use a `generated.` column either.
+
+ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding. THIS IS CRITICAL.

 ### 2. Additional fields for tasks:

-{
+{generate_common_task_fields(current_fields)}
 ---
 """

@@ -70,12 +160,12 @@ def get_df_schema_prompt(dynamic_schema, example_values):
     return prompt


-def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
+def generate_plot_code_prompt(query, dynamic_schema, example_values, current_fields) -> str:
     PLOT_PROMPT = f"""
 You are a Streamlit chart expert.
 {DF_FORM}

-{get_df_schema_prompt(dynamic_schema, example_values)}
+{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}

 ### 3. Guidelines

@@ -121,10 +211,14 @@ def generate_plot_code_prompt(query, dynamic_schema, example_values) -> str:
 "plot_code": "import matplotlib.pyplot as plt\nplt.hist(result['n_controls'])\nst.pyplot(plt)"
 }}

+Your response must be only the raw Python code in the format:
+result = ...
+Except for the `result` variable, YOU MUST NEVER CREATE ANY OTHER VARIABLE. NEVER!
+
 User request:
 {query}

-
+

 """
     return PLOT_PROMPT
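The hunk above also tightens the response contract for plot queries: the model must answer with a single `result = ...` assignment and may not introduce any other variable. For illustration, a response that satisfies the new contract (column names borrowed from the few-shot examples elsewhere in this file):

```python
result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
```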
@@ -139,7 +233,7 @@ QUERY_GUIDELINES = """

 - Use `df` as the base DataFrame.
 - Use `activity_id` to filter by task type (valid values = schema keys).
--
+- ONLY IF the ALLOWED_FIELDS list allow, use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
 - Use `telemetry_summary.duration_sec` for performance-related questions.
 - Use `hostname` when user mentions *where* a task ran.
 - Use `agent_id` when the user refers to agents (non-null means task was agent-run).
@@ -153,7 +247,7 @@ QUERY_GUIDELINES = """
 **THE COLUMN 'used' DOES NOT EXIST**
 **THE COLUMN 'generated' DOES NOT EXIST**
 - **When filtering by `activity_id`, only select columns that belong to that activity’s schema.**
--
+- Always observing the ALLOWED_FIELDS list, use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
 - Explicitly list the selected columns — **never return all columns**
 - **Only include telemetry columns if used in the query logic.**
 -THERE IS NOT A FIELD NAMED `telemetry_summary.start_time` or `telemetry_summary.end_time` or `used.start_time` or `used.end_time`. Use `started_at` and `ended_at` instead when you want to find the duration of a task, activity, or workflow execution.
@@ -187,6 +281,17 @@ QUERY_GUIDELINES = """
 -**Do NOT use any of those: df[df['started_at'].idxmax()], df[df['started_at'].idxmin()], df[df['ended_at'].idxmin()], df[df['ended_at'].idxmax()]. Those are not valid Pandas Code.**
 - When the query mentions "each task", or "each activity", or "each workflow", make sure you show (project) the correct id column in the results (i.e., respectively: `task_id`, `activity_id`, `workflow_id`) to identify those in the results.
 - Use df[<role>.field_name] == True or df[<role>.field_name] == False when user queries boolean fields, where <role> is either used or generated, depending on the field name. Make sure field_name is a valid field in the DataFrame.
+
+If the query asks you to report which values appear in one or more columns
+(for example “which X were used”, “list all Y”, “what X and Y were generated”), then:
+
+For each relevant column, select that column from df.
+Call .dropna() on that column to remove missing values.
+After dropping NaNs, apply .unique(), .value_counts(), or any other aggregation as needed.
+Select that column.
+Call .dropna() on it.
+Then call .unique(), .value_counts(), or any other aggregation.
+

 - **Do not include metadata columns unless explicitly required by the user query.**
 """
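The new guideline block spells out a dropna-then-aggregate pattern for "which values appear" questions. A conforming one-liner for a query like "which options were generated?" (the column name is illustrative, taken from the few-shots below):

```python
result = df['generated.option'].dropna().unique()
```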
@@ -200,15 +305,16 @@ FEW_SHOTS = """
 # Q: How many tasks for each activity?
 result = df['activity_id'].value_counts()

-# Q: What is the average loss across all tasks?
-result = df['generated.loss'].mean()
-
-# Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
-result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
-
-# Q: Show duration and generated scores for 'simulate_layer' tasks
-result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
 """
+# # Q: What is the average loss across all tasks?
+# result = df['generated.loss'].mean()
+#
+# # Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
+# result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][
+#     ['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
+#
+# # Q: Show duration and generated scores for 'simulate_layer' tasks
+# result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]

 OUTPUT_FORMATTING = """
 6. Final Instructions
@@ -226,7 +332,7 @@ OUTPUT_FORMATTING = """
 """


-def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
+def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances, current_fields):
     if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
         concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
         custom_user_guidance_prompt = (
@@ -236,11 +342,14 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
         )
     else:
         custom_user_guidance_prompt = ""
+
+    curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
     prompt = (
         f"{ROLE}"
         f"{JOB}"
         f"{DF_FORM}"
-        f"{
+        f"{curr_cols}"
+        f"{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}"  # main tester
         f"{QUERY_GUIDELINES}"  # main tester
         f"{FEW_SHOTS}"  # main tester
         f"{custom_user_guidance_prompt}"
@@ -251,7 +360,7 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, cust
     return prompt


-def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query) -> str:
+def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query, current_fields) -> str:
     job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."

     if "image" in reduced_df.columns:
@@ -272,7 +381,7 @@ def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_value
 {reduced_df}

 **Original df (before reduction) had this schema:
-{get_df_schema_prompt(dynamic_schema, example_values)}
+{get_df_schema_prompt(dynamic_schema, example_values, current_fields)}

 Your task is to find a concise and direct answer as an English sentence to the user query.

@@ -310,7 +419,7 @@ def extract_or_fix_json_code_prompt(raw_text) -> str:
     return prompt


-def extract_or_fix_python_code_prompt(raw_text):
+def extract_or_fix_python_code_prompt(raw_text, current_fields):
     prompt = f"""
 You are a Pandas DataFrame code extractor and fixer. Pandas is a well-known data science Python library for querying datasets.
 You are given a raw user message that may include explanations, markdown fences, or partial DataFrame code that queries a DataFrame `df`.
@@ -319,9 +428,13 @@ def extract_or_fix_python_code_prompt(raw_text):
 1. Check if the message contains a valid DataFrame code.
 2. If it does, extract the code.
 3. If there are any syntax errors, fix them.
-4.
+4. Carefully analyze the list of columns in the query. The query must only use fields in this list:
+ALLOWED_FIELDS = {current_fields}.
+If there are fields not in this list, replace the fields to match according to the ALLOWED_FIELDS list.
+5. Return only the corrected DataFrame query code — no explanations, no comments, no markdown.

 The output must be valid Python code, and must not include any other text.
+Your output can only contain fields in the ALLOWED_FIELDS list.
 This output will be parsed by another program.

 ONCE AGAIN, ONLY PRODUCE THE PYTHON CODE. DO NOT SAY ANYTHING ELSE!
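Taken together, these hunks thread `current_fields` (the DataFrame's real column list) into every prompt builder. The substitution itself is a plain string replace; a sketch of what the LLM ends up seeing (sample columns are hypothetical):

```python
from flowcept.agents.prompts.in_memory_query_prompts import CURRENT_DF_COLUMNS_PROMPT

# In the package this list is list(df.columns).
current_fields = ["task_id", "activity_id", "generated.loss"]

# Same substitution generate_pandas_code_prompt now performs internally:
curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
# The prompt now contains:
#   ALLOWED_FIELDS = ['task_id', 'activity_id', 'generated.loss']
```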
flowcept/agents/tools/general_tools.py

@@ -1,7 +1,7 @@
 import json
 from typing import List

-from flowcept.agents.agents_utils import build_llm_model, ToolResult
+from flowcept.agents.agents_utils import build_llm_model, ToolResult, normalize_message
 from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
 from flowcept.agents.prompts.general_prompts import ROUTING_PROMPT, SMALL_TALK_PROMPT

@@ -105,6 +105,19 @@ def reset_records() -> ToolResult:
         return ToolResult(code=499, result=str(e))


+@mcp_flowcept.tool()
+def reset_context() -> ToolResult:
+    """
+    Resets all context.
+    """
+    try:
+        ctx = mcp_flowcept.get_context()
+        ctx.request_context.lifespan_context.reset_context()
+        return ToolResult(code=201, result="Context reset.")
+    except Exception as e:
+        return ToolResult(code=499, result=str(e))
+
+
 @mcp_flowcept.tool()
 def prompt_handler(message: str) -> ToolResult:
     """
@@ -120,20 +133,24 @@ def prompt_handler(message: str) -> ToolResult:
     TextContent
         The AI response or routing feedback.
     """
-    df_key_words = ["df", "save", "result = df"
+    df_key_words = ["df", "save", "result = df"]
     for key in df_key_words:
         if key in message:
             return run_df_query(llm=None, query=message, plot=False)

+    if "reset context" in message:
+        return reset_context()
     if "@record" in message:
         return record_guidance(message)
     if "@show records" in message:
         return show_records()
     if "@reset records" in message:
-        return reset_records(
+        return reset_records()

     llm = build_llm_model()

+    message = normalize_message(message)
+
     prompt = ROUTING_PROMPT + message
     route = llm.invoke(prompt)

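`prompt_handler` now routes a plain "reset context" message to the new `reset_context` tool before consulting the LLM router. A condensed, standalone sketch of the dispatch order (not the package code; the real handler invokes MCP tools):

```python
def route(message: str) -> str:
    # 1. Anything that looks like DataFrame code goes to run_df_query.
    if any(key in message for key in ["df", "save", "result = df"]):
        return "run_df_query"
    # 2. New in 0.9.19: explicit context reset.
    if "reset context" in message:
        return "reset_context"
    # 3. Guidance-record commands.
    if "@record" in message:
        return "record_guidance"
    if "@show records" in message:
        return "show_records"
    if "@reset records" in message:
        return "reset_records"
    # 4. Otherwise the message is normalized and sent to the LLM router.
    return "llm_routing"
```

Because the substring checks run in order, a message that mentions "df" is still routed to `run_df_query` even if it also says "reset context".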
flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py

@@ -63,11 +63,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:

     Examples
     --------
-    Reset the context:
-
-    >>> run_df_query(llm, "reset context")
-    ToolResult(code=201, result="Context Reset!")
-
     Save the current DataFrame:

     >>> run_df_query(llm, "save")
@@ -90,10 +85,6 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
     custom_user_guidance = ctx.request_context.lifespan_context.custom_guidance
     if df is None or not len(df):
         return ToolResult(code=404, result="Current df is empty or null.")
-
-    if "reset context" in query:
-        ctx.request_context.lifespan_context.df = pd.DataFrame()
-        return ToolResult(code=201, result="Context Reset!")
     elif "save" in query:
         return save_df(df, schema, value_examples)
     elif "result = df" in query:
@@ -173,7 +164,7 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df, custom_us
     >>> print(result.result["plot_code"])
     plt.bar(result_df["region"], result_df["total_sales"])
     """
-    plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples)
+    plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples, list(df.columns))
     try:
         response = llm(plot_prompt)
     except Exception as e:
@@ -300,7 +291,9 @@ def generate_result_df(
     if llm is None:
         llm = build_llm_model()
     try:
-        prompt = generate_pandas_code_prompt(
+        prompt = generate_pandas_code_prompt(
+            query, dynamic_schema, example_values, custom_user_guidance, list(df.columns)
+        )
         response = llm(prompt)
     except Exception as e:
         return ToolResult(code=400, result=str(e), extra=prompt)
@@ -317,9 +310,10 @@ def generate_result_df(
             extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
         )
     else:
-        tool_result = extract_or_fix_python_code(llm, result_code)
+        tool_result = extract_or_fix_python_code(llm, result_code, list(df.columns))
         if tool_result.code == 201:
             new_result_code = tool_result.result
+            result_code = new_result_code
             try:
                 result_df = safe_execute(df, new_result_code)
             except Exception as e:
@@ -357,12 +351,7 @@ def generate_result_df(
     if summarize:
         try:
             tool_result = summarize_result(
-                llm,
-                result_code,
-                result_df,
-                query,
-                dynamic_schema,
-                example_values,
+                llm, result_code, result_df, query, dynamic_schema, example_values, list(df.columns)
             )
             if tool_result.is_success():
                 return_code = 301
@@ -377,7 +366,7 @@ def generate_result_df(
         return_code = 303

     try:
-
+        result_df_str = format_result_df(result_df)
     except Exception as e:
         return ToolResult(
             code=405,
@@ -387,7 +376,8 @@ def generate_result_df(

     this_result = {
         "result_code": result_code,
-        "result_df":
+        "result_df": result_df_str,
+        "result_df_markdown": result_df.to_markdown(index=False),
         "summary": summary,
         "summary_error": summary_error,
     }
@@ -473,7 +463,7 @@ def run_df_code(user_code: str, df):


 @mcp_flowcept.tool()
-def extract_or_fix_python_code(llm, raw_text):
+def extract_or_fix_python_code(llm, raw_text, current_fields):
     """
     Extract or repair JSON code from raw text using an LLM.

@@ -523,7 +513,7 @@ def extract_or_fix_python_code(llm, raw_text):
     >>> print(res)
     ToolResult(code=499, result='LLM service unavailable')
     """
-    prompt = extract_or_fix_python_code_prompt(raw_text)
+    prompt = extract_or_fix_python_code_prompt(raw_text, current_fields)
     try:
         response = llm(prompt)
         return ToolResult(code=201, result=response)
@@ -582,14 +572,7 @@ def extract_or_fix_json_code(llm, raw_text) -> ToolResult:


 @mcp_flowcept.tool()
-def summarize_result(
-    llm,
-    code,
-    result,
-    query: str,
-    dynamic_schema,
-    example_values,
-) -> ToolResult:
+def summarize_result(llm, code, result, query: str, dynamic_schema, example_values, current_fields) -> ToolResult:
     """
     Summarize the pandas result with local reduction for large DataFrames.
     - For wide DataFrames, selects top columns based on variance and uniqueness.
@@ -597,7 +580,7 @@ def summarize_result(
     - Constructs a detailed prompt for the LLM with original column context.
     """
     summarized_df = summarize_df(result, code)
-    prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query)
+    prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query, current_fields)
     try:
         response = llm(prompt)
         return ToolResult(code=201, result=response)
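Two behavioral notes, as far as the diff shows: the code fixed by `extract_or_fix_python_code` is now written back into `result_code`, so the reported `result_code` matches what `safe_execute` actually ran; and the new `result_df_markdown` field relies on `DataFrame.to_markdown`, which needs the optional `tabulate` dependency at runtime. A minimal sketch of the latter:

```python
import pandas as pd

df = pd.DataFrame({"activity_id": ["choose_option"], "generated.option": ["a"]})
# to_markdown raises ImportError if the `tabulate` package is not installed.
print(df.to_markdown(index=False))
```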
flowcept/commons/daos/docdb_dao/lmdb_dao.py

@@ -115,6 +115,54 @@ class LMDBDAO(DocumentDBDAO):
             self.logger.exception(e)
             return False

+    def delete_task_keys(self, key_name, keys_list: List[str]) -> bool:
+        """Delete task documents by a key value list.
+
+        When deleting by task_id, deletes keys directly. Otherwise, scans
+        tasks and deletes matching entries.
+        """
+        if self._is_closed:
+            self._open()
+        if type(keys_list) is not list:
+            keys_list = [keys_list]
+        try:
+            with self._env.begin(write=True, db=self._tasks_db) as txn:
+                if key_name == "task_id":
+                    for key in keys_list:
+                        if key is None:
+                            continue
+                        txn.delete(str(key).encode())
+                else:
+                    cursor = txn.cursor()
+                    for key, value in cursor:
+                        entry = json.loads(value.decode())
+                        if entry.get(key_name) in keys_list:
+                            cursor.delete()
+            return True
+        except Exception as e:
+            self.logger.exception(e)
+            return False
+
+    def count_tasks(self) -> int:
+        """Count number of docs in tasks collection."""
+        if self._is_closed:
+            self._open()
+        try:
+            return self._env.stat(db=self._tasks_db).get("entries", 0)
+        except Exception as e:
+            self.logger.exception(e)
+            return -1
+
+    def count_workflows(self) -> int:
+        """Count number of docs in workflows collection."""
+        if self._is_closed:
+            self._open()
+        try:
+            return self._env.stat(db=self._workflows_db).get("entries", 0)
+        except Exception as e:
+            self.logger.exception(e)
+            return -1
+
     @staticmethod
     def _match_filter(entry, filter):
         """
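A hypothetical usage sketch of the three new `LMDBDAO` helpers (class path from the file list; exact construction of the DAO may differ in the package):

```python
from flowcept.commons.daos.docdb_dao.lmdb_dao import LMDBDAO

dao = LMDBDAO()
print(dao.count_tasks(), dao.count_workflows())  # -1 signals an error

# Deleting by task_id hits LMDB keys directly; any other key_name triggers
# a full cursor scan that JSON-decodes each entry before matching.
dao.delete_task_keys("task_id", ["t1", "t2"])
dao.delete_task_keys("workflow_id", ["wf-123"])
```

The scan path is O(n) over all stored tasks, so deleting by `task_id` is the cheap case.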
flowcept/commons/daos/keyvalue_dao.py

@@ -1,7 +1,5 @@
 """Key value module."""

-from flowcept.commons.daos.redis_conn import RedisConn
-
 from flowcept.commons.flowcept_logger import FlowceptLogger
 from flowcept.configs import (
     KVDB_HOST,
@@ -26,12 +24,23 @@ class KeyValueDAO:

     def __init__(self):
         if not hasattr(self, "_initialized"):
-            self._initialized = True
             self.logger = FlowceptLogger()
+            from flowcept.commons.daos.redis_conn import RedisConn
+
             self.redis_conn = RedisConn.build_redis_conn_pool(
                 host=KVDB_HOST, port=KVDB_PORT, password=KVDB_PASSWORD, uri=KVDB_URI
             )

+            self._initialized = True
+
+    @staticmethod
+    def get_set_name(set_id: str, exec_bundle_id=None) -> str:
+        """Return a consistent set name for KVDB sets."""
+        set_name = set_id
+        if exec_bundle_id is not None:
+            set_name += "_" + str(exec_bundle_id)
+        return set_name
+
     def delete_set(self, set_name: str):
         """Delete it."""
         self.redis_conn.delete(set_name)
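The new `get_set_name` helper centralizes the set-naming convention, the `RedisConn` import is now deferred into `__init__` (presumably so importing the module does not pull in the Redis client), and `_initialized` is set only after the connection pool is built. Usage follows directly from the implementation:

```python
from flowcept.commons.daos.keyvalue_dao import KeyValueDAO

KeyValueDAO.get_set_name("workflows")              # -> "workflows"
KeyValueDAO.get_set_name("workflows", "bundle-1")  # -> "workflows_bundle-1"
```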