flowcept 0.8.11__py3-none-any.whl → 0.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +7 -4
- flowcept/agents/__init__.py +5 -0
- flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
- flowcept/agents/agents_utils.py +181 -0
- flowcept/agents/dynamic_schema_tracker.py +191 -0
- flowcept/agents/flowcept_agent.py +30 -0
- flowcept/agents/flowcept_ctx_manager.py +175 -0
- flowcept/agents/gui/__init__.py +5 -0
- flowcept/agents/gui/agent_gui.py +76 -0
- flowcept/agents/gui/gui_utils.py +239 -0
- flowcept/agents/llms/__init__.py +1 -0
- flowcept/agents/llms/claude_gcp.py +139 -0
- flowcept/agents/llms/gemini25.py +119 -0
- flowcept/agents/prompts/__init__.py +1 -0
- flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
- flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
- flowcept/agents/tools/__init__.py +1 -0
- flowcept/agents/tools/general_tools.py +102 -0
- flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
- flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
- flowcept/cli.py +286 -44
- flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
- flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
- flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
- flowcept/commons/task_data_preprocess.py +260 -60
- flowcept/commons/utils.py +25 -6
- flowcept/configs.py +41 -26
- flowcept/flowcept_api/flowcept_controller.py +73 -6
- flowcept/flowceptor/adapters/base_interceptor.py +11 -5
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
- flowcept/flowceptor/consumers/base_consumer.py +4 -0
- flowcept/flowceptor/consumers/consumer_utils.py +5 -4
- flowcept/flowceptor/consumers/document_inserter.py +2 -2
- flowcept/flowceptor/telemetry_capture.py +5 -2
- flowcept/instrumentation/flowcept_agent_task.py +294 -0
- flowcept/instrumentation/flowcept_decorator.py +43 -0
- flowcept/instrumentation/flowcept_loop.py +3 -3
- flowcept/instrumentation/flowcept_task.py +64 -24
- flowcept/instrumentation/flowcept_torch.py +5 -5
- flowcept/instrumentation/task_capture.py +83 -6
- flowcept/version.py +1 -1
- {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/METADATA +42 -14
- {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/RECORD +50 -36
- resources/sample_settings.yaml +12 -4
- flowcept/flowceptor/adapters/agents/__init__.py +0 -1
- flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
- flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
- flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
- flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
- flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
- {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/WHEEL +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/entry_points.txt +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from flowcept.agents.agents_utils import ToolResult, build_llm_model
|
|
4
|
+
from flowcept.agents.flowcept_ctx_manager import mcp_flowcept, ctx_manager
|
|
5
|
+
from flowcept.agents.prompts.in_memory_query_prompts import (
|
|
6
|
+
generate_plot_code_prompt,
|
|
7
|
+
extract_or_fix_json_code_prompt,
|
|
8
|
+
generate_pandas_code_prompt,
|
|
9
|
+
dataframe_summarizer_context,
|
|
10
|
+
extract_or_fix_python_code_prompt,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from flowcept.agents.tools.in_memory_queries.pandas_agent_utils import (
|
|
14
|
+
load_saved_df,
|
|
15
|
+
safe_execute,
|
|
16
|
+
safe_json_parse,
|
|
17
|
+
normalize_output,
|
|
18
|
+
format_result_df,
|
|
19
|
+
summarize_df,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@mcp_flowcept.tool()
def run_df_query(llm, query: str, plot=False) -> ToolResult:
    """
    Answer a natural-language query over the DataFrame held in the MCP context.

    Special queries are intercepted before any LLM call: a query containing
    "reset context" clears the active DataFrame, one containing "save"
    persists the DataFrame/schema/examples to disk, and one containing
    "result = df" is executed verbatim as Python code. Any other query is
    delegated to an LLM-backed tool: plotting-code generation when ``plot``
    is True, otherwise result-DataFrame generation.

    Parameters
    ----------
    llm : callable
        Language model callable mapping a prompt string to a response.
    query : str
        Natural language query or Python code snippet.
    plot : bool, default=False
        When True, also generate plotting code for the result.

    Returns
    -------
    ToolResult
        - ``code=201`` : context reset or DataFrame/schema saved.
        - ``code=301`` : successful result DataFrame (and optional plot code).
        - ``code=404`` : no active DataFrame in context.
        - Other codes propagate errors from the delegated tools.

    Examples
    --------
    >>> run_df_query(llm, "reset context")
    ToolResult(code=201, result="Context Reset!")
    >>> run_df_query(llm, "Show average sales by region")
    ToolResult(code=301, result={'result_df': 'region,avg_sales\\nNorth,100\\nSouth,95'})
    """
    lifespan = mcp_flowcept.get_context().request_context.lifespan_context
    frame: pd.DataFrame = lifespan.df
    tasks_schema = lifespan.tasks_schema
    examples = lifespan.value_examples

    # Guard: nothing to query against.
    if frame is None or not len(frame):
        return ToolResult(code=404, result="Current df is empty or null.")

    # Intercepted "command" queries, checked in priority order.
    if "reset context" in query:
        lifespan.df = pd.DataFrame()
        return ToolResult(code=201, result="Context Reset!")
    if "save" in query:
        return save_df(frame, tasks_schema, examples)
    if "result = df" in query:
        return run_df_code(user_code=query, df=frame)

    # Regular NL query: delegate to the appropriate LLM-backed generator.
    generator = generate_plot_code if plot else generate_result_df
    return generator(llm, query, tasks_schema, examples, frame)
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@mcp_flowcept.tool()
def generate_plot_code(llm, query, dynamic_schema, value_examples, df) -> ToolResult:
    """
    Generate DataFrame and plotting code from a natural language query using an LLM.

    Builds a prompt with the query, dynamic schema, and example values, and
    asks the LLM for JSON with two fields: ``result_code`` (Python code to
    transform the DataFrame) and ``plot_code`` (Python code to produce a
    plot). The transformation code is executed and the resulting DataFrame
    is formatted as CSV. If the LLM output is invalid JSON, one repair
    attempt is made via ``extract_or_fix_json_code`` before failing.

    Parameters
    ----------
    llm : callable
        Language model callable mapping a prompt string to a response.
    query : str
        Natural language query describing the desired transformation and plot.
    dynamic_schema : dict
        Schema definition describing the structure of the DataFrame.
    value_examples : dict
        Example values associated with the schema to guide the LLM.
    df : pandas.DataFrame
        The DataFrame to query and transform.

    Returns
    -------
    ToolResult
        - ``code=301`` : dict with ``result_df`` (CSV str), ``plot_code``,
          and ``result_code``.
        - ``code=400`` : LLM invocation failed.
        - ``code=404`` : formatting the result DataFrame failed.
        - ``code=405`` : LLM output could not be parsed / lacked required keys.
        - ``code=406`` : executing the generated code failed.
        - ``code=499`` : JSON-repair tool failed or unexpected error.
    """
    plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples)
    try:
        response = llm(plot_prompt)
    except Exception as e:
        return ToolResult(code=400, result=str(e), extra=plot_prompt)

    result_code, plot_code = None, None
    try:
        result = safe_json_parse(response)
        result_code = result["result_code"]
        plot_code = result["plot_code"]
    except ValueError:
        # LLM output was not valid JSON: ask the LLM to extract/repair it.
        tool_response = extract_or_fix_json_code(llm, response)
        if tool_response.code != 201:
            return ToolResult(code=499, result=tool_response.result)
        response = tool_response.result
        try:
            result = safe_json_parse(response)
        except ValueError as e:
            return ToolResult(
                code=405, result=f"Tried to parse this as JSON: {response}, but got Error: {e}", extra=plot_prompt
            )
        # Explicit validation instead of `assert` (asserts vanish under -O).
        if "result_code" not in result or "plot_code" not in result:
            return ToolResult(
                code=405,
                result="LLM JSON output is missing 'result_code' or 'plot_code'.",
                extra=plot_prompt,
            )
        # BUG FIX: the repaired JSON was previously parsed but never assigned
        # back (and a `return` was missing), so execution fell through with
        # result_code=None. Assign so the normal execute/format path runs.
        result_code = result["result_code"]
        plot_code = result["plot_code"]
    except Exception as e:
        # Covers KeyError on missing fields in the first parse, among others.
        return ToolResult(code=499, result=str(e), extra=plot_prompt)

    try:
        result_df = safe_execute(df, result_code)
    except Exception as e:
        return ToolResult(code=406, result=str(e))
    try:
        result_df = format_result_df(result_df)
    except Exception as e:
        return ToolResult(code=404, result=str(e))

    this_result = {"result_df": result_df, "plot_code": plot_code, "result_code": result_code}
    return ToolResult(code=301, result=this_result, tool_name=generate_plot_code.__name__)
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@mcp_flowcept.tool()
def generate_result_df(llm, query: str, dynamic_schema, example_values, df, attempt_fix=True, summarize=True):
    """
    Generate a result DataFrame from a natural language query using an LLM.

    Constructs a prompt with the query, dynamic schema, and example values,
    asks the LLM for executable pandas code, and runs that code against the
    provided DataFrame. If execution fails and ``attempt_fix`` is enabled,
    a second LLM call tries to extract/repair valid Python code. The result
    is normalized, optionally summarized, and formatted as CSV.

    Parameters
    ----------
    llm : callable
        Language model callable mapping a prompt string to a response.
    query : str
        Natural language query to execute against the DataFrame.
    dynamic_schema : dict
        Schema definition describing the structure of the DataFrame.
    example_values : dict
        Example values associated with the schema to guide the LLM.
    df : pandas.DataFrame
        The DataFrame to run the query against.
    attempt_fix : bool, default=True
        If True, attempt to repair invalid generated code via a second LLM call.
    summarize : bool, default=True
        If True, attempt to generate a natural language summary of the result.

    Returns
    -------
    ToolResult
        - Success (301 summary ok, 302 summarizer tool failed, 303 summarizer
          raised): dict with ``result_code`` (the code actually executed),
          ``result_df`` (CSV str), ``summary`` and ``summary_error``.
        - Failure (400 LLM/prompt error, 405 code execution/formatting error,
          504 normalization error): error message plus debugging context in
          ``extra``.

    Notes
    -----
    - Columns with only NaN values are dropped from the result.
    - Summarization errors are non-blocking; the result DataFrame is still returned.
    """
    # Initialized up front so the except branch never hits an unbound name
    # when generate_pandas_code_prompt itself raises (BUG FIX).
    prompt = None
    try:
        prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values)
        response = llm(prompt)
    except Exception as e:
        return ToolResult(code=400, result=str(e), extra=prompt)

    result_code = response
    try:
        result_df = safe_execute(df, result_code)
    except Exception as e:
        if not attempt_fix:
            return ToolResult(
                code=405,
                result=f"Failed to parse this as Python code: \n\n ```python\n {result_code} \n```\n "
                f"but got error:\n\n {e}.",
                extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
            )
        tool_result = extract_or_fix_python_code(llm, result_code)
        if tool_result.code != 201:
            return ToolResult(
                code=405,
                result=f"Failed to parse this as Python code: {result_code}."
                f"Exception: {e}\n"
                f"Then tried to LLM extract the Python code, but got error:"
                f" {tool_result.result}",
            )
        new_result_code = tool_result.result
        try:
            result_df = safe_execute(df, new_result_code)
        except Exception as e:
            return ToolResult(
                code=405,
                result=f"Failed to parse this as Python code: \n\n"
                f"```python\n {result_code} \n```\n "
                f"Then tried to LLM extract the Python code, got: \n\n "
                f"```python\n{new_result_code}```\n "
                f"but got error:\n\n {e}.",
            )
        # BUG FIX: record the code that actually executed so the returned
        # ``result_code`` and the summarization prompt match reality.
        result_code = new_result_code

    try:
        result_df = normalize_output(result_df)
    except Exception as e:
        return ToolResult(
            code=504,
            result="Failed to normalize output of the resulting dataframe.",
            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
        )

    # Drop columns that carry no data at all.
    result_df = result_df.dropna(axis=1, how="all")

    return_code = 301
    summary, summary_error = None, None
    if summarize:
        try:
            tool_result = summarize_result(llm, result_code, result_df, query)
            if tool_result.is_success():
                return_code = 301
                summary = tool_result.result
            else:
                return_code = 302
                summary_error = tool_result.result
        except Exception as e:
            # Summarization is best-effort; never block the main result.
            ctx_manager.logger.exception(e)
            summary = ""
            summary_error = str(e)
            return_code = 303

    try:
        result_df = format_result_df(result_df)
    except Exception as e:
        return ToolResult(
            code=405,
            result="Failed to format output of the resulting dataframe.",
            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
        )

    this_result = {
        "result_code": result_code,
        "result_df": result_df,
        "summary": summary,
        "summary_error": summary_error,
    }
    return ToolResult(
        code=return_code, result=this_result, tool_name=generate_result_df.__name__, extra={"prompt": prompt}
    )
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@mcp_flowcept.tool()
def run_df_code(user_code: str, df):
    """
    Execute user-provided Python code on a DataFrame and format the result.

    Safely executes a code snippet against the given DataFrame, normalizes
    and formats the result as CSV, and returns it in a ``ToolResult``.
    All execution, normalization, and formatting errors are caught and
    converted into a ``ToolResult`` with code 405.

    Parameters
    ----------
    user_code : str
        Python code intended to operate on the provided DataFrame.
    df : pandas.DataFrame
        The input DataFrame on which the code will be executed.

    Returns
    -------
    ToolResult
        - ``code=301`` : dict with ``result_code`` (the original snippet)
          and ``result_df`` (CSV-formatted result).
        - ``code=405`` : error message explaining why execution failed.

    Notes
    -----
    - Columns that contain only ``NaN`` values are dropped from the result.
    - The output DataFrame is always formatted as CSV text.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
    >>> res = run_df_code("df[['a']].sum()", df)
    >>> print(res.code)
    301
    """
    try:
        result_df = safe_execute(df, user_code)
    except Exception as e:
        return ToolResult(code=405, result=f"Failed to run this as Python code: {user_code}. Got error {e}")

    try:
        result_df = normalize_output(result_df)
    except Exception as e:
        return ToolResult(code=405, result=str(e))

    result_df = result_df.dropna(axis=1, how="all")

    # CONSISTENCY FIX: format_result_df was previously unprotected here,
    # although the documented contract (and the sibling tools) convert
    # formatting errors into a 405 ToolResult instead of raising.
    try:
        result_df = format_result_df(result_df)
    except Exception as e:
        return ToolResult(code=405, result=str(e))

    this_result = {
        "result_code": user_code,
        "result_df": result_df,
    }
    return ToolResult(code=301, result=this_result, tool_name=run_df_code.__name__)
|
461
|
+
|
|
462
|
+
|
|
463
|
+
@mcp_flowcept.tool()
def extract_or_fix_python_code(llm, raw_text):
    """
    Extract or repair Python code from raw text using an LLM.

    Builds a prompt around the given raw text and passes it to the provided
    language model, which is expected to either extract valid Python code
    or repair malformed Python code embedded in the text (e.g. code wrapped
    in Markdown fences or mixed with explanations). The outcome is wrapped
    in a ``ToolResult``.

    Note: the docstring previously described JSON extraction — it was a
    copy-paste of ``extract_or_fix_json_code``; this tool handles Python.

    Parameters
    ----------
    llm : callable
        A language model function or object that can be invoked with a
        prompt string and returns a response.
    raw_text : str
        Raw text containing Python code or fragments that may need to be
        extracted or fixed.

    Returns
    -------
    ToolResult
        - ``code=201`` : extraction/fix succeeded, LLM output in ``result``.
        - ``code=499`` : an exception occurred, error message in ``result``.

    Examples
    --------
    >>> def mock_llm(prompt):
    ...     return "result = df.head()"
    >>> res = extract_or_fix_python_code(mock_llm, "```python\\nresult = df.head()\\n```")
    >>> print(res)
    ToolResult(code=201, result='result = df.head()')

    >>> def broken_llm(prompt):
    ...     raise RuntimeError("LLM service unavailable")
    >>> res = extract_or_fix_python_code(broken_llm, "bad code")
    >>> print(res)
    ToolResult(code=499, result='LLM service unavailable')
    """
    prompt = extract_or_fix_python_code_prompt(raw_text)
    try:
        response = llm(prompt)
        return ToolResult(code=201, result=response)
    except Exception as e:
        return ToolResult(code=499, result=str(e))
|
520
|
+
|
|
521
|
+
|
|
522
|
+
@mcp_flowcept.tool()
def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
    """
    Extract or repair JSON code from raw text using a language model.

    Wraps the given raw text in a repair/extraction prompt and sends it to
    the LLM, which is expected to return valid JSON extracted or fixed from
    the input. The outcome is reported through a ``ToolResult``.

    Parameters
    ----------
    llm : Callable[[str], str]
        Callable LLM wrapper that takes a prompt string and returns a
        string response.
    raw_text : str
        Text containing JSON (possibly incomplete or malformed).

    Returns
    -------
    ToolResult
        - ``code=201`` with the LLM response in ``result`` on success.
        - ``code=499`` with the error message in ``result`` on failure.

    Examples
    --------
    >>> def mock_llm(prompt: str) -> str:
    ...     return '{"foo": "bar"}'
    >>> extract_or_fix_json_code(mock_llm, "Broken JSON: {foo: bar}")
    ToolResult(code=201, result='{"foo": "bar"}')
    """
    repair_prompt = extract_or_fix_json_code_prompt(raw_text)
    try:
        fixed = llm(repair_prompt)
    except Exception as err:
        return ToolResult(code=499, result=str(err))
    return ToolResult(code=201, result=fixed)
|
570
|
+
|
|
571
|
+
|
|
572
|
+
@mcp_flowcept.tool()
def summarize_result(llm, code, result, query: str) -> ToolResult:
    """
    Summarize the pandas result with local reduction for large DataFrames.

    ``summarize_df`` shrinks the result first (top columns by variance and
    uniqueness for wide frames, preview rows for long ones), then a prompt
    built with the original code and query is sent to the LLM.

    Returns
    -------
    ToolResult
        ``code=201`` with the summary on success, ``code=400`` with the
        error message when the LLM call fails.
    """
    reduced = summarize_df(result, code)
    summarizer_prompt = dataframe_summarizer_context(code, reduced, query)
    try:
        summary_text = llm(summarizer_prompt)
    except Exception as err:
        return ToolResult(code=400, result=str(err))
    return ToolResult(code=201, result=summary_text)
|
587
|
+
|
|
588
|
+
|
|
589
|
+
@mcp_flowcept.tool()
def save_df(df, schema, value_examples, out_dir="/tmp"):
    """
    Save a DataFrame, its schema, and example values to a directory.

    Writes the schema and value examples as JSON files and the DataFrame as
    a CSV file into ``out_dir`` (``/tmp`` by default, preserving the
    previous hard-coded behavior). Useful for persisting the current state
    of an agent's task data for later querying or debugging.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame to save.
    schema : dict
        A dictionary describing the schema of the DataFrame.
    value_examples : dict
        Example values associated with the DataFrame schema.
    out_dir : str, default="/tmp"
        Directory where the three files are written. Must already exist.

    Returns
    -------
    ToolResult
        Status code 201 and a confirmation message on success.

    Notes
    -----
    Files written (relative to ``out_dir``):

    - ``current_tasks_schema.json`` — schema
    - ``value_examples.json`` — example values
    - ``current_agent_df.csv`` — DataFrame contents

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"name": ["Alice", "Bob"], "score": [85, 92]})
    >>> result = save_df(df, {"fields": []}, {"name": ["Alice"]})
    >>> print(result)
    ToolResult(code=201, result='Saved df and schema to /tmp directory')
    """
    # Local import keeps the module's top-level import block untouched.
    from pathlib import Path

    base = Path(out_dir)
    with open(base / "current_tasks_schema.json", "w") as f:
        json.dump(schema, f, indent=2)
    with open(base / "value_examples.json", "w") as f:
        json.dump(value_examples, f, indent=2)
    df.to_csv(base / "current_agent_df.csv", index=False)
    return ToolResult(code=201, result=f"Saved df and schema to {out_dir} directory")
|
640
|
+
|
|
641
|
+
|
|
642
|
+
@mcp_flowcept.tool()
def query_on_saved_df(query: str, dynamic_schema_path, value_examples_path, df_path):
    """
    Run a natural language query against a saved DataFrame.

    Loads a previously saved DataFrame, its dynamic schema, and value
    examples from disk, builds an LLM, and delegates to
    ``generate_result_df`` (with code-repair and summarization disabled)
    to produce the result.

    Parameters
    ----------
    query : str
        Natural language query to execute against the DataFrame.
    dynamic_schema_path : str
        Path to a JSON file with the schema definition used by the LLM.
    value_examples_path : str
        Path to a JSON file with example values to guide the LLM query.
    df_path : str
        Path to the saved DataFrame file.

    Returns
    -------
    ToolResult
        The result produced by ``generate_result_df``.

    Raises
    ------
    FileNotFoundError
        If any of the provided paths does not exist.
    json.JSONDecodeError
        If the schema or examples JSON files cannot be parsed.
    """
    saved_frame = load_saved_df(df_path)

    with open(dynamic_schema_path) as schema_file:
        schema = json.load(schema_file)
    with open(value_examples_path) as examples_file:
        examples = json.load(examples_file)

    model = build_llm_model()
    return generate_result_df(model, query, schema, examples, saved_frame, attempt_fix=False, summarize=False)