flowcept 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. flowcept/__init__.py +7 -4
  2. flowcept/agents/__init__.py +5 -0
  3. flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
  4. flowcept/agents/agents_utils.py +181 -0
  5. flowcept/agents/dynamic_schema_tracker.py +191 -0
  6. flowcept/agents/flowcept_agent.py +30 -0
  7. flowcept/agents/flowcept_ctx_manager.py +175 -0
  8. flowcept/agents/gui/__init__.py +5 -0
  9. flowcept/agents/gui/agent_gui.py +76 -0
  10. flowcept/agents/gui/gui_utils.py +239 -0
  11. flowcept/agents/llms/__init__.py +1 -0
  12. flowcept/agents/llms/claude_gcp.py +139 -0
  13. flowcept/agents/llms/gemini25.py +119 -0
  14. flowcept/agents/prompts/__init__.py +1 -0
  15. flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
  16. flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
  17. flowcept/agents/tools/__init__.py +1 -0
  18. flowcept/agents/tools/general_tools.py +102 -0
  19. flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
  20. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
  21. flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
  22. flowcept/cli.py +286 -44
  23. flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
  24. flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
  25. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
  26. flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
  27. flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
  28. flowcept/commons/task_data_preprocess.py +260 -60
  29. flowcept/commons/utils.py +25 -6
  30. flowcept/configs.py +41 -26
  31. flowcept/flowcept_api/flowcept_controller.py +73 -6
  32. flowcept/flowceptor/adapters/base_interceptor.py +11 -5
  33. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
  34. flowcept/flowceptor/consumers/base_consumer.py +4 -0
  35. flowcept/flowceptor/consumers/consumer_utils.py +5 -4
  36. flowcept/flowceptor/consumers/document_inserter.py +2 -2
  37. flowcept/flowceptor/telemetry_capture.py +5 -2
  38. flowcept/instrumentation/flowcept_agent_task.py +294 -0
  39. flowcept/instrumentation/flowcept_decorator.py +43 -0
  40. flowcept/instrumentation/flowcept_loop.py +3 -3
  41. flowcept/instrumentation/flowcept_task.py +64 -24
  42. flowcept/instrumentation/flowcept_torch.py +5 -5
  43. flowcept/instrumentation/task_capture.py +83 -6
  44. flowcept/version.py +1 -1
  45. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/METADATA +42 -14
  46. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/RECORD +50 -36
  47. resources/sample_settings.yaml +12 -4
  48. flowcept/flowceptor/adapters/agents/__init__.py +0 -1
  49. flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
  50. flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
  51. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
  52. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
  53. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
  54. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/WHEEL +0 -0
  55. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/entry_points.txt +0 -0
  56. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,704 @@
1
+ import json
2
+ import pandas as pd
3
+ from flowcept.agents.agents_utils import ToolResult, build_llm_model
4
+ from flowcept.agents.flowcept_ctx_manager import mcp_flowcept, ctx_manager
5
+ from flowcept.agents.prompts.in_memory_query_prompts import (
6
+ generate_plot_code_prompt,
7
+ extract_or_fix_json_code_prompt,
8
+ generate_pandas_code_prompt,
9
+ dataframe_summarizer_context,
10
+ extract_or_fix_python_code_prompt,
11
+ )
12
+
13
+ from flowcept.agents.tools.in_memory_queries.pandas_agent_utils import (
14
+ load_saved_df,
15
+ safe_execute,
16
+ safe_json_parse,
17
+ normalize_output,
18
+ format_result_df,
19
+ summarize_df,
20
+ )
21
+
22
+
23
@mcp_flowcept.tool()
def run_df_query(llm, query: str, plot=False) -> ToolResult:
    """
    Run a natural language query against the DataFrame in the MCP context.

    The active DataFrame, schema, and example values are pulled from the
    Flowcept lifespan context. Depending on the query text and the ``plot``
    flag, this either resets the context, saves the current state to disk,
    executes raw code directly, or delegates to the LLM-backed result/plot
    generators.

    Parameters
    ----------
    llm : callable
        Language model callable that accepts a prompt string and returns a
        response.
    query : str
        Natural language query (or Python snippet) to run against the
        context DataFrame.
    plot : bool, default=False
        If True, also generate plotting code via ``generate_plot_code``;
        otherwise only a result DataFrame is produced via
        ``generate_result_df``.

    Returns
    -------
    ToolResult
        - ``code=201`` : context reset or DataFrame/schema saved.
        - ``code=301`` : successful result DataFrame (plus plot code if requested).
        - ``code=404`` : no active DataFrame in the context.
        - Other codes bubble up from the delegated tools.

    Notes
    -----
    - A query containing "reset context" clears the active DataFrame.
    - A query containing "save" persists DataFrame, schema, and example
      values to disk via ``save_df``.
    - A query containing "result = df" is executed directly as code.
    """
    context = mcp_flowcept.get_context()
    lifespan = context.request_context.lifespan_context
    current_df: pd.DataFrame = lifespan.df
    schema = lifespan.tasks_schema
    value_examples = lifespan.value_examples

    # Guard clause: nothing to query against.
    if current_df is None or not len(current_df):
        return ToolResult(code=404, result="Current df is empty or null.")

    # Keyword-triggered maintenance actions take precedence over LLM calls.
    if "reset context" in query:
        lifespan.df = pd.DataFrame()
        return ToolResult(code=201, result="Context Reset!")
    if "save" in query:
        return save_df(current_df, schema, value_examples)
    if "result = df" in query:
        return run_df_code(user_code=query, df=current_df)

    # Both generators share the same positional signature.
    generator = generate_plot_code if plot else generate_result_df
    return generator(llm, query, schema, value_examples, current_df)
105
+
106
+
107
@mcp_flowcept.tool()
def generate_plot_code(llm, query, dynamic_schema, value_examples, df) -> ToolResult:
    """
    Generate DataFrame and plotting code from a natural language query using an LLM.

    Builds a prompt from the query, dynamic schema, and example values and asks
    the LLM for JSON with two fields: ``result_code`` (Python code transforming
    the DataFrame) and ``plot_code`` (Python code producing a plot). The
    transformation code is executed and the resulting DataFrame is formatted as
    CSV. If the LLM output is invalid JSON, a repair/extraction pass is
    attempted before failing.

    Parameters
    ----------
    llm : callable
        Language model callable that accepts a prompt string and returns a
        response.
    query : str
        Natural language query describing the desired transformation and plot.
    dynamic_schema : dict
        Schema definition describing the structure of the DataFrame.
    value_examples : dict
        Example values associated with the schema to guide the LLM.
    df : pandas.DataFrame
        The DataFrame to query and transform.

    Returns
    -------
    ToolResult
        - On success (code=301): result dict with ``result_df`` (CSV text),
          ``plot_code``, and ``result_code``.
        - On failure (codes 400, 404-406, 499): an error message, with the
          original prompt in ``extra`` where available for debugging.
    """
    plot_prompt = generate_plot_code_prompt(query, dynamic_schema, value_examples)
    try:
        response = llm(plot_prompt)
    except Exception as e:
        return ToolResult(code=400, result=str(e), extra=plot_prompt)

    result_code, plot_code = None, None
    try:
        result = safe_json_parse(response)
        result_code = result["result_code"]
        plot_code = result["plot_code"]

    except ValueError:
        tool_response = extract_or_fix_json_code(llm, response)
        response = tool_response.result
        if tool_response.code == 201:
            try:
                result = safe_json_parse(response)
                assert "result_code" in result
                assert "plot_code" in result
                # BUG FIX: a ToolResult used to be constructed (and silently
                # dropped) here, leaving result_code/plot_code as None so the
                # execution below ran safe_execute(df, None). Assign the
                # repaired values so the normal flow below can use them.
                result_code = result["result_code"]
                plot_code = result["plot_code"]
            except ValueError as e:
                return ToolResult(
                    code=405, result=f"Tried to parse this as JSON: {response}, but got Error: {e}", extra=plot_prompt
                )
            except AssertionError as e:
                return ToolResult(code=405, result=str(e), extra=plot_prompt)

        else:
            return ToolResult(code=499, result=tool_response.result)
    except AssertionError as e:
        return ToolResult(code=405, result=str(e), extra=plot_prompt)
    except Exception as e:
        return ToolResult(code=499, result=str(e), extra=plot_prompt)

    try:
        result_df = safe_execute(df, result_code)
    except Exception as e:
        return ToolResult(code=406, result=str(e))
    try:
        result_df = format_result_df(result_df)
    except Exception as e:
        return ToolResult(code=404, result=str(e))

    this_result = {"result_df": result_df, "plot_code": plot_code, "result_code": result_code}
    return ToolResult(code=301, result=this_result, tool_name=generate_plot_code.__name__)
221
+
222
+
223
@mcp_flowcept.tool()
def generate_result_df(llm, query: str, dynamic_schema, example_values, df, attempt_fix=True, summarize=True):
    """
    Generate a result DataFrame from a natural language query using an LLM.

    Builds a pandas-code-generation prompt from the query, schema, and example
    values, executes the LLM-generated code against ``df``, and — when enabled —
    repairs invalid generated code with a second LLM call and summarizes the
    final result. The result DataFrame is normalized and formatted as CSV.

    Parameters
    ----------
    llm : callable
        Language model callable that accepts a prompt string and returns a
        response (e.g., generated code or a summary).
    query : str
        Natural language query to be executed against the DataFrame.
    dynamic_schema : dict
        Schema definition describing the structure of the DataFrame.
    example_values : dict
        Example values associated with the schema to guide the LLM.
    df : pandas.DataFrame
        The DataFrame to run the query against.
    attempt_fix : bool, default=True
        If True, attempt to fix invalid generated code via
        ``extract_or_fix_python_code``.
    summarize : bool, default=True
        If True, attempt a natural language summary of the result.

    Returns
    -------
    ToolResult
        - On success (codes 301-303): result dict with ``result_code``,
          ``result_df`` (CSV text), ``summary``, and ``summary_error``.
        - On failure (codes 400, 405, 504): an error message, with debugging
          context (generated code, exception, prompt) in ``extra``.

    Notes
    -----
    - Columns with only NaN values are dropped from the result.
    - Summarization errors are non-blocking; the DataFrame is still returned
      (return codes 302/303 signal the summary failure mode).
    """
    # BUG FIX: initialize prompt so the handler below cannot raise NameError
    # when generate_pandas_code_prompt itself fails (extra=prompt used to
    # reference a possibly-unbound name, masking the real error).
    prompt = None
    try:
        prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values)
        response = llm(prompt)
    except Exception as e:
        return ToolResult(code=400, result=str(e), extra=prompt)

    try:
        result_code = response
        result_df = safe_execute(df, result_code)
    except Exception as e:
        if not attempt_fix:
            return ToolResult(
                code=405,
                result=f"Failed to parse this as Python code: \n\n ```python\n {result_code} \n```\n "
                f"but got error:\n\n {e}.",
                extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
            )
        else:
            tool_result = extract_or_fix_python_code(llm, result_code)
            if tool_result.code == 201:
                new_result_code = tool_result.result
                try:
                    result_df = safe_execute(df, new_result_code)
                except Exception as e:
                    return ToolResult(
                        code=405,
                        result=f"Failed to parse this as Python code: \n\n"
                        f"```python\n {result_code} \n```\n "
                        f"Then tried to LLM extract the Python code, got: \n\n "
                        f"```python\n{new_result_code}```\n "
                        f"but got error:\n\n {e}.",
                    )

            else:
                return ToolResult(
                    code=405,
                    result=f"Failed to parse this as Python code: {result_code}."
                    f"Exception: {e}\n"
                    f"Then tried to LLM extract the Python code, but got error:"
                    f" {tool_result.result}",
                )

    try:
        result_df = normalize_output(result_df)
    except Exception as e:
        return ToolResult(
            code=504,
            result="Failed to normalize output of the resulting dataframe.",
            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
        )

    # Drop all-NaN columns before summarization/formatting.
    result_df = result_df.dropna(axis=1, how="all")

    return_code = 301
    summary, summary_error = None, None
    if summarize:
        try:
            tool_result = summarize_result(llm, result_code, result_df, query)
            if tool_result.is_success():
                return_code = 301
                summary = tool_result.result
            else:
                return_code = 302
                summary_error = tool_result.result
        except Exception as e:
            # Non-blocking: a summary failure must not discard the result.
            ctx_manager.logger.exception(e)
            summary = ""
            summary_error = str(e)
            return_code = 303

    try:
        result_df = format_result_df(result_df)
    except Exception as e:
        return ToolResult(
            code=405,
            result="Failed to format output of the resulting dataframe.",
            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
        )

    this_result = {
        "result_code": result_code,
        "result_df": result_df,
        "summary": summary,
        "summary_error": summary_error,
    }
    return ToolResult(
        code=return_code, result=this_result, tool_name=generate_result_df.__name__, extra={"prompt": prompt}
    )
385
+
386
+
387
@mcp_flowcept.tool()
def run_df_code(user_code: str, df):
    """
    Execute user-provided Python code on a DataFrame and format the result.

    The code is run through ``safe_execute`` against the given DataFrame, the
    output is normalized, all-NaN columns are dropped, and the result is
    returned as CSV-formatted text inside a ``ToolResult``. Errors during
    execution or normalization are captured rather than raised.

    Parameters
    ----------
    user_code : str
        Python code intended to operate on the provided DataFrame.
    df : pandas.DataFrame
        The input DataFrame on which the code will be executed.

    Returns
    -------
    ToolResult
        - On success (code=301): dict with ``result_code`` (the original
          snippet) and ``result_df`` (CSV-formatted result).
        - On failure (code=405): the error message explaining the failure.
    """
    try:
        executed = safe_execute(df, user_code)
    except Exception as e:
        return ToolResult(code=405, result=f"Failed to run this as Python code: {user_code}. Got error {e}")

    try:
        executed = normalize_output(executed)
    except Exception as e:
        return ToolResult(code=405, result=str(e))

    cleaned = executed.dropna(axis=1, how="all")
    formatted = format_result_df(cleaned)

    payload = {
        "result_code": user_code,
        "result_df": formatted,
    }
    return ToolResult(code=301, result=payload, tool_name=run_df_code.__name__)
461
+
462
+
463
@mcp_flowcept.tool()
def extract_or_fix_python_code(llm, raw_text):
    """
    Extract or repair Python code from raw text using an LLM.

    Builds a Python-code-extraction prompt around ``raw_text`` (via
    ``extract_or_fix_python_code_prompt``) and passes it to the provided
    language model. The LLM is expected to return clean, executable Python
    code extracted or repaired from the text. The outcome is wrapped in a
    ``ToolResult``.

    Parameters
    ----------
    llm : callable
        A language model function or object that can be invoked with a
        prompt string and returns a response (e.g., an LLM wrapper).
    raw_text : str
        Raw text containing Python code or fragments that may need to be
        extracted or fixed (e.g., code wrapped in markdown fences or with
        minor syntax problems).

    Returns
    -------
    ToolResult
        - ``code=201`` if the extraction/fix succeeded, with the LLM output
          in ``result``.
        - ``code=499`` if an exception occurred, with the error message in
          ``result``.

    Examples
    --------
    >>> def mock_llm(prompt):
    ...     return 'result = df.head()'
    >>> res = extract_or_fix_python_code(mock_llm, "```python\\nresult = df.head()\\n```")
    >>> print(res)
    ToolResult(code=201, result='result = df.head()')

    >>> def broken_llm(prompt):
    ...     raise RuntimeError("LLM service unavailable")
    >>> res = extract_or_fix_python_code(broken_llm, "result = df.head(")
    >>> print(res)
    ToolResult(code=499, result='LLM service unavailable')
    """
    prompt = extract_or_fix_python_code_prompt(raw_text)
    try:
        response = llm(prompt)
        return ToolResult(code=201, result=response)
    except Exception as e:
        return ToolResult(code=499, result=str(e))
520
+
521
+
522
@mcp_flowcept.tool()
def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
    """
    Extract or repair JSON code from raw text using a language model.

    Wraps ``raw_text`` in a JSON-extraction prompt and invokes the LLM, which
    is expected to extract valid JSON or repair malformed JSON structures.
    The outcome is wrapped in a ``ToolResult``.

    Parameters
    ----------
    llm : Callable[[str], str]
        A callable LLM function or wrapper that accepts a prompt string and
        returns a string response.
    raw_text : str
        Input text containing JSON code or fragments that may be incomplete
        or malformed.

    Returns
    -------
    ToolResult
        - ``code=201`` with the LLM response in ``result`` on success.
        - ``code=499`` with the error message in ``result`` on failure.

    Examples
    --------
    >>> def mock_llm(prompt: str) -> str:
    ...     return '{"foo": "bar"}'
    >>> extract_or_fix_json_code(mock_llm, "Broken JSON: {foo: bar}")
    ToolResult(code=201, result='{"foo": "bar"}')

    >>> def broken_llm(prompt: str) -> str:
    ...     raise RuntimeError("LLM not available")
    >>> extract_or_fix_json_code(broken_llm, "{foo: bar}")
    ToolResult(code=499, result='LLM not available')
    """
    fix_prompt = extract_or_fix_json_code_prompt(raw_text)
    try:
        llm_output = llm(fix_prompt)
    except Exception as e:
        return ToolResult(code=499, result=str(e))
    return ToolResult(code=201, result=llm_output)
570
+
571
+
572
@mcp_flowcept.tool()
def summarize_result(llm, code, result, query: str) -> ToolResult:
    """
    Summarize a pandas query result, reducing large DataFrames locally first.

    ``summarize_df`` shrinks the result before prompting: wide DataFrames are
    reduced to top columns by variance and uniqueness, long ones are truncated
    to preview rows. The reduced frame, the generating code, and the original
    query are combined into a summarization prompt for the LLM.

    Parameters
    ----------
    llm : callable
        Language model callable that accepts a prompt string and returns text.
    code : str
        The Python code that produced ``result`` (gives the LLM context).
    result : pandas.DataFrame
        The DataFrame to summarize.
    query : str
        The original natural language query.

    Returns
    -------
    ToolResult
        ``code=201`` with the summary text on success; ``code=400`` with the
        error message if the LLM call fails.
    """
    reduced = summarize_df(result, code)
    summary_prompt = dataframe_summarizer_context(code, reduced, query)
    try:
        return ToolResult(code=201, result=llm(summary_prompt))
    except Exception as e:
        return ToolResult(code=400, result=str(e))
587
+
588
+
589
@mcp_flowcept.tool()
def save_df(df, schema, value_examples):
    """
    Save a DataFrame, its schema, and example values to temporary files.

    The schema and value examples are written as JSON, and the DataFrame as
    CSV, to fixed locations under ``/tmp``. Useful for persisting the current
    state of an agent's task data for later querying or debugging.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame to save.
    schema : dict
        A dictionary describing the schema of the DataFrame.
    value_examples : dict
        Example values associated with the DataFrame schema.

    Returns
    -------
    ToolResult
        ``code=201`` with a confirmation message.

    Notes
    -----
    Files are written to fixed locations in ``/tmp``:

    - ``/tmp/current_tasks_schema.json`` — schema
    - ``/tmp/value_examples.json`` — example values
    - ``/tmp/current_agent_df.csv`` — DataFrame contents
    """
    # JSON payloads share the same dump settings; write them in one pass.
    json_targets = (
        ("/tmp/current_tasks_schema.json", schema),
        ("/tmp/value_examples.json", value_examples),
    )
    for path, payload in json_targets:
        with open(path, "w") as f:
            json.dump(payload, f, indent=2)
    df.to_csv("/tmp/current_agent_df.csv", index=False)
    return ToolResult(code=201, result="Saved df and schema to /tmp directory")
640
+
641
+
642
@mcp_flowcept.tool()
def query_on_saved_df(query: str, dynamic_schema_path, value_examples_path, df_path):
    """
    Run a natural language query against a saved DataFrame with schema and value examples.

    Loads a previously saved DataFrame, dynamic schema, and value examples
    from disk, builds an LLM via ``build_llm_model``, and delegates to
    ``generate_result_df`` with code-repair and summarization disabled.

    Parameters
    ----------
    query : str
        Natural language query to execute against the DataFrame.
    dynamic_schema_path : str
        Path to a JSON file containing the schema definition used by the LLM.
    value_examples_path : str
        Path to a JSON file with example values to guide the LLM query.
    df_path : str
        Path to the saved DataFrame file.

    Returns
    -------
    ToolResult
        The result of ``generate_result_df``: on success, a dict with
        ``result_code`` and the CSV-formatted ``result_df``; on failure, an
        error message. (Note: this tool returns a ``ToolResult``, not a bare
        ``pandas.DataFrame``.)

    Raises
    ------
    FileNotFoundError
        If any of the provided paths (schema, examples, DataFrame) do not exist.
    json.JSONDecodeError
        If schema or examples JSON files cannot be parsed.
    Exception
        Propagates exceptions from LLM construction or DataFrame loading.

    Examples
    --------
    >>> res = query_on_saved_df(
    ...     "Show me the total sales by region",
    ...     dynamic_schema_path="schemas/sales_schema.json",
    ...     value_examples_path="schemas/sales_examples.json",
    ...     df_path="data/sales.parquet"
    ... )
    >>> print(res.code)  # doctest: +SKIP
    301
    """
    df = load_saved_df(df_path)

    with open(dynamic_schema_path) as f:
        dynamic_schema = json.load(f)

    with open(value_examples_path) as f:
        value_examples = json.load(f)

    llm = build_llm_model()
    return generate_result_df(llm, query, dynamic_schema, value_examples, df, attempt_fix=False, summarize=False)