flowcept 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. flowcept/__init__.py +7 -4
  2. flowcept/agents/__init__.py +5 -0
  3. flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
  4. flowcept/agents/agents_utils.py +181 -0
  5. flowcept/agents/dynamic_schema_tracker.py +191 -0
  6. flowcept/agents/flowcept_agent.py +30 -0
  7. flowcept/agents/flowcept_ctx_manager.py +175 -0
  8. flowcept/agents/gui/__init__.py +5 -0
  9. flowcept/agents/gui/agent_gui.py +76 -0
  10. flowcept/agents/gui/gui_utils.py +239 -0
  11. flowcept/agents/llms/__init__.py +1 -0
  12. flowcept/agents/llms/claude_gcp.py +139 -0
  13. flowcept/agents/llms/gemini25.py +119 -0
  14. flowcept/agents/prompts/__init__.py +1 -0
  15. flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
  16. flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
  17. flowcept/agents/tools/__init__.py +1 -0
  18. flowcept/agents/tools/general_tools.py +102 -0
  19. flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
  20. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
  21. flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
  22. flowcept/cli.py +286 -44
  23. flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
  24. flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
  25. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
  26. flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
  27. flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
  28. flowcept/commons/task_data_preprocess.py +260 -60
  29. flowcept/commons/utils.py +25 -6
  30. flowcept/configs.py +41 -26
  31. flowcept/flowcept_api/flowcept_controller.py +73 -6
  32. flowcept/flowceptor/adapters/base_interceptor.py +11 -5
  33. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
  34. flowcept/flowceptor/consumers/base_consumer.py +4 -0
  35. flowcept/flowceptor/consumers/consumer_utils.py +5 -4
  36. flowcept/flowceptor/consumers/document_inserter.py +2 -2
  37. flowcept/flowceptor/telemetry_capture.py +5 -2
  38. flowcept/instrumentation/flowcept_agent_task.py +294 -0
  39. flowcept/instrumentation/flowcept_decorator.py +43 -0
  40. flowcept/instrumentation/flowcept_loop.py +3 -3
  41. flowcept/instrumentation/flowcept_task.py +64 -24
  42. flowcept/instrumentation/flowcept_torch.py +5 -5
  43. flowcept/instrumentation/task_capture.py +83 -6
  44. flowcept/version.py +1 -1
  45. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/METADATA +42 -14
  46. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/RECORD +50 -36
  47. resources/sample_settings.yaml +12 -4
  48. flowcept/flowceptor/adapters/agents/__init__.py +0 -1
  49. flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
  50. flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
  51. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
  52. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
  53. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
  54. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/WHEEL +0 -0
  55. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/entry_points.txt +0 -0
  56. {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,309 @@
1
+ import json
2
+ import re
3
+ import pandas as pd
4
+ import numpy as np
5
+ import ast
6
+
7
+
8
def load_saved_df(df_path: str) -> pd.DataFrame:
    """
    Read a previously saved DataFrame back from a CSV file.

    Identifier-like columns are coerced to ``str`` and the timestamp
    columns to ``datetime64`` (unparseable values become ``NaT``).

    Parameters
    ----------
    df_path : str
        Path to the CSV file containing the DataFrame.

    Returns
    -------
    pd.DataFrame
        The loaded DataFrame with normalized column dtypes.
    """
    frame = pd.read_csv(df_path, index_col=False)
    id_columns = ("task_id", "parent_task_id", "workflow_id", "activity_id", "agent_id", "campaign_id", "hostname")
    for name in id_columns:
        if name in frame.columns:
            frame[name] = frame[name].astype(str)
    for ts_name in ("started_at", "ended_at"):
        if ts_name in frame.columns:
            frame[ts_name] = pd.to_datetime(frame[ts_name], errors="coerce")
    return frame
32
+
33
+
34
def normalize_output(result):
    """
    Coerce an arbitrary execution result into a pandas DataFrame.

    Scalars (including NumPy scalars) become a 1-row, 1-col DataFrame;
    Series become a 1-row DataFrame; lists/tuples become a single column;
    1-D/2-D NumPy arrays are wrapped accordingly.

    Parameters
    ----------
    result : Any
        The result from code execution (DataFrame, Series, scalar, list,
        tuple, or NumPy array).

    Returns
    -------
    pd.DataFrame
        A well-formatted DataFrame representation of the result.

    Raises
    ------
    ValueError
        If the result is None, empty, or an ndarray with ndim > 2.
    TypeError
        If the result type is not supported.
    """
    if result is None:
        # ValueError (not bare Exception) so callers can catch precisely;
        # still backward-compatible with existing `except Exception` handlers.
        raise ValueError("Result DataFrame is Empty.")

    _df: pd.DataFrame
    if isinstance(result, pd.DataFrame):
        _df = result

    elif isinstance(result, pd.Series):
        # Convert Series to a single-row DataFrame.
        _df = pd.DataFrame([result])

    elif isinstance(result, (int, float, str, bool, np.generic)):
        # Python or NumPy scalars.
        _df = pd.DataFrame({"Scalar_Value": [result]})

    elif isinstance(result, (list, tuple)):
        _df = pd.DataFrame({"List_Value": result})

    elif isinstance(result, np.ndarray):
        if result.ndim == 1:
            _df = pd.DataFrame({"Array_Value": result})
        elif result.ndim == 2:
            _df = pd.DataFrame(result)
        else:
            raise ValueError(f"Unsupported ndarray shape: {result.shape}")

    else:
        raise TypeError(f"Unsupported result type: {type(result)}")

    if not len(_df):
        raise ValueError("Result DataFrame is Empty.")

    return _df
83
+
84
+
85
def safe_execute(df: pd.DataFrame, code: str):
    """
    Strip any leftover Markdown fences from ``code`` and execute it in a
    limited namespace exposing only ``df``, ``pd``, and ``np``.

    Returns the value the code bound to ``result``, or None when the
    snippet did not define one.

    NOTE: ``exec`` on model-generated code is inherently unsafe; callers
    must only pass code from a trusted source.
    """
    snippet = clean_code(code)
    namespace = {"df": df, "pd": pd, "np": np}
    exec(snippet, {}, namespace)
    return namespace.get("result")
94
+
95
+
96
def format_result_df(result_df) -> str:
    """
    Format a pandas DataFrame as a CSV string with safety checks.

    The input is validated to be a non-empty pandas DataFrame and then
    converted to CSV. Very large frames are truncated: only the first
    100 rows are included, and a warning is printed.

    Parameters
    ----------
    result_df : pandas.DataFrame
        The DataFrame to format.

    Returns
    -------
    str
        A CSV-formatted string representation of the DataFrame (index
        column omitted).

    Raises
    ------
    Exception
        If the input is not a DataFrame or if the DataFrame is empty.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    >>> print(format_result_df(df))
    a,b
    1,x
    2,y
    3,z

    Example with more than 100 rows:

    >>> df = pd.DataFrame({"col": range(200)})
    >>> csv_str = format_result_df(df)  # prints warning
    >>> len(csv_str.splitlines())  # 101 lines (header + 100 rows)
    101
    """
    # Guard clauses first: reject non-DataFrames and empty frames early.
    if not isinstance(result_df, pd.DataFrame):
        raise Exception("Not a valid DataFrame")
    if not len(result_df):
        raise Exception("Empty DataFrame")
    if len(result_df) > 100:
        print("Result set is too long. We are only going to send the head.")  # TODO log
        # TODO deal with very long results later
        result_df = result_df.head(100)
    return result_df.to_csv(index=False)
154
+
155
+
156
def safe_json_parse(text):
    r"""
    Safely parse a string into a JSON object with minimal error recovery.

    This function attempts to parse the given text as JSON. If the
    initial parsing fails, it tries to fix common issues such as:

    - Leading/trailing whitespace
    - Surrounding backticks / Markdown code fences (including a
      leading ```json language tag)
    - Missing opening/closing braces

    Parameters
    ----------
    text : str
        The input string expected to contain JSON content.

    Returns
    -------
    object
        The Python object resulting from parsing the JSON string
        (usually a dict or list).

    Raises
    ------
    ValueError
        If the text cannot be parsed as valid JSON even after recovery attempts.

    Examples
    --------
    Valid JSON string:

    >>> safe_json_parse('{"a": 1, "b": 2}')
    {'a': 1, 'b': 2}

    With backticks and whitespace:

    >>> safe_json_parse("``` { \\"x\\": 42 } ```")
    {'x': 42}

    Missing braces:

    >>> safe_json_parse('"y": 99')
    {'y': 99}
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Strip whitespace and Markdown backticks. Strip whitespace AGAIN:
        # removing the backticks can expose inner padding (e.g. "``` {...} ```"),
        # and leftover whitespace would defeat the brace checks below.
        text = text.strip().strip("`").strip()
        # Drop a leading language tag left behind by a fenced block (```json).
        if text.lower().startswith("json"):
            text = text[4:].strip()
        if not text.startswith("{"):
            text = "{" + text
        if not text.endswith("}"):
            text = text + "}"
        try:
            return json.loads(text)
        except Exception as e:
            # Chain the underlying error for easier debugging.
            raise ValueError(f"Still failed to parse JSON: {e}") from e
213
+
214
+
215
def clean_code(text):
    """
    Extract the first fenced Python code block, or the first line that
    assigns to ``result``, from a model response.

    Parameters
    ----------
    text : str
        The raw string response from the agent.

    Returns
    -------
    str
        The extracted Python code, or an empty string when none is found.
    """
    # Prefer a fenced code block (``` or ```python).
    fenced = re.search(r"```(?:python)?\s*(.*?)```", text, re.DOTALL)
    if fenced is not None:
        return fenced.group(1).strip()

    # Otherwise fall back to a bare "result = ..." line.
    assignment = re.search(r"(result\s*=\s*.+)", text)
    return assignment.group(1).strip() if assignment else ""
240
+
241
+
242
def summarize_df(df: pd.DataFrame, df_query_code: str, max_rows: int = 5, max_cols: int = 10) -> pd.DataFrame:
    """
    Reduce a DataFrame to the columns referenced by ``df_query_code`` and
    a small sample of rows, e.g. for cheap transmission to an LLM.

    Reduction is applied only when the DataFrame exceeds the row or
    column limits; otherwise the relevant data is kept as-is.

    Parameters
    ----------
    df : pd.DataFrame
        The full DataFrame.
    df_query_code : str
        The string containing Python code that operates on the DataFrame `df`.
    max_rows : int, optional
        Maximum number of rows to include in the reduced DataFrame (default is 5).
    max_cols : int, optional
        Maximum number of columns to include (default is 10).

    Returns
    -------
    pd.DataFrame
        A reduced version of the DataFrame suitable for sending to an LLM.
    """

    def extract_columns_from_code(code: str) -> list:
        """Extract column names accessed via df[...] or df.<column> in the code string."""
        try:
            tree = ast.parse(code)
        except SyntaxError:
            return []

        columns = set()

        class ColumnVisitor(ast.NodeVisitor):
            def visit_Subscript(self, node):
                # Since Python 3.9 subscript slices are plain expressions
                # (no ast.Index wrapper), so ast.Constant covers df["col"].
                # The legacy ast.Index/ast.Str branch was removed: it was
                # dead code on modern Python and ast.Str is deprecated.
                if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
                    columns.add(node.slice.value)
                self.generic_visit(node)

            def visit_Attribute(self, node):
                # df.col attribute access; also picks up method names,
                # which are filtered out by the membership check below.
                columns.add(node.attr)
                self.generic_visit(node)

        ColumnVisitor().visit(tree)

        # Regex fallback for string subscripts the AST pass may miss.
        columns.update(re.findall(r'\[["\']([\w\.\-]+)["\']\]', code))

        return list(columns)

    used_columns = extract_columns_from_code(df_query_code)
    relevant_cols = [col for col in used_columns if col in df.columns]

    # If nothing matched (e.g. unparseable code), fall back to all columns.
    if not relevant_cols:
        relevant_cols = list(df.columns)

    # Only apply column reduction if column count exceeds max_cols.
    if len(relevant_cols) > max_cols:
        relevant_cols = relevant_cols[:max_cols]

    reduced_df = df[relevant_cols]

    # Only apply row reduction if row count exceeds max_rows; the fixed
    # seed keeps the sample deterministic across calls.
    if reduced_df.shape[0] > max_rows:
        reduced_df = reduced_df.sample(n=max_rows, random_state=42)

    return reduced_df.reset_index(drop=True)