flowcept 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +7 -4
- flowcept/agents/__init__.py +5 -0
- flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
- flowcept/agents/agents_utils.py +181 -0
- flowcept/agents/dynamic_schema_tracker.py +191 -0
- flowcept/agents/flowcept_agent.py +30 -0
- flowcept/agents/flowcept_ctx_manager.py +175 -0
- flowcept/agents/gui/__init__.py +5 -0
- flowcept/agents/gui/agent_gui.py +76 -0
- flowcept/agents/gui/gui_utils.py +239 -0
- flowcept/agents/llms/__init__.py +1 -0
- flowcept/agents/llms/claude_gcp.py +139 -0
- flowcept/agents/llms/gemini25.py +119 -0
- flowcept/agents/prompts/__init__.py +1 -0
- flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
- flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
- flowcept/agents/tools/__init__.py +1 -0
- flowcept/agents/tools/general_tools.py +102 -0
- flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
- flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
- flowcept/cli.py +286 -44
- flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
- flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
- flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
- flowcept/commons/task_data_preprocess.py +260 -60
- flowcept/commons/utils.py +25 -6
- flowcept/configs.py +41 -26
- flowcept/flowcept_api/flowcept_controller.py +73 -6
- flowcept/flowceptor/adapters/base_interceptor.py +11 -5
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
- flowcept/flowceptor/consumers/base_consumer.py +4 -0
- flowcept/flowceptor/consumers/consumer_utils.py +5 -4
- flowcept/flowceptor/consumers/document_inserter.py +2 -2
- flowcept/flowceptor/telemetry_capture.py +5 -2
- flowcept/instrumentation/flowcept_agent_task.py +294 -0
- flowcept/instrumentation/flowcept_decorator.py +43 -0
- flowcept/instrumentation/flowcept_loop.py +3 -3
- flowcept/instrumentation/flowcept_task.py +64 -24
- flowcept/instrumentation/flowcept_torch.py +5 -5
- flowcept/instrumentation/task_capture.py +83 -6
- flowcept/version.py +1 -1
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/METADATA +42 -14
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/RECORD +50 -36
- resources/sample_settings.yaml +12 -4
- flowcept/flowceptor/adapters/agents/__init__.py +0 -1
- flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
- flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
- flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
- flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
- flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/WHEEL +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/entry_points.txt +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/licenses/LICENSE +0 -0
flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py (new file, +309)

@@ -0,0 +1,309 @@
import json
import re
import pandas as pd
import numpy as np
import ast


def load_saved_df(df_path: str) -> pd.DataFrame:
    """
    Load a DataFrame from a CSV file.

    Parameters
    ----------
    df_path : str
        Path to the CSV file containing the DataFrame.

    Returns
    -------
    pd.DataFrame
        The loaded DataFrame.
    """
    df = pd.read_csv(df_path, index_col=False)
    str_types = ["task_id", "parent_task_id", "workflow_id", "activity_id", "agent_id", "campaign_id", "hostname"]
    for col in str_types:
        if col in df.columns:
            df[col] = df[col].astype(str)
    dates = ["started_at", "ended_at"]
    for date in dates:
        if date in df.columns:
            df[date] = pd.to_datetime(df[date], errors="coerce")
    return df

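# Editor's sketch, not part of the released file: a minimal round trip through
# load_saved_df, assuming "saved_tasks.csv" is a hypothetical export that
# contains the id and timestamp columns listed above.
#
#     >>> df = load_saved_df("saved_tasks.csv")
#     >>> df["task_id"].dtype       # id columns coerced to str (object dtype)
#     dtype('O')
#     >>> df["started_at"].dtype    # timestamps parsed; bad values become NaT
#     dtype('<M8[ns]')
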
def normalize_output(result):
    """
    Ensures the result is returned as a pandas DataFrame.

    Converts scalars to 1-row, 1-col DataFrame, Series to 1-row DataFrame,
    and supports lists, tuples, NumPy arrays, and NumPy scalars.

    Parameters
    ----------
    result : Any
        The result from code execution (can be DataFrame, Series, scalar, list, tuple, or array).

    Returns
    -------
    pd.DataFrame
        A well-formatted DataFrame representation of the result.
    """
    if result is None:
        raise Exception("Result DataFrame is Empty.")

    _df: pd.DataFrame = None
    if isinstance(result, pd.DataFrame):
        _df = result

    elif isinstance(result, pd.Series):
        # Convert Series to single-row DataFrame
        _df = pd.DataFrame([result])

    elif isinstance(result, (int, float, str, bool, np.generic)):
        # Scalars or numpy scalars
        _df = pd.DataFrame({"Scalar_Value": [result]})

    elif isinstance(result, (list, tuple)):
        _df = pd.DataFrame({"List_Value": result})

    elif isinstance(result, np.ndarray):
        if result.ndim == 1:
            _df = pd.DataFrame({"Array_Value": result})
        elif result.ndim == 2:
            _df = pd.DataFrame(result)
        else:
            raise ValueError(f"Unsupported ndarray shape: {result.shape}")

    else:
        raise TypeError(f"Unsupported result type: {type(result)}")

    if not len(_df):
        raise ValueError("Result DataFrame is Empty.")

    return _df

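# Editor's sketch, not part of the released file: every supported shape
# collapses to a DataFrame, so downstream formatting has a single code path.
#
#     >>> normalize_output(42)
#        Scalar_Value
#     0            42
#     >>> normalize_output([1, 2])
#        List_Value
#     0           1
#     1           2
#     >>> normalize_output(np.array([[1, 2], [3, 4]])).shape
#     (2, 2)
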
def safe_execute(df: pd.DataFrame, code: str):
    """
    Strip any leftover fences, then execute the code in a limited namespace.

    Returns result or None.
    """
    code = clean_code(code)
    local_env = {"df": df, "pd": pd, "np": np}
    exec(code, {}, local_env)
    return local_env.get("result", None)

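# Editor's note, not part of the released file: exec() with an empty globals
# dict still receives __builtins__, so the namespace above limits convenience,
# not capability; code run here must be trusted. A minimal usage sketch,
# assuming the model returned a fenced snippet assigning to `result`:
#
#     >>> df = pd.DataFrame({"a": [1, 2, 3]})
#     >>> int(safe_execute(df, '```python\nresult = df["a"].sum()\n```'))
#     6
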
def format_result_df(result_df) -> str:
    """
    Format a pandas DataFrame as a CSV string with safety checks.

    This function validates that the input is a pandas DataFrame,
    ensures it is not empty, and converts it to a CSV string. If the
    DataFrame is very large, only the first 100 rows are included.

    Parameters
    ----------
    result_df : pandas.DataFrame
        The DataFrame to format.

    Returns
    -------
    str
        A CSV-formatted string representation of the DataFrame.

    Raises
    ------
    Exception
        If the input is not a DataFrame or if the DataFrame is empty.

    Notes
    -----
    - The maximum number of rows returned is 100. If the DataFrame has
      more than 100 rows, a message is printed and only the first 100
      rows are included.
    - The index column is omitted in the CSV output.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    >>> print(format_result_df(df))
    a,b
    1,x
    2,y
    3,z

    Example with more than 100 rows:

    >>> df = pd.DataFrame({"col": range(200)})
    >>> csv_str = format_result_df(df)  # prints warning
    >>> len(csv_str.splitlines())  # 101 lines (header + 100 rows)
    101
    """
    if isinstance(result_df, pd.DataFrame):
        if not len(result_df):
            raise Exception("Empty DataFrame")
        if len(result_df) > 100:
            print("Result set is too long. We are only going to send the head.")  # TODO log
            # TODO deal with very long results later
            result_df = result_df.head(100)
        return result_df.to_csv(index=False)
    else:
        raise Exception("Not a valid DataFrame")

def safe_json_parse(text):
    r"""
    Safely parse a string into a JSON object with minimal error recovery.

    This function attempts to parse the given text as JSON. If the
    initial parsing fails, it tries to fix common issues such as:

    - Leading/trailing whitespace
    - Surrounding backticks (e.g., Markdown-formatted JSON)
    - Missing opening/closing braces

    Parameters
    ----------
    text : str
        The input string expected to contain JSON content.

    Returns
    -------
    object
        The Python object resulting from parsing the JSON string
        (usually a dict or list).

    Raises
    ------
    ValueError
        If the text cannot be parsed as valid JSON even after recovery attempts.

    Examples
    --------
    Valid JSON string:

    >>> safe_json_parse('{"a": 1, "b": 2}')
    {'a': 1, 'b': 2}

    With backticks and whitespace:

    >>> safe_json_parse("``` { \\"x\\": 42 } ```")
    {'x': 42}

    Missing braces:

    >>> safe_json_parse('"y": 99')
    {'y': 99}
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Try to fix common issues
        text = text.strip().strip("`")  # remove backticks or whitespace
        if not text.startswith("{"):
            text = "{" + text
        if not text.endswith("}"):
            text = text + "}"
        try:
            return json.loads(text)
        except Exception as e:
            raise ValueError(f"Still failed to parse JSON: {e}")

def clean_code(text):
    """
    Extracts the first valid Python code block or line that starts with 'result =' from a model response.

    Parameters
    ----------
    text : str
        The raw string response from the agent.

    Returns
    -------
    str
        The extracted Python code or an empty string if none found.
    """
    # Try to find code block with triple backticks first
    block_match = re.search(r"```(?:python)?\s*(.*?)```", text, re.DOTALL)
    if block_match:
        return block_match.group(1).strip()

    # Fallback: try to find a line that starts with "result ="
    line_match = re.search(r"(result\s*=\s*.+)", text)
    if line_match:
        return line_match.group(1).strip()

    return ""

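# Editor's sketch, not part of the released file: fenced blocks win; the bare
# "result =" fallback is line-greedy, so trailing prose on the same line is
# returned as-is.
#
#     >>> clean_code("Here you go:\n```python\nresult = df.head()\n```")
#     'result = df.head()'
#     >>> clean_code("Try result = df.shape[0] to count rows.")
#     'result = df.shape[0] to count rows.'
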
def summarize_df(df: pd.DataFrame, df_query_code: str, max_rows: int = 5, max_cols: int = 10) -> pd.DataFrame:
    """
    Given a DataFrame and a query code string that operates on it, return a reduced
    version of the DataFrame that includes only the used columns and a small number
    of rows, but only if the DataFrame exceeds the row or column limits.

    Parameters
    ----------
    df : pd.DataFrame
        The full DataFrame.
    df_query_code : str
        The string containing Python code that operates on the DataFrame `df`.
    max_rows : int, optional
        Maximum number of rows to include in the reduced DataFrame (default is 5).
    max_cols : int, optional
        Maximum number of columns to include (default is 10).

    Returns
    -------
    pd.DataFrame
        A reduced version of the DataFrame suitable for sending to an LLM.
    """

    def extract_columns_from_code(code: str) -> list:
        """Extract column names accessed via df[...] or df.<column> in the code string."""
        try:
            tree = ast.parse(code)
        except SyntaxError:
            return []

        columns = set()

        class ColumnVisitor(ast.NodeVisitor):
            def visit_Subscript(self, node):
                if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
                    columns.add(node.slice.value)
                elif isinstance(node.slice, ast.Index) and isinstance(node.slice.value, ast.Str):
                    # Fallback for the pre-3.9 AST, where subscripts were wrapped in ast.Index
                    columns.add(node.slice.value.s)
                self.generic_visit(node)

            def visit_Attribute(self, node):
                columns.add(node.attr)
                self.generic_visit(node)

        ColumnVisitor().visit(tree)

        # Also catch bracketed string accesses the AST walk may have missed
        string_accesses = re.findall(r'\[["\']([\w\.\-]+)["\']\]', code)
        columns.update(string_accesses)

        return list(columns)

    used_columns = extract_columns_from_code(df_query_code)
    relevant_cols = [col for col in used_columns if col in df.columns]

    if not relevant_cols:
        relevant_cols = list(df.columns)

    # Only apply column reduction if column count exceeds max_cols
    if len(relevant_cols) > max_cols:
        relevant_cols = relevant_cols[:max_cols]

    reduced_df = df[relevant_cols]

    # Only apply row reduction if row count exceeds max_rows
    if reduced_df.shape[0] > max_rows:
        reduced_df = reduced_df.sample(n=max_rows, random_state=42)

    return reduced_df.reset_index(drop=True)
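
# Editor's sketch, not part of the released file: only the column named in the
# query survives, and rows are sampled down to max_rows (deterministically,
# via random_state=42).
#
#     >>> big = pd.DataFrame({f"c{i}": range(100) for i in range(20)})
#     >>> summarize_df(big, 'result = df["c3"].mean()').shape
#     (5, 1)
#
# Taken together, these helpers plausibly form the agent's query round trip:
# summarize_df trims the frame for the prompt, clean_code and safe_execute run
# the generated pandas query, and normalize_output plus format_result_df shape
# the reply sent back to the model.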