flowcept 0.8.10__py3-none-any.whl → 0.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +7 -4
- flowcept/agents/__init__.py +5 -0
- flowcept/agents/agent_client.py +58 -0
- flowcept/agents/agents_utils.py +181 -0
- flowcept/agents/dynamic_schema_tracker.py +191 -0
- flowcept/agents/flowcept_agent.py +30 -0
- flowcept/agents/flowcept_ctx_manager.py +175 -0
- flowcept/agents/gui/__init__.py +5 -0
- flowcept/agents/gui/agent_gui.py +76 -0
- flowcept/agents/gui/gui_utils.py +239 -0
- flowcept/agents/llms/__init__.py +1 -0
- flowcept/agents/llms/claude_gcp.py +139 -0
- flowcept/agents/llms/gemini25.py +119 -0
- flowcept/agents/prompts/__init__.py +1 -0
- flowcept/agents/prompts/general_prompts.py +69 -0
- flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
- flowcept/agents/tools/__init__.py +1 -0
- flowcept/agents/tools/general_tools.py +102 -0
- flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
- flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
- flowcept/cli.py +459 -17
- flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
- flowcept/commons/daos/keyvalue_dao.py +19 -23
- flowcept/commons/daos/mq_dao/mq_dao_base.py +49 -38
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +20 -3
- flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
- flowcept/commons/daos/redis_conn.py +47 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +50 -27
- flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
- flowcept/commons/settings_factory.py +2 -4
- flowcept/commons/task_data_preprocess.py +400 -0
- flowcept/commons/utils.py +26 -7
- flowcept/configs.py +48 -29
- flowcept/flowcept_api/flowcept_controller.py +102 -18
- flowcept/flowceptor/adapters/base_interceptor.py +24 -11
- flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
- flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
- flowcept/flowceptor/consumers/agent/__init__.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +125 -0
- flowcept/flowceptor/consumers/base_consumer.py +94 -0
- flowcept/flowceptor/consumers/consumer_utils.py +5 -4
- flowcept/flowceptor/consumers/document_inserter.py +135 -36
- flowcept/flowceptor/telemetry_capture.py +6 -3
- flowcept/instrumentation/flowcept_agent_task.py +294 -0
- flowcept/instrumentation/flowcept_decorator.py +43 -0
- flowcept/instrumentation/flowcept_loop.py +3 -3
- flowcept/instrumentation/flowcept_task.py +64 -24
- flowcept/instrumentation/flowcept_torch.py +5 -5
- flowcept/instrumentation/task_capture.py +87 -4
- flowcept/version.py +1 -1
- {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/METADATA +48 -11
- flowcept-0.8.12.dist-info/RECORD +101 -0
- resources/sample_settings.yaml +46 -14
- flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
- flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
- flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
- flowcept-0.8.10.dist-info/RECORD +0 -75
- {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/WHEEL +0 -0
- {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/entry_points.txt +0 -0
- {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/licenses/LICENSE +0 -0
flowcept/commons/task_data_preprocess.py
ADDED
@@ -0,0 +1,400 @@
+from datetime import datetime
+from typing import Dict, List
+import copy
+from collections import defaultdict
+from typing import Any
+
+import pytz
+
+
+def summarize_telemetry(task: Dict, logger) -> Dict:
+    """
+    Extract and compute the telemetry summary for a task based on start and end telemetry snapshots.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing telemetry_at_start and telemetry_at_end.
+
+    Returns
+    -------
+    dict
+        A summary of telemetry differences including CPU, disk, memory, and network metrics, and task duration.
+    """
+
+    def extract_cpu_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "percent_all_diff": end["percent_all"] - start["percent_all"],
+            "user_time_diff": end["times_avg"]["user"] - start["times_avg"]["user"],
+            "system_time_diff": end["times_avg"]["system"] - start["times_avg"]["system"],
+            "idle_time_diff": end["times_avg"]["idle"] - start["times_avg"]["idle"],
+        }
+
+    def extract_disk_info(start: Dict, end: Dict) -> Dict:
+        io_start = start["io_sum"]
+        io_end = end["io_sum"]
+        return {
+            "read_bytes_diff": io_end["read_bytes"] - io_start["read_bytes"],
+            "write_bytes_diff": io_end["write_bytes"] - io_start["write_bytes"],
+            "read_count_diff": io_end["read_count"] - io_start["read_count"],
+            "write_count_diff": io_end["write_count"] - io_start["write_count"],
+        }
+
+    def extract_mem_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "used_mem_diff": end["virtual"]["used"] - start["virtual"]["used"],
+            "percent_diff": end["virtual"]["percent"] - start["virtual"]["percent"],
+            "swap_used_diff": end["swap"]["used"] - start["swap"]["used"],
+        }
+
+    def extract_network_info(start: Dict, end: Dict) -> Dict:
+        net_start = start["netio_sum"]
+        net_end = end["netio_sum"]
+        return {
+            "bytes_sent_diff": net_end["bytes_sent"] - net_start["bytes_sent"],
+            "bytes_recv_diff": net_end["bytes_recv"] - net_start["bytes_recv"],
+            "packets_sent_diff": net_end["packets_sent"] - net_start["packets_sent"],
+            "packets_recv_diff": net_end["packets_recv"] - net_start["packets_recv"],
+        }
+
+    tel_funcs = {
+        "cpu": extract_cpu_info,
+        "disk": extract_disk_info,
+        "memory": extract_mem_info,
+        "network": extract_network_info,
+    }
+
+    start_tele = task.get("telemetry_at_start", {})
+    end_tele = task.get("telemetry_at_end", {})
+
+    telemetry_summary = {}
+
+    try:
+        started_at = task.get("started_at", None)
+        ended_at = task.get("ended_at", None)
+        if started_at is None or ended_at is None:
+            logger.warning(f"We can't summarize telemetry for duration_sec for task {task}")
+        else:
+            duration = ended_at - started_at
+            telemetry_summary["duration_sec"] = duration
+    except Exception as e:
+        logger.error(f"Error to summarize telemetry for duration_sec in {task}")
+        logger.exception(e)
+
+    for key in start_tele.keys():
+        try:
+            if key not in tel_funcs:
+                continue
+            func = tel_funcs[key]
+            if key in end_tele:
+                telemetry_summary[key] = func(start_tele[key], end_tele[key])
+            else:
+                logger.warning(
+                    f"We can't summarize telemetry {key} for task {task} because the key is not in the end_tele"
+                )
+        except Exception as e:
+            logger.warning(f"Error to summarize telemetry for {key} for task {task}. Exception: {e}")
+            logger.exception(e)
+
+    return telemetry_summary
+
+
+def _safe_get(task, key):
+    try:
+        return task.get(key)
+    except Exception:
+        return None
+
+
+def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
+    """
+    Summarize key metadata and telemetry for a task, optionally tagging critical conditions.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing metadata and telemetry snapshots.
+    thresholds : dict, optional
+        Threshold values used to tag abnormal resource usage.
+
+    Returns
+    -------
+    dict
+        Summary of the task including identifiers, telemetry summary, and optional critical tags.
+    """
+    task_summary = {}
+
+    # Keys that can be copied directly
+    for key in [
+        "workflow_id",
+        "task_id",
+        "parent_task_id",
+        "activity_id",
+        "used",
+        "generated",
+        "hostname",
+        "status",
+        "agent_id",
+        "campaign_id",
+        "subtype",
+        "custom_metadata",
+    ]:
+        value = _safe_get(task, key)
+        if value is not None:
+            if "_id" in key:
+                task_summary[key] = str(value)
+            else:
+                task_summary[key] = value
+
+    # Special handling for timestamp field
+    try:
+        time_keys = ["started_at", "ended_at"]
+        for time_key in time_keys:
+            timestamp = _safe_get(task, time_key)
+            if timestamp is not None:
+                task_summary[time_key] = datetime.fromtimestamp(timestamp, pytz.utc)
+    except Exception as e:
+        if logger:
+            logger.exception(f"Error {e} converting timestamp for task {task.get('task_id', 'unknown')}")
+
+    try:
+        telemetry_summary = summarize_telemetry(task, logger)
+        try:
+            tags = tag_critical_task(
+                generated=task.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=thresholds
+            )
+            if tags:
+                task_summary["tags"] = tags
+        except Exception as e:
+            logger.exception(e)
+        task_summary["telemetry_summary"] = telemetry_summary
+    except Exception as e:
+        if logger:
+            logger.exception(e)
+        else:
+            print(e)
+
+    return task_summary
+
+
+def tag_critical_task(
+    generated: Dict, telemetry_summary: Dict, generated_keywords: List[str] = ["result"], thresholds: Dict = None
+) -> List[str]:
+    """
+    Tag a task with labels indicating abnormal or noteworthy resource usage or result anomalies.
+
+    Parameters
+    ----------
+    generated : dict
+        Dictionary of generated output values (e.g., results).
+    telemetry_summary : dict
+        Telemetry summary produced from summarize_telemetry().
+    generated_keywords : list of str, optional
+        List of keys in the generated output to check for anomalies.
+    thresholds : dict, optional
+        Custom thresholds for tagging high CPU, memory, disk, etc.
+
+    Returns
+    -------
+    list of str
+        Tags indicating abnormal patterns (e.g., "high_cpu", "low_output").
+    """
+    if thresholds is None:
+        thresholds = {
+            "high_cpu": 80,
+            "high_mem": 1e9,
+            "high_disk": 1e8,
+            "long_duration": 0.8,
+            "low_output": 0.1,
+            "high_output": 0.9,
+        }
+
+    cpu = abs(telemetry_summary.get("cpu", {}).get("percent_all_diff", 0))
+    mem = telemetry_summary.get("mem", {}).get("used_mem_diff", 0)
+    disk = telemetry_summary.get("disk", {}).get("read_bytes_diff", 0) + telemetry_summary.get("disk", {}).get(
+        "write_bytes_diff", 0
+    )
+    # TODO gpu
+    duration = telemetry_summary.get("duration_sec", 0)
+
+    tags = []
+
+    if cpu > thresholds["high_cpu"]:
+        tags.append("high_cpu")
+    if mem > thresholds["high_mem"]:
+        tags.append("high_mem")
+    if disk > thresholds["high_disk"]:
+        tags.append("high_disk")
+    if duration > thresholds["long_duration"]:
+        tags.append("long_duration")
+
+    for key in generated_keywords:
+        value = generated.get(key, 0)
+        if value < thresholds["low_output"]:
+            tags.append("low_output")
+        if value > thresholds["high_output"]:
+            tags.append("high_output")
+
+    return tags
+
+
+sample_tasks = [
+    {
+        "task_id": "t1",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {"name": "MNIST", "size": 60000, "source": {"url": "http://example.com/mnist", "format": "csv"}},
+            "params": {"epochs": 5, "batch_size": 32, "shuffle": True},
+        },
+        "generated": {"model": {"accuracy": 0.98, "layers": [64, 64, 10], "saved_path": "/models/mnist_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 42.7, "cpu_percent": 85.2},
+    },
+    {
+        "task_id": "t2",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {
+                "name": "CIFAR-10",
+                "size": 50000,
+                "source": {"url": "http://example.com/cifar", "format": "jpeg"},
+            },
+            "params": {"epochs": 10, "batch_size": 64, "shuffle": False},
+        },
+        "generated": {"model": {"accuracy": 0.91, "layers": [128, 128, 10], "saved_path": "/models/cifar_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 120.5, "cpu_percent": 92.0},
+    },
+    {
+        "task_id": "t3",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/mnist_v1.pth", "test_data": {"name": "MNIST-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.97, "confusion_matrix": [[8500, 100], [50, 1350]]}},
+        "telemetry_summary": {"duration_sec": 15.3},
+    },
+    {
+        "task_id": "t4",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/cifar_v1.pth", "test_data": {"name": "CIFAR-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.88, "confusion_matrix": [[4000, 500], [300, 5200]]}},
+        "telemetry_summary": {"duration_sec": 18.9},
+    },
+]
+
+
+def infer_dtype(value: Any) -> str:
+    """Infer a simplified dtype label for the value."""
+    if isinstance(value, bool):
+        return "bool"
+    elif isinstance(value, int):
+        return "int"
+    elif isinstance(value, float):
+        return "float"
+    elif isinstance(value, str):
+        return "str"
+    elif isinstance(value, list):
+        return "list"
+    return "str"  # fallback for other types
+
+
+def flatten_dict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
+    """Recursively flatten nested dicts using dot notation."""
+    items = {}
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.update(flatten_dict(v, new_key, sep=sep))
+        else:
+            items[new_key] = v
+    return items
+
+
+def update_schema(schema_section: list, flat_fields: dict):
+    """Update schema section with flattened fields and example values."""
+    field_map = {f["n"]: f for f in schema_section}
+
+    for key, value in flat_fields.items():
+        dtype = infer_dtype(value)
+        if isinstance(value, float):
+            val_repr = round(value, 2)
+        elif isinstance(value, (dict, list)):
+            val_repr = str(value)
+        else:
+            val_repr = value
+
+        if isinstance(val_repr, str) and len(val_repr) > 100:
+            val_repr = val_repr[:100] + "#TRUNCATED"
+
+        if key not in field_map:
+            field = {
+                "n": key,
+                "d": dtype,
+                "v": [val_repr] if val_repr is not None else [],
+            }
+            schema_section.append(field)
+            field_map[key] = field
+        else:
+            field = field_map[key]
+            if val_repr not in field["v"] and len(field["v"]) < 3:
+                field["v"].append(val_repr)
+
+
+def update_tasks_summary_schema(tasks: list[dict], schema) -> dict:
+    """Update tasks_summary schema."""
+    act_schema = update_activity_schema(tasks)
+    merged_schema = deep_merge_dicts(act_schema, schema)
+    return merged_schema
+
+
+def update_activity_schema(tasks: list[dict]) -> dict:
+    """Build schema for each activity_id from list of task dicts."""
+    schema = defaultdict(
+        lambda: {
+            "in": [],
+            "out": [],
+            # "tel": [],
+        }
+    )
+
+    for task in tasks:
+        activity_id = task.get("activity_id")
+        if not activity_id:
+            continue
+
+        activity_schema = schema[activity_id]
+
+        for section_key, schema_key in [
+            ("used", "in"),
+            ("generated", "out"),
+            # ("telemetry_summary", "tel"),
+        ]:
+            section_data = task.get(section_key)
+            if isinstance(section_data, dict):
+                flat_fields = flatten_dict(section_data, parent_key=section_key)
+                update_schema(activity_schema[schema_key], flat_fields)
+
+    schema = dict(schema)
+    return schema
+
+
+def deep_merge_dicts(a: dict, b: dict) -> dict:
+    """
+    Recursively merge dict b into dict a:
+    - Does not overwrite existing values in a.
+    - If both values are dicts, merges recursively.
+    - If both values are lists, concatenates and deduplicates.
+    - Otherwise, keeps value from a.
+    Returns a new dict (does not mutate inputs).
+    """
+    result = copy.deepcopy(a)
+
+    for key, b_val in b.items():
+        if key not in result:
+            result[key] = copy.deepcopy(b_val)
+        else:
+            a_val = result[key]
+            if isinstance(a_val, dict) and isinstance(b_val, dict):
+                result[key] = deep_merge_dicts(a_val, b_val)
+            elif isinstance(a_val, list) and isinstance(b_val, list):
+                combined = a_val + [item for item in b_val if item not in a_val]
+                result[key] = combined
+            # preserve a_val otherwise
+    return result
flowcept/commons/utils.py
CHANGED
@@ -1,5 +1,6 @@
 """Utilities."""
 
+import argparse
 from datetime import datetime, timedelta, timezone
 import json
 from time import time, sleep
@@ -9,7 +10,6 @@ import platform
 import subprocess
 import types
 import numpy as np
-import pytz
 
 from flowcept import configs
 from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
@@ -19,14 +19,14 @@ from flowcept.commons.vocabulary import Status
 
 
 def get_utc_now() -> float:
-    """Get UTC time."""
+    """Get current UTC time as a timestamp (seconds since epoch)."""
     now = datetime.now(timezone.utc)
     return now.timestamp()
 
 
 def get_utc_now_str() -> str:
     """Get UTC string."""
-    format_string = "%Y-%m-%dT%H:%M:%S.%f"
+    format_string = "%Y-%m-%dT%H:%M:%S.%f%z"
     now = datetime.now(timezone.utc)
     return now.strftime(format_string)
 
@@ -159,11 +159,14 @@ class GenericJSONEncoder(json.JSONEncoder):
         return super().default(obj)
 
 
-def replace_non_serializable_times(obj, tz=
-    """Replace non-serializable
+def replace_non_serializable_times(obj, tz=timezone.utc):
+    """Replace non-serializable datetimes in an object with ISO 8601 strings (ms precision)."""
     for time_field in TaskObject.get_time_field_names():
-        if time_field in obj:
-            obj[time_field] = obj[time_field].
+        if time_field in obj and isinstance(obj[time_field], datetime):
+            obj[time_field] = obj[time_field].astimezone(tz).isoformat(timespec="milliseconds")
+
+
+__DICT__CLASSES = (argparse.Namespace,)
 
 
 def replace_non_serializable(obj):
@@ -180,6 +183,8 @@ def replace_non_serializable(obj):
         return obj.to_flowcept_dict()
     elif hasattr(obj, "to_dict"):
         return obj.to_dict()
+    elif isinstance(obj, __DICT__CLASSES):
+        return obj.__dict__
     else:
         # Replace non-serializable values with id()
         return f"{obj.__class__.__name__}_instance_id_{id(obj)}"
@@ -262,6 +267,20 @@ class GenericJSONDecoder(json.JSONDecoder):
         return inst
 
 
+def get_git_info(path: str = "."):
+    """Get Git Repo metadata."""
+    from git import Repo
+
+    repo = Repo(path, search_parent_directories=True)
+    head = repo.head.commit.hexsha
+    short = repo.git.rev_parse(head, short=True)
+    branch = repo.active_branch.name if not repo.head.is_detached else "HEAD"
+    remote = next(iter(repo.remotes)).url if repo.remotes else None
+    dirty = "dirty" if repo.is_dirty() else "clean"
+    root = repo.working_tree_dir
+    return {"sha": head, "short_sha": short, "branch": branch, "root": root, "remote": remote, "dirty": dirty}
+
+
 class ClassProperty:
     """Wrapper to simulate property of class methods, removed in py313."""
 
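
Two of the utils.py changes are easy to exercise: get_utc_now_str now keeps the UTC offset (the %z fix), and the new get_git_info collects repository metadata via GitPython. A minimal sketch, assuming the GitPython dependency is installed and the process runs inside a git checkout:

from flowcept.commons.utils import get_git_info, get_utc_now_str

# With the %z fix the string carries the offset, e.g.
# "2025-01-01T12:00:00.000000+0000" instead of a naive timestamp.
print(get_utc_now_str())

info = get_git_info(".")  # search_parent_directories walks up to find the repo root
print(info["short_sha"], info["branch"], info["dirty"])
print(info["remote"])  # URL of the first remote, or None if the repo has none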
flowcept/configs.py
CHANGED
@@ -4,29 +4,34 @@ import os
 import socket
 import getpass
 
+from flowcept.version import __version__
+
 PROJECT_NAME = "flowcept"
+
+DEFAULT_SETTINGS = {
+    "version": __version__,
+    "log": {"log_file_level": "disable", "log_stream_level": "disable"},
+    "project": {"dump_buffer_path": "flowcept_messages.jsonl"},
+    "telemetry_capture": {},
+    "instrumentation": {},
+    "experiment": {},
+    "mq": {"enabled": False},
+    "kv_db": {"enabled": False},
+    "web_server": {},
+    "sys_metadata": {},
+    "extra_metadata": {},
+    "analytics": {},
+    "db_buffer": {},
+    "databases": {"mongodb": {"enabled": False}, "lmdb": {"enabled": False}},
+    "adapters": {},
+    "agent": {},
+}
+
 USE_DEFAULT = os.getenv("FLOWCEPT_USE_DEFAULT", "False").lower() == "true"
-########################
-# Project Settings #
-########################
 
 if USE_DEFAULT:
-    settings = {
-
-        "project": {},
-        "telemetry_capture": {},
-        "instrumentation": {},
-        "experiment": {},
-        "mq": {},
-        "kv_db": {},
-        "web_server": {},
-        "sys_metadata": {},
-        "extra_metadata": {},
-        "analytics": {},
-        "buffer": {},
-        "databases": {},
-        "adapters": {},
-    }
+    settings = DEFAULT_SETTINGS.copy()
+
 else:
     from omegaconf import OmegaConf
 
@@ -42,7 +47,13 @@ else:
         settings = OmegaConf.load(f)
     else:
         settings = OmegaConf.load(SETTINGS_PATH)
-
+
+    # Making sure all settings are in place.
+    keys = DEFAULT_SETTINGS.keys() - settings.keys()
+    if len(keys):
+        for k in keys:
+            settings[k] = DEFAULT_SETTINGS[k]
+
 ########################
 # Log Settings #
 ########################
@@ -68,11 +79,13 @@ FLOWCEPT_USER = settings["experiment"].get("user", "blank_user")
 
 MQ_INSTANCES = settings["mq"].get("instances", None)
 MQ_SETTINGS = settings["mq"]
+MQ_ENABLED = os.getenv("MQ_ENABLED", settings["mq"].get("enabled", True))
 MQ_TYPE = os.getenv("MQ_TYPE", settings["mq"].get("type", "redis"))
-MQ_CHANNEL = settings["mq"].get("channel", "interception")
+MQ_CHANNEL = os.getenv("MQ_CHANNEL", settings["mq"].get("channel", "interception"))
 MQ_PASSWORD = settings["mq"].get("password", None)
 MQ_HOST = os.getenv("MQ_HOST", settings["mq"].get("host", "localhost"))
 MQ_PORT = int(os.getenv("MQ_PORT", settings["mq"].get("port", "6379")))
+MQ_URI = os.getenv("MQ_URI", settings["mq"].get("uri", None))
 MQ_BUFFER_SIZE = settings["mq"].get("buffer_size", None)
 MQ_INSERTION_BUFFER_TIME = settings["mq"].get("insertion_buffer_time_secs", None)
 MQ_TIMING = settings["mq"].get("timing", False)
@@ -86,6 +99,7 @@ KVDB_PASSWORD = settings["kv_db"].get("password", None)
 KVDB_HOST = os.getenv("KVDB_HOST", settings["kv_db"].get("host", "localhost"))
 KVDB_PORT = int(os.getenv("KVDB_PORT", settings["kv_db"].get("port", "6379")))
 KVDB_URI = os.getenv("KVDB_URI", settings["kv_db"].get("uri", None))
+KVDB_ENABLED = settings["kv_db"].get("enabled", False)
 
 
 DATABASES = settings.get("databases", {})
@@ -118,9 +132,9 @@ if LMDB_SETTINGS:
 else:
     LMDB_ENABLED = LMDB_SETTINGS.get("enabled", False)
 
-if not LMDB_ENABLED and not MONGO_ENABLED:
-    # At least one of these variables need to be enabled.
-    LMDB_ENABLED = True
+# if not LMDB_ENABLED and not MONGO_ENABLED:
+#     # At least one of these variables need to be enabled.
+#     LMDB_ENABLED = True
 
 ##########################
 # DB Buffer Settings #
@@ -134,19 +148,20 @@ DB_INSERTER_MAX_TRIALS_STOP = db_buffer_settings.get("stop_max_trials", 240)
 DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01)
 
 
-
+###########################
 # PROJECT SYSTEM SETTINGS #
-
+###########################
 
-DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "
+DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "offline")
 # DEBUG_MODE = settings["project"].get("debug", False)
 PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
-
+DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", None)
 
 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
+TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)
 
 ######################
 # SYS METADATA #
@@ -217,7 +232,11 @@ ANALYTICS = settings.get("analytics", None)
 ####################
 
 INSTRUMENTATION = settings.get("instrumentation", {})
-INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled",
+INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)
+
+AGENT = settings.get("agent", {})
+AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
+AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))
 
 ####################
 # Enabled ADAPTERS #