flowcept 0.8.11__py3-none-any.whl → 0.8.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. flowcept/__init__.py +7 -4
  2. flowcept/agents/__init__.py +5 -0
  3. flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
  4. flowcept/agents/agents_utils.py +181 -0
  5. flowcept/agents/dynamic_schema_tracker.py +191 -0
  6. flowcept/agents/flowcept_agent.py +30 -0
  7. flowcept/agents/flowcept_ctx_manager.py +175 -0
  8. flowcept/agents/gui/__init__.py +5 -0
  9. flowcept/agents/gui/agent_gui.py +76 -0
  10. flowcept/agents/gui/gui_utils.py +239 -0
  11. flowcept/agents/llms/__init__.py +1 -0
  12. flowcept/agents/llms/claude_gcp.py +139 -0
  13. flowcept/agents/llms/gemini25.py +119 -0
  14. flowcept/agents/prompts/__init__.py +1 -0
  15. flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
  16. flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
  17. flowcept/agents/tools/__init__.py +1 -0
  18. flowcept/agents/tools/general_tools.py +102 -0
  19. flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
  20. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
  21. flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
  22. flowcept/cli.py +286 -44
  23. flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
  24. flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
  25. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
  26. flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
  27. flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
  28. flowcept/commons/task_data_preprocess.py +260 -60
  29. flowcept/commons/utils.py +25 -6
  30. flowcept/configs.py +41 -26
  31. flowcept/flowcept_api/flowcept_controller.py +73 -6
  32. flowcept/flowceptor/adapters/base_interceptor.py +11 -5
  33. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
  34. flowcept/flowceptor/consumers/base_consumer.py +4 -0
  35. flowcept/flowceptor/consumers/consumer_utils.py +5 -4
  36. flowcept/flowceptor/consumers/document_inserter.py +2 -2
  37. flowcept/flowceptor/telemetry_capture.py +5 -2
  38. flowcept/instrumentation/flowcept_agent_task.py +294 -0
  39. flowcept/instrumentation/flowcept_decorator.py +43 -0
  40. flowcept/instrumentation/flowcept_loop.py +3 -3
  41. flowcept/instrumentation/flowcept_task.py +64 -24
  42. flowcept/instrumentation/flowcept_torch.py +5 -5
  43. flowcept/instrumentation/task_capture.py +83 -6
  44. flowcept/version.py +1 -1
  45. {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/METADATA +42 -14
  46. {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/RECORD +50 -36
  47. resources/sample_settings.yaml +12 -4
  48. flowcept/flowceptor/adapters/agents/__init__.py +0 -1
  49. flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
  50. flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
  51. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
  52. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
  53. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
  54. {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/WHEEL +0 -0
  55. {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/entry_points.txt +0 -0
  56. {flowcept-0.8.11.dist-info → flowcept-0.8.12.dist-info}/licenses/LICENSE +0 -0
flowcept/commons/flowcept_dataclasses/task_object.py CHANGED
@@ -24,6 +24,7 @@ class TaskObject:
     utc_timestamp: float = None
     adapter_id: AnyStr = None
     user: AnyStr = None
+    data: Any = None
     used: Dict[AnyStr, Any] = None  # Used parameter and files
     campaign_id: AnyStr = None
     generated: Dict[AnyStr, Any] = None  # Generated results and files
@@ -53,6 +54,7 @@ class TaskObject:
     dependencies: List = None
     dependents: List = None
     tags: List = None
+    agent_id: str = None
 
     _DEFAULT_ENRICH_VALUES = {
         "node_name": NODE_NAME,
@@ -104,20 +106,16 @@ class TaskObject:
         if self.utc_timestamp is None:
             self.utc_timestamp = flowcept.commons.utils.get_utc_now()
 
-        if self.node_name is None and NODE_NAME is not None:
-            self.node_name = NODE_NAME
-
-        if self.login_name is None and LOGIN_NAME is not None:
-            self.login_name = LOGIN_NAME
-
-        if self.public_ip is None and PUBLIC_IP is not None:
-            self.public_ip = PUBLIC_IP
-
-        if self.private_ip is None and PRIVATE_IP is not None:
-            self.private_ip = PRIVATE_IP
+        for key, fallback_value in TaskObject._DEFAULT_ENRICH_VALUES.items():
+            if getattr(self, key) is None and fallback_value is not None:
+                setattr(self, key, fallback_value)
 
-        if self.hostname is None and HOSTNAME is not None:
-            self.hostname = HOSTNAME
+    @staticmethod
+    def enrich_task_dict(task_dict: dict):
+        """Enrich the task."""
+        for key, fallback_value in TaskObject._DEFAULT_ENRICH_VALUES.items():
+            if (key not in task_dict or task_dict[key] is None) and fallback_value is not None:
+                task_dict[key] = fallback_value
 
     def to_dict(self):
         """Convert to dictionary."""
@@ -139,13 +137,6 @@ class TaskObject:
139
137
  """Serialize it."""
140
138
  return msgpack.dumps(self.to_dict())
141
139
 
142
- @staticmethod
143
- def enrich_task_dict(task_dict: dict):
144
- """Enrich the task."""
145
- for key, fallback_value in TaskObject._DEFAULT_ENRICH_VALUES.items():
146
- if (key not in task_dict or task_dict[key] is None) and fallback_value is not None:
147
- task_dict[key] = fallback_value
148
-
149
140
  @staticmethod
150
141
  def from_dict(task_obj_dict: Dict[AnyStr, Any]) -> "TaskObject":
151
142
  """Create a TaskObject from a dictionary.
@@ -177,6 +168,10 @@ class TaskObject:
 
     def __repr__(self):
         """Return an unambiguous string representation of the TaskObject."""
-        attrs = ["task_id", "workflow_id", "campaign_id", "activity_id", "custom_metadata", "started_at", "ended_at"]
+        attrs = ["task_id", "workflow_id", "campaign_id", "activity_id", "started_at", "ended_at"]
+        optionals = ["subtype", "parent_task_id", "agent_id"]
+        for opt in optionals:
+            if getattr(self, opt) is not None:
+                attrs.append(opt)
         attr_str = ", ".join(f"{attr}={repr(getattr(self, attr))}" for attr in attrs)
         return f"TaskObject({attr_str})"
flowcept/commons/flowcept_dataclasses/workflow_object.py CHANGED
@@ -5,7 +5,7 @@ import msgpack
 from omegaconf import OmegaConf, DictConfig
 
 from flowcept.version import __version__
-from flowcept.commons.utils import get_utc_now
+from flowcept.commons.utils import get_utc_now, get_git_info
 from flowcept.configs import (
     settings,
     FLOWCEPT_USER,
@@ -38,6 +38,7 @@ class WorkflowObject:
     sys_name: str = None
     extra_metadata: str = None
     used: Dict = None
+    code_repository: Dict = None
    generated: Dict = None
 
     def __init__(self, workflow_id=None, name=None, used=None, generated=None):
@@ -93,6 +94,13 @@ class WorkflowObject:
         )
         self.extra_metadata = _extra_metadata
 
+        if self.code_repository is None:
+            try:
+                self.code_repository = get_git_info()
+            except Exception as e:
+                print(e)
+                pass
+
         if self.flowcept_version is None:
             self.flowcept_version = __version__
 
flowcept/commons/task_data_preprocess.py CHANGED
@@ -1,35 +1,13 @@
-"""
-The base of this code was generated using ChatGPT.
-
-Prompt:
-
-Here I have a list containing one real task.
-
-<Paste one real task here>
-
-I want to create a list of summarized task data, per task, containing:
-- activity_id
-- task_id
-- used
-- generated
-- task_duration (ended_at - started_at)
-- hostname
-- cpu_info
-- disk_info
-- mem_info
-- network_info
-<Consider adding GPU info too, if you have gpu in your task data>
-
-Where info about cpu, disk, mem, and network must consider telemetry_at_end and telemetry_at_start.
-
-We will use this summarized data as input for LLM questions to find patterns in the resource usage and how they relate
-to input (used) and output (generated) of each task.
-"""
-
+from datetime import datetime
 from typing import Dict, List
+import copy
+from collections import defaultdict
+from typing import Any
+
+import pytz
 
 
-def summarize_telemetry(task: Dict) -> Dict:
+def summarize_telemetry(task: Dict, logger) -> Dict:
     """
     Extract and compute the telemetry summary for a task based on start and end telemetry snapshots.
 
@@ -79,24 +57,55 @@ def summarize_telemetry(task: Dict) -> Dict:
             "packets_recv_diff": net_end["packets_recv"] - net_start["packets_recv"],
         }
 
-    start_tele = task["telemetry_at_start"]
-    end_tele = task["telemetry_at_end"]
+    tel_funcs = {
+        "cpu": extract_cpu_info,
+        "disk": extract_disk_info,
+        "memory": extract_mem_info,
+        "network": extract_network_info,
+    }
+
+    start_tele = task.get("telemetry_at_start", {})
+    end_tele = task.get("telemetry_at_end", {})
 
-    started_at = task["started_at"]
-    ended_at = task["ended_at"]
-    duration = ended_at - started_at
+    telemetry_summary = {}
 
-    telemetry_summary = {
-        "duration_sec": duration,
-        "cpu_info": extract_cpu_info(start_tele["cpu"], end_tele["cpu"]),
-        "disk_info": extract_disk_info(start_tele["disk"], end_tele["disk"]),
-        "mem_info": extract_mem_info(start_tele["memory"], end_tele["memory"]),
-        "network_info": extract_network_info(start_tele["network"], end_tele["network"]),
-    }
+    try:
+        started_at = task.get("started_at", None)
+        ended_at = task.get("ended_at", None)
+        if started_at is None or ended_at is None:
+            logger.warning(f"We can't summarize telemetry for duration_sec for task {task}")
+        else:
+            duration = ended_at - started_at
+            telemetry_summary["duration_sec"] = duration
+    except Exception as e:
+        logger.error(f"Error to summarize telemetry for duration_sec in {task}")
+        logger.exception(e)
+
+    for key in start_tele.keys():
+        try:
+            if key not in tel_funcs:
+                continue
+            func = tel_funcs[key]
+            if key in end_tele:
+                telemetry_summary[key] = func(start_tele[key], end_tele[key])
+            else:
+                logger.warning(
+                    f"We can't summarize telemetry {key} for task {task} because the key is not in the end_tele"
+                )
+        except Exception as e:
+            logger.warning(f"Error to summarize telemetry for {key} for task {task}. Exception: {e}")
+            logger.exception(e)
 
     return telemetry_summary
 
 
+def _safe_get(task, key):
+    try:
+        return task.get(key)
+    except Exception:
+        return None
+
+
 def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
     """
     Summarize key metadata and telemetry for a task, optionally tagging critical conditions.
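For reference, the refactored summarize_telemetry keys its output by the raw telemetry section names ("cpu", "disk", "memory", "network") instead of the old "*_info" names, and it only sets "duration_sec" when both timestamps are present. A hypothetical result shape, with made-up values and only the inner keys referenced elsewhere in this module:

    telemetry_summary = {
        "duration_sec": 42.7,
        "cpu": {"percent_all_diff": 12.5},
        "disk": {"read_bytes_diff": 1024, "write_bytes_diff": 2048},
        "memory": {"used_mem_diff": 104857600},
        "network": {"packets_recv_diff": 12},
    }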
@@ -113,23 +122,51 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
     dict
         Summary of the task including identifiers, telemetry summary, and optional critical tags.
     """
-    task_summary = {
-        "workflow_id": task.get("workflow_id"),
-        "task_id": task.get("task_id"),
-        "activity_id": task.get("activity_id"),
-        "used": task.get("used"),
-        "generated": task.get("generated"),
-        "hostname": task.get("hostname"),
-        "status": task.get("status"),
-    }
+    task_summary = {}
+
+    # Keys that can be copied directly
+    for key in [
+        "workflow_id",
+        "task_id",
+        "parent_task_id",
+        "activity_id",
+        "used",
+        "generated",
+        "hostname",
+        "status",
+        "agent_id",
+        "campaign_id",
+        "subtype",
+        "custom_metadata",
+    ]:
+        value = _safe_get(task, key)
+        if value is not None:
+            if "_id" in key:
+                task_summary[key] = str(value)
+            else:
+                task_summary[key] = value
+
+    # Special handling for timestamp field
+    try:
+        time_keys = ["started_at", "ended_at"]
+        for time_key in time_keys:
+            timestamp = _safe_get(task, time_key)
+            if timestamp is not None:
+                task_summary[time_key] = datetime.fromtimestamp(timestamp, pytz.utc)
+    except Exception as e:
+        if logger:
+            logger.exception(f"Error {e} converting timestamp for task {task.get('task_id', 'unknown')}")
 
     try:
-        telemetry_summary = summarize_telemetry(task)
-        tags = tag_critical_task(
-            generated=task.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=thresholds
-        )
-        if tags:
-            task_summary["tags"] = tags
+        telemetry_summary = summarize_telemetry(task, logger)
+        try:
+            tags = tag_critical_task(
+                generated=task.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=thresholds
+            )
+            if tags:
+                task_summary["tags"] = tags
+        except Exception as e:
+            logger.exception(e)
         task_summary["telemetry_summary"] = telemetry_summary
     except Exception as e:
         if logger:
@@ -172,12 +209,13 @@ def tag_critical_task(
         "high_output": 0.9,
     }
 
-    cpu = abs(telemetry_summary["cpu_info"].get("percent_all_diff", 0))
-    mem = telemetry_summary["mem_info"].get("used_mem_diff", 0)
-    disk = telemetry_summary["disk_info"].get("read_bytes_diff", 0) + telemetry_summary["disk_info"].get(
+    cpu = abs(telemetry_summary.get("cpu", {}).get("percent_all_diff", 0))
+    mem = telemetry_summary.get("mem", {}).get("used_mem_diff", 0)
+    disk = telemetry_summary.get("disk", {}).get("read_bytes_diff", 0) + telemetry_summary.get("disk", {}).get(
         "write_bytes_diff", 0
     )
-    duration = telemetry_summary["duration_sec"]
+    # TODO gpu
+    duration = telemetry_summary.get("duration_sec", 0)
 
     tags = []
 
@@ -198,3 +236,165 @@ def tag_critical_task(
             tags.append("high_output")
 
     return tags
+
+
+sample_tasks = [
+    {
+        "task_id": "t1",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {"name": "MNIST", "size": 60000, "source": {"url": "http://example.com/mnist", "format": "csv"}},
+            "params": {"epochs": 5, "batch_size": 32, "shuffle": True},
+        },
+        "generated": {"model": {"accuracy": 0.98, "layers": [64, 64, 10], "saved_path": "/models/mnist_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 42.7, "cpu_percent": 85.2},
+    },
+    {
+        "task_id": "t2",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {
+                "name": "CIFAR-10",
+                "size": 50000,
+                "source": {"url": "http://example.com/cifar", "format": "jpeg"},
+            },
+            "params": {"epochs": 10, "batch_size": 64, "shuffle": False},
+        },
+        "generated": {"model": {"accuracy": 0.91, "layers": [128, 128, 10], "saved_path": "/models/cifar_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 120.5, "cpu_percent": 92.0},
+    },
+    {
+        "task_id": "t3",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/mnist_v1.pth", "test_data": {"name": "MNIST-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.97, "confusion_matrix": [[8500, 100], [50, 1350]]}},
+        "telemetry_summary": {"duration_sec": 15.3},
+    },
+    {
+        "task_id": "t4",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/cifar_v1.pth", "test_data": {"name": "CIFAR-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.88, "confusion_matrix": [[4000, 500], [300, 5200]]}},
+        "telemetry_summary": {"duration_sec": 18.9},
+    },
+]
+
+
+def infer_dtype(value: Any) -> str:
+    """Infer a simplified dtype label for the value."""
+    if isinstance(value, bool):
+        return "bool"
+    elif isinstance(value, int):
+        return "int"
+    elif isinstance(value, float):
+        return "float"
+    elif isinstance(value, str):
+        return "str"
+    elif isinstance(value, list):
+        return "list"
+    return "str"  # fallback for other types
+
+
+def flatten_dict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
+    """Recursively flatten nested dicts using dot notation."""
+    items = {}
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.update(flatten_dict(v, new_key, sep=sep))
+        else:
+            items[new_key] = v
+    return items
+
+
+def update_schema(schema_section: list, flat_fields: dict):
+    """Update schema section with flattened fields and example values."""
+    field_map = {f["n"]: f for f in schema_section}
+
+    for key, value in flat_fields.items():
+        dtype = infer_dtype(value)
+        if isinstance(value, float):
+            val_repr = round(value, 2)
+        elif isinstance(value, (dict, list)):
+            val_repr = str(value)
+        else:
+            val_repr = value
+
+        if isinstance(val_repr, str) and len(val_repr) > 100:
+            val_repr = val_repr[:100] + "#TRUNCATED"
+
+        if key not in field_map:
+            field = {
+                "n": key,
+                "d": dtype,
+                "v": [val_repr] if val_repr is not None else [],
+            }
+            schema_section.append(field)
+            field_map[key] = field
+        else:
+            field = field_map[key]
+            if val_repr not in field["v"] and len(field["v"]) < 3:
+                field["v"].append(val_repr)
+
+
+def update_tasks_summary_schema(tasks: list[dict], schema) -> dict:
+    """Update tasks_summary schema."""
+    act_schema = update_activity_schema(tasks)
+    merged_schema = deep_merge_dicts(act_schema, schema)
+    return merged_schema
+
+
+def update_activity_schema(tasks: list[dict]) -> dict:
+    """Build schema for each activity_id from list of task dicts."""
+    schema = defaultdict(
+        lambda: {
+            "in": [],
+            "out": [],
+            # "tel": [],
+        }
+    )
+
+    for task in tasks:
+        activity_id = task.get("activity_id")
+        if not activity_id:
+            continue
+
+        activity_schema = schema[activity_id]
+
+        for section_key, schema_key in [
+            ("used", "in"),
+            ("generated", "out"),
+            # ("telemetry_summary", "tel"),
+        ]:
+            section_data = task.get(section_key)
+            if isinstance(section_data, dict):
+                flat_fields = flatten_dict(section_data, parent_key=section_key)
+                update_schema(activity_schema[schema_key], flat_fields)
+
+    schema = dict(schema)
+    return schema
+
+
+def deep_merge_dicts(a: dict, b: dict) -> dict:
+    """
+    Recursively merge dict b into dict a:
+    - Does not overwrite existing values in a.
+    - If both values are dicts, merges recursively.
+    - If both values are lists, concatenates and deduplicates.
+    - Otherwise, keeps value from a.
+    Returns a new dict (does not mutate inputs).
+    """
+    result = copy.deepcopy(a)
+
+    for key, b_val in b.items():
+        if key not in result:
+            result[key] = copy.deepcopy(b_val)
+        else:
+            a_val = result[key]
+            if isinstance(a_val, dict) and isinstance(b_val, dict):
+                result[key] = deep_merge_dicts(a_val, b_val)
+            elif isinstance(a_val, list) and isinstance(b_val, list):
+                combined = a_val + [item for item in b_val if item not in a_val]
+                result[key] = combined
+            # preserve a_val otherwise
+    return result
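The schema helpers added above can be exercised with the bundled sample_tasks. A minimal sketch (assuming the flowcept.commons.task_data_preprocess module path from the file list above) of the per-activity schema they produce:

    from flowcept.commons.task_data_preprocess import sample_tasks, update_activity_schema

    schema = update_activity_schema(sample_tasks)
    # schema["train_model"]["in"] holds flattened input fields such as
    #   {"n": "used.dataset.name", "d": "str", "v": ["MNIST", "CIFAR-10"]}
    # and schema["train_model"]["out"] holds output fields such as
    #   {"n": "generated.model.accuracy", "d": "float", "v": [0.98, 0.91]}
    print(schema["train_model"]["in"][0])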
flowcept/commons/utils.py CHANGED
@@ -1,5 +1,6 @@
 """Utilities."""
 
+import argparse
 from datetime import datetime, timedelta, timezone
 import json
 from time import time, sleep
@@ -9,7 +10,6 @@ import platform
 import subprocess
 import types
 import numpy as np
-import pytz
 
 from flowcept import configs
 from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
@@ -19,7 +19,7 @@ from flowcept.commons.vocabulary import Status
 
 
 def get_utc_now() -> float:
-    """Get UTC time."""
+    """Get current UTC time as a timestamp (seconds since epoch)."""
     now = datetime.now(timezone.utc)
     return now.timestamp()
 
@@ -159,11 +159,14 @@ class GenericJSONEncoder(json.JSONEncoder):
         return super().default(obj)
 
 
-def replace_non_serializable_times(obj, tz=pytz.utc):
-    """Replace non-serializable times in an object."""
+def replace_non_serializable_times(obj, tz=timezone.utc):
+    """Replace non-serializable datetimes in an object with ISO 8601 strings (ms precision)."""
     for time_field in TaskObject.get_time_field_names():
-        if time_field in obj:
-            obj[time_field] = obj[time_field].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + f" {tz}"
+        if time_field in obj and isinstance(obj[time_field], datetime):
+            obj[time_field] = obj[time_field].astimezone(tz).isoformat(timespec="milliseconds")
+
+
+__DICT__CLASSES = (argparse.Namespace,)
 
 
 def replace_non_serializable(obj):
@@ -180,6 +183,8 @@ def replace_non_serializable(obj):
         return obj.to_flowcept_dict()
     elif hasattr(obj, "to_dict"):
         return obj.to_dict()
+    elif isinstance(obj, __DICT__CLASSES):
+        return obj.__dict__
     else:
         # Replace non-serializable values with id()
         return f"{obj.__class__.__name__}_instance_id_{id(obj)}"
@@ -262,6 +267,20 @@ class GenericJSONDecoder(json.JSONDecoder):
         return inst
 
 
+def get_git_info(path: str = "."):
+    """Get Git Repo metadata."""
+    from git import Repo
+
+    repo = Repo(path, search_parent_directories=True)
+    head = repo.head.commit.hexsha
+    short = repo.git.rev_parse(head, short=True)
+    branch = repo.active_branch.name if not repo.head.is_detached else "HEAD"
+    remote = next(iter(repo.remotes)).url if repo.remotes else None
+    dirty = "dirty" if repo.is_dirty() else "clean"
+    root = repo.working_tree_dir
+    return {"sha": head, "short_sha": short, "branch": branch, "root": root, "remote": remote, "dirty": dirty}
+
+
 class ClassProperty:
     """Wrapper to simulate property of class methods, removed in py313."""
 
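A minimal usage sketch for the new get_git_info helper (it assumes GitPython is installed and the caller runs inside a Git working tree; WorkflowObject uses it to populate its new code_repository field):

    from flowcept.commons.utils import get_git_info

    info = get_git_info(".")
    # Keys returned by the implementation above:
    # sha, short_sha, branch, root, remote, dirty
    print(info["short_sha"], info["branch"], info["dirty"])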
flowcept/configs.py CHANGED
@@ -4,29 +4,34 @@ import os
 import socket
 import getpass
 
+from flowcept.version import __version__
+
 PROJECT_NAME = "flowcept"
+
+DEFAULT_SETTINGS = {
+    "version": __version__,
+    "log": {"log_file_level": "disable", "log_stream_level": "disable"},
+    "project": {"dump_buffer_path": "flowcept_messages.jsonl"},
+    "telemetry_capture": {},
+    "instrumentation": {},
+    "experiment": {},
+    "mq": {"enabled": False},
+    "kv_db": {"enabled": False},
+    "web_server": {},
+    "sys_metadata": {},
+    "extra_metadata": {},
+    "analytics": {},
+    "db_buffer": {},
+    "databases": {"mongodb": {"enabled": False}, "lmdb": {"enabled": False}},
+    "adapters": {},
+    "agent": {},
+}
+
 USE_DEFAULT = os.getenv("FLOWCEPT_USE_DEFAULT", "False").lower() == "true"
-########################
-# Project Settings #
-########################
 
 if USE_DEFAULT:
-    settings = {
-        "log": {},
-        "project": {},
-        "telemetry_capture": {},
-        "instrumentation": {},
-        "experiment": {},
-        "mq": {},
-        "kv_db": {},
-        "web_server": {},
-        "sys_metadata": {},
-        "extra_metadata": {},
-        "analytics": {},
-        "buffer": {},
-        "databases": {},
-        "adapters": {},
-    }
+    settings = DEFAULT_SETTINGS.copy()
+
 else:
     from omegaconf import OmegaConf
 
@@ -42,7 +47,13 @@ else:
         settings = OmegaConf.load(f)
     else:
         settings = OmegaConf.load(SETTINGS_PATH)
-    # print(SETTINGS_PATH)
+
+    # Making sure all settings are in place.
+    keys = DEFAULT_SETTINGS.keys() - settings.keys()
+    if len(keys):
+        for k in keys:
+            settings[k] = DEFAULT_SETTINGS[k]
+
 ########################
 # Log Settings #
 ########################
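A minimal sketch of the backfill logic above, using plain dicts for clarity (the real settings object is an OmegaConf DictConfig loaded from the settings file; the user config below is hypothetical):

    DEFAULT_SETTINGS = {"mq": {"enabled": False}, "agent": {}, "project": {}}
    settings = {"mq": {"enabled": True, "type": "redis"}}  # hypothetical user config

    # Only top-level sections missing from the user config are backfilled;
    # sections the user already defined are left untouched.
    for k in DEFAULT_SETTINGS.keys() - settings.keys():
        settings[k] = DEFAULT_SETTINGS[k]

    print(sorted(settings.keys()))  # ['agent', 'mq', 'project']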
@@ -68,6 +79,7 @@ FLOWCEPT_USER = settings["experiment"].get("user", "blank_user")
 
 MQ_INSTANCES = settings["mq"].get("instances", None)
 MQ_SETTINGS = settings["mq"]
+MQ_ENABLED = os.getenv("MQ_ENABLED", settings["mq"].get("enabled", True))
 MQ_TYPE = os.getenv("MQ_TYPE", settings["mq"].get("type", "redis"))
 MQ_CHANNEL = os.getenv("MQ_CHANNEL", settings["mq"].get("channel", "interception"))
 MQ_PASSWORD = settings["mq"].get("password", None)
@@ -87,7 +99,7 @@ KVDB_PASSWORD = settings["kv_db"].get("password", None)
 KVDB_HOST = os.getenv("KVDB_HOST", settings["kv_db"].get("host", "localhost"))
 KVDB_PORT = int(os.getenv("KVDB_PORT", settings["kv_db"].get("port", "6379")))
 KVDB_URI = os.getenv("KVDB_URI", settings["kv_db"].get("uri", None))
-KVDB_ENABLED = settings["kv_db"].get("enabled", True)
+KVDB_ENABLED = settings["kv_db"].get("enabled", False)
 
 
 DATABASES = settings.get("databases", {})
@@ -136,19 +148,20 @@ DB_INSERTER_MAX_TRIALS_STOP = db_buffer_settings.get("stop_max_trials", 240)
 DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01)
 
 
-######################
+###########################
 # PROJECT SYSTEM SETTINGS #
-######################
+###########################
 
-DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "online")
+DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "offline")
 # DEBUG_MODE = settings["project"].get("debug", False)
 PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
-
+DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", None)
 
 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
+TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)
 
 ######################
 # SYS METADATA #
@@ -219,9 +232,11 @@ ANALYTICS = settings.get("analytics", None)
 ####################
 
 INSTRUMENTATION = settings.get("instrumentation", {})
-INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", False)
+INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)
 
 AGENT = settings.get("agent", {})
+AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
+AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))
 
 ####################
 # Enabled ADAPTERS #
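The new agent settings resolve in the usual precedence order: environment variable first, then the agent section of the settings file, then the built-in default. A minimal sketch with a hypothetical settings dict:

    import os

    settings = {"agent": {"mcp_host": "127.0.0.1", "mcp_port": "8080"}}  # hypothetical
    AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
    AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))
    print(AGENT_HOST, AGENT_PORT)  # 127.0.0.1 8080 unless the env vars override them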