flowcept 0.8.10__py3-none-any.whl → 0.8.12__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the registry.
Files changed (64)
  1. flowcept/__init__.py +7 -4
  2. flowcept/agents/__init__.py +5 -0
  3. flowcept/agents/agent_client.py +58 -0
  4. flowcept/agents/agents_utils.py +181 -0
  5. flowcept/agents/dynamic_schema_tracker.py +191 -0
  6. flowcept/agents/flowcept_agent.py +30 -0
  7. flowcept/agents/flowcept_ctx_manager.py +175 -0
  8. flowcept/agents/gui/__init__.py +5 -0
  9. flowcept/agents/gui/agent_gui.py +76 -0
  10. flowcept/agents/gui/gui_utils.py +239 -0
  11. flowcept/agents/llms/__init__.py +1 -0
  12. flowcept/agents/llms/claude_gcp.py +139 -0
  13. flowcept/agents/llms/gemini25.py +119 -0
  14. flowcept/agents/prompts/__init__.py +1 -0
  15. flowcept/agents/prompts/general_prompts.py +69 -0
  16. flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
  17. flowcept/agents/tools/__init__.py +1 -0
  18. flowcept/agents/tools/general_tools.py +102 -0
  19. flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
  20. flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
  21. flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
  22. flowcept/cli.py +459 -17
  23. flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
  24. flowcept/commons/daos/keyvalue_dao.py +19 -23
  25. flowcept/commons/daos/mq_dao/mq_dao_base.py +49 -38
  26. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +20 -3
  27. flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
  28. flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
  29. flowcept/commons/daos/redis_conn.py +47 -0
  30. flowcept/commons/flowcept_dataclasses/task_object.py +50 -27
  31. flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
  32. flowcept/commons/settings_factory.py +2 -4
  33. flowcept/commons/task_data_preprocess.py +400 -0
  34. flowcept/commons/utils.py +26 -7
  35. flowcept/configs.py +48 -29
  36. flowcept/flowcept_api/flowcept_controller.py +102 -18
  37. flowcept/flowceptor/adapters/base_interceptor.py +24 -11
  38. flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
  39. flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
  40. flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
  41. flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
  42. flowcept/flowceptor/consumers/agent/__init__.py +1 -0
  43. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +125 -0
  44. flowcept/flowceptor/consumers/base_consumer.py +94 -0
  45. flowcept/flowceptor/consumers/consumer_utils.py +5 -4
  46. flowcept/flowceptor/consumers/document_inserter.py +135 -36
  47. flowcept/flowceptor/telemetry_capture.py +6 -3
  48. flowcept/instrumentation/flowcept_agent_task.py +294 -0
  49. flowcept/instrumentation/flowcept_decorator.py +43 -0
  50. flowcept/instrumentation/flowcept_loop.py +3 -3
  51. flowcept/instrumentation/flowcept_task.py +64 -24
  52. flowcept/instrumentation/flowcept_torch.py +5 -5
  53. flowcept/instrumentation/task_capture.py +87 -4
  54. flowcept/version.py +1 -1
  55. {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/METADATA +48 -11
  56. flowcept-0.8.12.dist-info/RECORD +101 -0
  57. resources/sample_settings.yaml +46 -14
  58. flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
  59. flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
  60. flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
  61. flowcept-0.8.10.dist-info/RECORD +0 -75
  62. {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/WHEEL +0 -0
  63. {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/entry_points.txt +0 -0
  64. {flowcept-0.8.10.dist-info → flowcept-0.8.12.dist-info}/licenses/LICENSE +0 -0
flowcept/commons/task_data_preprocess.py ADDED
@@ -0,0 +1,400 @@
+from datetime import datetime
+from typing import Dict, List
+import copy
+from collections import defaultdict
+from typing import Any
+
+import pytz
+
+
+def summarize_telemetry(task: Dict, logger) -> Dict:
+    """
+    Extract and compute the telemetry summary for a task based on start and end telemetry snapshots.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing telemetry_at_start and telemetry_at_end.
+
+    Returns
+    -------
+    dict
+        A summary of telemetry differences including CPU, disk, memory, and network metrics, and task duration.
+    """
+
+    def extract_cpu_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "percent_all_diff": end["percent_all"] - start["percent_all"],
+            "user_time_diff": end["times_avg"]["user"] - start["times_avg"]["user"],
+            "system_time_diff": end["times_avg"]["system"] - start["times_avg"]["system"],
+            "idle_time_diff": end["times_avg"]["idle"] - start["times_avg"]["idle"],
+        }
+
+    def extract_disk_info(start: Dict, end: Dict) -> Dict:
+        io_start = start["io_sum"]
+        io_end = end["io_sum"]
+        return {
+            "read_bytes_diff": io_end["read_bytes"] - io_start["read_bytes"],
+            "write_bytes_diff": io_end["write_bytes"] - io_start["write_bytes"],
+            "read_count_diff": io_end["read_count"] - io_start["read_count"],
+            "write_count_diff": io_end["write_count"] - io_start["write_count"],
+        }
+
+    def extract_mem_info(start: Dict, end: Dict) -> Dict:
+        return {
+            "used_mem_diff": end["virtual"]["used"] - start["virtual"]["used"],
+            "percent_diff": end["virtual"]["percent"] - start["virtual"]["percent"],
+            "swap_used_diff": end["swap"]["used"] - start["swap"]["used"],
+        }
+
+    def extract_network_info(start: Dict, end: Dict) -> Dict:
+        net_start = start["netio_sum"]
+        net_end = end["netio_sum"]
+        return {
+            "bytes_sent_diff": net_end["bytes_sent"] - net_start["bytes_sent"],
+            "bytes_recv_diff": net_end["bytes_recv"] - net_start["bytes_recv"],
+            "packets_sent_diff": net_end["packets_sent"] - net_start["packets_sent"],
+            "packets_recv_diff": net_end["packets_recv"] - net_start["packets_recv"],
+        }
+
+    tel_funcs = {
+        "cpu": extract_cpu_info,
+        "disk": extract_disk_info,
+        "memory": extract_mem_info,
+        "network": extract_network_info,
+    }
+
+    start_tele = task.get("telemetry_at_start", {})
+    end_tele = task.get("telemetry_at_end", {})
+
+    telemetry_summary = {}
+
+    try:
+        started_at = task.get("started_at", None)
+        ended_at = task.get("ended_at", None)
+        if started_at is None or ended_at is None:
+            logger.warning(f"We can't summarize telemetry for duration_sec for task {task}")
+        else:
+            duration = ended_at - started_at
+            telemetry_summary["duration_sec"] = duration
+    except Exception as e:
+        logger.error(f"Error to summarize telemetry for duration_sec in {task}")
+        logger.exception(e)
+
+    for key in start_tele.keys():
+        try:
+            if key not in tel_funcs:
+                continue
+            func = tel_funcs[key]
+            if key in end_tele:
+                telemetry_summary[key] = func(start_tele[key], end_tele[key])
+            else:
+                logger.warning(
+                    f"We can't summarize telemetry {key} for task {task} because the key is not in the end_tele"
+                )
+        except Exception as e:
+            logger.warning(f"Error to summarize telemetry for {key} for task {task}. Exception: {e}")
+            logger.exception(e)
+
+    return telemetry_summary
+
+
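Editor's note: a minimal usage sketch (not shipped in the wheel) of summarize_telemetry above. The task dict, its values, and the logger name are hypothetical, shaped after the keys the function reads.

    # Hypothetical sketch: feeding summarize_telemetry a task with CPU snapshots.
    import logging

    logger = logging.getLogger("example")
    task = {
        "started_at": 100.0,  # epoch seconds (assumed timestamps)
        "ended_at": 142.7,
        "telemetry_at_start": {
            "cpu": {"percent_all": 10.0, "times_avg": {"user": 1.0, "system": 0.5, "idle": 98.5}},
        },
        "telemetry_at_end": {
            "cpu": {"percent_all": 85.0, "times_avg": {"user": 30.0, "system": 4.5, "idle": 65.5}},
        },
    }
    summary = summarize_telemetry(task, logger)
    # -> roughly: {"duration_sec": 42.7,
    #              "cpu": {"percent_all_diff": 75.0, "user_time_diff": 29.0,
    #                      "system_time_diff": 4.0, "idle_time_diff": -33.0}}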
+def _safe_get(task, key):
+    try:
+        return task.get(key)
+    except Exception:
+        return None
+
+
+def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
+    """
+    Summarize key metadata and telemetry for a task, optionally tagging critical conditions.
+
+    Parameters
+    ----------
+    task : dict
+        The task dictionary containing metadata and telemetry snapshots.
+    thresholds : dict, optional
+        Threshold values used to tag abnormal resource usage.
+
+    Returns
+    -------
+    dict
+        Summary of the task including identifiers, telemetry summary, and optional critical tags.
+    """
+    task_summary = {}
+
+    # Keys that can be copied directly
+    for key in [
+        "workflow_id",
+        "task_id",
+        "parent_task_id",
+        "activity_id",
+        "used",
+        "generated",
+        "hostname",
+        "status",
+        "agent_id",
+        "campaign_id",
+        "subtype",
+        "custom_metadata",
+    ]:
+        value = _safe_get(task, key)
+        if value is not None:
+            if "_id" in key:
+                task_summary[key] = str(value)
+            else:
+                task_summary[key] = value
+
+    # Special handling for timestamp field
+    try:
+        time_keys = ["started_at", "ended_at"]
+        for time_key in time_keys:
+            timestamp = _safe_get(task, time_key)
+            if timestamp is not None:
+                task_summary[time_key] = datetime.fromtimestamp(timestamp, pytz.utc)
+    except Exception as e:
+        if logger:
+            logger.exception(f"Error {e} converting timestamp for task {task.get('task_id', 'unknown')}")
+
+    try:
+        telemetry_summary = summarize_telemetry(task, logger)
+        try:
+            tags = tag_critical_task(
+                generated=task.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=thresholds
+            )
+            if tags:
+                task_summary["tags"] = tags
+        except Exception as e:
+            logger.exception(e)
+        task_summary["telemetry_summary"] = telemetry_summary
+    except Exception as e:
+        if logger:
+            logger.exception(e)
+        else:
+            print(e)
+
+    return task_summary
+
+
+def tag_critical_task(
+    generated: Dict, telemetry_summary: Dict, generated_keywords: List[str] = ["result"], thresholds: Dict = None
+) -> List[str]:
+    """
+    Tag a task with labels indicating abnormal or noteworthy resource usage or result anomalies.
+
+    Parameters
+    ----------
+    generated : dict
+        Dictionary of generated output values (e.g., results).
+    telemetry_summary : dict
+        Telemetry summary produced from summarize_telemetry().
+    generated_keywords : list of str, optional
+        List of keys in the generated output to check for anomalies.
+    thresholds : dict, optional
+        Custom thresholds for tagging high CPU, memory, disk, etc.
+
+    Returns
+    -------
+    list of str
+        Tags indicating abnormal patterns (e.g., "high_cpu", "low_output").
+    """
+    if thresholds is None:
+        thresholds = {
+            "high_cpu": 80,
+            "high_mem": 1e9,
+            "high_disk": 1e8,
+            "long_duration": 0.8,
+            "low_output": 0.1,
+            "high_output": 0.9,
+        }
+
+    cpu = abs(telemetry_summary.get("cpu", {}).get("percent_all_diff", 0))
+    mem = telemetry_summary.get("mem", {}).get("used_mem_diff", 0)
+    disk = telemetry_summary.get("disk", {}).get("read_bytes_diff", 0) + telemetry_summary.get("disk", {}).get(
+        "write_bytes_diff", 0
+    )
+    # TODO gpu
+    duration = telemetry_summary.get("duration_sec", 0)
+
+    tags = []
+
+    if cpu > thresholds["high_cpu"]:
+        tags.append("high_cpu")
+    if mem > thresholds["high_mem"]:
+        tags.append("high_mem")
+    if disk > thresholds["high_disk"]:
+        tags.append("high_disk")
+    if duration > thresholds["long_duration"]:
+        tags.append("long_duration")
+
+    for key in generated_keywords:
+        value = generated.get(key, 0)
+        if value < thresholds["low_output"]:
+            tags.append("low_output")
+        if value > thresholds["high_output"]:
+            tags.append("high_output")
+
+    return tags
+
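Editor's note: a hypothetical call with the default thresholds, not shipped in the wheel. One behavioral detail is visible here: this function reads telemetry_summary["mem"], while summarize_telemetry above stores memory diffs under "memory".

    # Hypothetical sketch: tagging with the default thresholds defined above.
    summary = {"duration_sec": 42.7, "cpu": {"percent_all_diff": 85.2}}
    tags = tag_critical_task(generated={"result": 0.98}, telemetry_summary=summary)
    # 85.2 > 80 -> "high_cpu"; 42.7 > 0.8 -> "long_duration"; 0.98 > 0.9 -> "high_output"
    # tags == ["high_cpu", "long_duration", "high_output"]
    # Note: "high_mem" reads the "mem" key, whereas summarize_telemetry emits
    # "memory", so it does not fire on that function's output.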
+
+sample_tasks = [
+    {
+        "task_id": "t1",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {"name": "MNIST", "size": 60000, "source": {"url": "http://example.com/mnist", "format": "csv"}},
+            "params": {"epochs": 5, "batch_size": 32, "shuffle": True},
+        },
+        "generated": {"model": {"accuracy": 0.98, "layers": [64, 64, 10], "saved_path": "/models/mnist_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 42.7, "cpu_percent": 85.2},
+    },
+    {
+        "task_id": "t2",
+        "activity_id": "train_model",
+        "used": {
+            "dataset": {
+                "name": "CIFAR-10",
+                "size": 50000,
+                "source": {"url": "http://example.com/cifar", "format": "jpeg"},
+            },
+            "params": {"epochs": 10, "batch_size": 64, "shuffle": False},
+        },
+        "generated": {"model": {"accuracy": 0.91, "layers": [128, 128, 10], "saved_path": "/models/cifar_v1.pth"}},
+        "telemetry_summary": {"duration_sec": 120.5, "cpu_percent": 92.0},
+    },
+    {
+        "task_id": "t3",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/mnist_v1.pth", "test_data": {"name": "MNIST-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.97, "confusion_matrix": [[8500, 100], [50, 1350]]}},
+        "telemetry_summary": {"duration_sec": 15.3},
+    },
+    {
+        "task_id": "t4",
+        "activity_id": "evaluate_model",
+        "used": {"model_path": "/models/cifar_v1.pth", "test_data": {"name": "CIFAR-test", "samples": 10000}},
+        "generated": {"metrics": {"accuracy": 0.88, "confusion_matrix": [[4000, 500], [300, 5200]]}},
+        "telemetry_summary": {"duration_sec": 18.9},
+    },
+]
+
+
+def infer_dtype(value: Any) -> str:
+    """Infer a simplified dtype label for the value."""
+    if isinstance(value, bool):
+        return "bool"
+    elif isinstance(value, int):
+        return "int"
+    elif isinstance(value, float):
+        return "float"
+    elif isinstance(value, str):
+        return "str"
+    elif isinstance(value, list):
+        return "list"
+    return "str"  # fallback for other types
+
+
+def flatten_dict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
+    """Recursively flatten nested dicts using dot notation."""
+    items = {}
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.update(flatten_dict(v, new_key, sep=sep))
+        else:
+            items[new_key] = v
+    return items
+
+
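Editor's note: for reference, a hypothetical input showing flatten_dict's dot-notation output; the dict below is illustrative, not from the package.

    # Hypothetical sketch: flattening a nested "used" block.
    nested = {"dataset": {"name": "MNIST", "source": {"format": "csv"}}, "params": {"epochs": 5}}
    flatten_dict(nested, parent_key="used")
    # -> {"used.dataset.name": "MNIST", "used.dataset.source.format": "csv", "used.params.epochs": 5}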
+def update_schema(schema_section: list, flat_fields: dict):
+    """Update schema section with flattened fields and example values."""
+    field_map = {f["n"]: f for f in schema_section}
+
+    for key, value in flat_fields.items():
+        dtype = infer_dtype(value)
+        if isinstance(value, float):
+            val_repr = round(value, 2)
+        elif isinstance(value, (dict, list)):
+            val_repr = str(value)
+        else:
+            val_repr = value
+
+        if isinstance(val_repr, str) and len(val_repr) > 100:
+            val_repr = val_repr[:100] + "#TRUNCATED"
+
+        if key not in field_map:
+            field = {
+                "n": key,
+                "d": dtype,
+                "v": [val_repr] if val_repr is not None else [],
+            }
+            schema_section.append(field)
+            field_map[key] = field
+        else:
+            field = field_map[key]
+            if val_repr not in field["v"] and len(field["v"]) < 3:
+                field["v"].append(val_repr)
+
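Editor's note: a hypothetical sketch of the schema records update_schema builds, keyed by field name ("n"), dtype ("d"), and up to three example values ("v"); the field names are illustrative.

    # Hypothetical sketch: two successive updates on an empty schema section.
    section = []
    update_schema(section, {"used.params.epochs": 5, "used.dataset.name": "MNIST"})
    update_schema(section, {"used.params.epochs": 10})
    # section == [
    #     {"n": "used.params.epochs", "d": "int", "v": [5, 10]},
    #     {"n": "used.dataset.name", "d": "str", "v": ["MNIST"]},
    # ]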
+
+def update_tasks_summary_schema(tasks: list[dict], schema) -> dict:
+    """Update tasks_summary schema."""
+    act_schema = update_activity_schema(tasks)
+    merged_schema = deep_merge_dicts(act_schema, schema)
+    return merged_schema
+
+
+def update_activity_schema(tasks: list[dict]) -> dict:
+    """Build schema for each activity_id from list of task dicts."""
+    schema = defaultdict(
+        lambda: {
+            "in": [],
+            "out": [],
+            # "tel": [],
+        }
+    )
+
+    for task in tasks:
+        activity_id = task.get("activity_id")
+        if not activity_id:
+            continue
+
+        activity_schema = schema[activity_id]
+
+        for section_key, schema_key in [
+            ("used", "in"),
+            ("generated", "out"),
+            # ("telemetry_summary", "tel"),
+        ]:
+            section_data = task.get(section_key)
+            if isinstance(section_data, dict):
+                flat_fields = flatten_dict(section_data, parent_key=section_key)
+                update_schema(activity_schema[schema_key], flat_fields)
+
+    schema = dict(schema)
+    return schema
+
+
+def deep_merge_dicts(a: dict, b: dict) -> dict:
+    """
+    Recursively merge dict b into dict a:
+    - Does not overwrite existing values in a.
+    - If both values are dicts, merges recursively.
+    - If both values are lists, concatenates and deduplicates.
+    - Otherwise, keeps value from a.
+    Returns a new dict (does not mutate inputs).
+    """
+    result = copy.deepcopy(a)
+
+    for key, b_val in b.items():
+        if key not in result:
+            result[key] = copy.deepcopy(b_val)
+        else:
+            a_val = result[key]
+            if isinstance(a_val, dict) and isinstance(b_val, dict):
+                result[key] = deep_merge_dicts(a_val, b_val)
+            elif isinstance(a_val, list) and isinstance(b_val, list):
+                combined = a_val + [item for item in b_val if item not in a_val]
+                result[key] = combined
+            # preserve a_val otherwise
+    return result
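Editor's note: a hypothetical sketch tying the schema helpers together, using the module's own sample_tasks, plus the merge semantics of deep_merge_dicts; not shipped in the wheel.

    # Hypothetical sketch: building a per-activity schema from sample_tasks.
    schema = update_activity_schema(sample_tasks)
    sorted(schema)  # -> ["evaluate_model", "train_model"]
    # schema["train_model"]["in"] includes, e.g.:
    #     {"n": "used.dataset.name", "d": "str", "v": ["MNIST", "CIFAR-10"]}

    # deep_merge_dicts never overwrites values already in its first argument:
    deep_merge_dicts({"x": 1, "tags": ["a"]}, {"x": 2, "tags": ["a", "b"], "y": 3})
    # -> {"x": 1, "tags": ["a", "b"], "y": 3}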
flowcept/commons/utils.py CHANGED
@@ -1,5 +1,6 @@
 """Utilities."""

+import argparse
 from datetime import datetime, timedelta, timezone
 import json
 from time import time, sleep
@@ -9,7 +10,6 @@ import platform
 import subprocess
 import types
 import numpy as np
-import pytz

 from flowcept import configs
 from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
@@ -19,14 +19,14 @@ from flowcept.commons.vocabulary import Status


 def get_utc_now() -> float:
-    """Get UTC time."""
+    """Get current UTC time as a timestamp (seconds since epoch)."""
     now = datetime.now(timezone.utc)
     return now.timestamp()


 def get_utc_now_str() -> str:
     """Get UTC string."""
-    format_string = "%Y-%m-%dT%H:%M:%S.%f"
+    format_string = "%Y-%m-%dT%H:%M:%S.%f%z"
     now = datetime.now(timezone.utc)
     return now.strftime(format_string)

@@ -159,11 +159,14 @@ class GenericJSONEncoder(json.JSONEncoder):
         return super().default(obj)


-def replace_non_serializable_times(obj, tz=pytz.utc):
-    """Replace non-serializable times in an object."""
+def replace_non_serializable_times(obj, tz=timezone.utc):
+    """Replace non-serializable datetimes in an object with ISO 8601 strings (ms precision)."""
     for time_field in TaskObject.get_time_field_names():
-        if time_field in obj:
-            obj[time_field] = obj[time_field].strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + f" {tz}"
+        if time_field in obj and isinstance(obj[time_field], datetime):
+            obj[time_field] = obj[time_field].astimezone(tz).isoformat(timespec="milliseconds")
+
+
+__DICT__CLASSES = (argparse.Namespace,)


 def replace_non_serializable(obj):
@@ -180,6 +183,8 @@ def replace_non_serializable(obj):
         return obj.to_flowcept_dict()
     elif hasattr(obj, "to_dict"):
         return obj.to_dict()
+    elif isinstance(obj, __DICT__CLASSES):
+        return obj.__dict__
     else:
         # Replace non-serializable values with id()
         return f"{obj.__class__.__name__}_instance_id_{id(obj)}"
@@ -262,6 +267,20 @@ class GenericJSONDecoder(json.JSONDecoder):
         return inst


+def get_git_info(path: str = "."):
+    """Get Git Repo metadata."""
+    from git import Repo
+
+    repo = Repo(path, search_parent_directories=True)
+    head = repo.head.commit.hexsha
+    short = repo.git.rev_parse(head, short=True)
+    branch = repo.active_branch.name if not repo.head.is_detached else "HEAD"
+    remote = next(iter(repo.remotes)).url if repo.remotes else None
+    dirty = "dirty" if repo.is_dirty() else "clean"
+    root = repo.working_tree_dir
+    return {"sha": head, "short_sha": short, "branch": branch, "root": root, "remote": remote, "dirty": dirty}
+
+
 class ClassProperty:
     """Wrapper to simulate property of class methods, removed in py313."""

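Editor's note: the timestamp changes above drop pytz in favor of the stdlib timezone and switch to ISO 8601 output. A hypothetical sketch of the resulting string shapes, using only the stdlib calls the diff introduces:

    # Hypothetical sketch (not shipped in the wheel).
    from datetime import datetime, timezone

    # get_utc_now_str(): %z now appends the UTC offset.
    datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f%z")
    # -> e.g. "2025-01-01T12:00:00.000000+0000" (shape only; actual value varies)

    # replace_non_serializable_times(): ISO 8601 with millisecond precision.
    dt = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
    dt.astimezone(timezone.utc).isoformat(timespec="milliseconds")
    # -> "2025-01-01T12:00:00.000+00:00"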
flowcept/configs.py CHANGED
@@ -4,29 +4,34 @@ import os
 import socket
 import getpass

+from flowcept.version import __version__
+
 PROJECT_NAME = "flowcept"
+
+DEFAULT_SETTINGS = {
+    "version": __version__,
+    "log": {"log_file_level": "disable", "log_stream_level": "disable"},
+    "project": {"dump_buffer_path": "flowcept_messages.jsonl"},
+    "telemetry_capture": {},
+    "instrumentation": {},
+    "experiment": {},
+    "mq": {"enabled": False},
+    "kv_db": {"enabled": False},
+    "web_server": {},
+    "sys_metadata": {},
+    "extra_metadata": {},
+    "analytics": {},
+    "db_buffer": {},
+    "databases": {"mongodb": {"enabled": False}, "lmdb": {"enabled": False}},
+    "adapters": {},
+    "agent": {},
+}
+
 USE_DEFAULT = os.getenv("FLOWCEPT_USE_DEFAULT", "False").lower() == "true"
-########################
-#   Project Settings   #
-########################

 if USE_DEFAULT:
-    settings = {
-        "log": {},
-        "project": {},
-        "telemetry_capture": {},
-        "instrumentation": {},
-        "experiment": {},
-        "mq": {},
-        "kv_db": {},
-        "web_server": {},
-        "sys_metadata": {},
-        "extra_metadata": {},
-        "analytics": {},
-        "buffer": {},
-        "databases": {},
-        "adapters": {},
-    }
+    settings = DEFAULT_SETTINGS.copy()
+
 else:
     from omegaconf import OmegaConf

@@ -42,7 +47,13 @@ else:
         settings = OmegaConf.load(f)
     else:
         settings = OmegaConf.load(SETTINGS_PATH)
-    # print(SETTINGS_PATH)
+
+    # Making sure all settings are in place.
+    keys = DEFAULT_SETTINGS.keys() - settings.keys()
+    if len(keys):
+        for k in keys:
+            settings[k] = DEFAULT_SETTINGS[k]
+

 ########################
 #     Log Settings     #
@@ -68,11 +79,13 @@ FLOWCEPT_USER = settings["experiment"].get("user", "blank_user")

 MQ_INSTANCES = settings["mq"].get("instances", None)
 MQ_SETTINGS = settings["mq"]
+MQ_ENABLED = os.getenv("MQ_ENABLED", settings["mq"].get("enabled", True))
 MQ_TYPE = os.getenv("MQ_TYPE", settings["mq"].get("type", "redis"))
-MQ_CHANNEL = settings["mq"].get("channel", "interception")
+MQ_CHANNEL = os.getenv("MQ_CHANNEL", settings["mq"].get("channel", "interception"))
 MQ_PASSWORD = settings["mq"].get("password", None)
 MQ_HOST = os.getenv("MQ_HOST", settings["mq"].get("host", "localhost"))
 MQ_PORT = int(os.getenv("MQ_PORT", settings["mq"].get("port", "6379")))
+MQ_URI = os.getenv("MQ_URI", settings["mq"].get("uri", None))
 MQ_BUFFER_SIZE = settings["mq"].get("buffer_size", None)
 MQ_INSERTION_BUFFER_TIME = settings["mq"].get("insertion_buffer_time_secs", None)
 MQ_TIMING = settings["mq"].get("timing", False)
@@ -86,6 +99,7 @@ KVDB_PASSWORD = settings["kv_db"].get("password", None)
 KVDB_HOST = os.getenv("KVDB_HOST", settings["kv_db"].get("host", "localhost"))
 KVDB_PORT = int(os.getenv("KVDB_PORT", settings["kv_db"].get("port", "6379")))
 KVDB_URI = os.getenv("KVDB_URI", settings["kv_db"].get("uri", None))
+KVDB_ENABLED = settings["kv_db"].get("enabled", False)


 DATABASES = settings.get("databases", {})
@@ -118,9 +132,9 @@ if LMDB_SETTINGS:
 else:
     LMDB_ENABLED = LMDB_SETTINGS.get("enabled", False)

-if not LMDB_ENABLED and not MONGO_ENABLED:
-    # At least one of these variables need to be enabled.
-    LMDB_ENABLED = True
+# if not LMDB_ENABLED and not MONGO_ENABLED:
+#     # At least one of these variables need to be enabled.
+#     LMDB_ENABLED = True

 ##########################
 #   DB Buffer Settings   #
@@ -134,19 +148,20 @@ DB_INSERTER_MAX_TRIALS_STOP = db_buffer_settings.get("stop_max_trials", 240)
 DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01)


-######################
+###########################
 # PROJECT SYSTEM SETTINGS #
-######################
+###########################

-DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "online")
+DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "offline")
 # DEBUG_MODE = settings["project"].get("debug", False)
 PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
-
+DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", None)

 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
+TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)

 ######################
 #    SYS METADATA    #
@@ -217,7 +232,11 @@ ANALYTICS = settings.get("analytics", None)
 ####################

 INSTRUMENTATION = settings.get("instrumentation", {})
-INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", False)
+INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)
+
+AGENT = settings.get("agent", {})
+AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
+AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))

 ####################
 # Enabled ADAPTERS #
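Editor's note: many of the settings above now resolve in the same order: environment variable first, then the loaded YAML, then a hard-coded default, with missing top-level sections backfilled from DEFAULT_SETTINGS. A hypothetical sketch of that precedence, using plain dicts in place of the OmegaConf object:

    # Hypothetical sketch (not shipped in the wheel).
    import os

    settings = {"mq": {"host": "redis.example.org"}}  # as if loaded from the settings YAML
    MQ_HOST = os.getenv("MQ_HOST", settings["mq"].get("host", "localhost"))
    # MQ_HOST env var unset -> "redis.example.org"; if set, the env var wins.

    # Top-level sections missing from the YAML are backfilled from DEFAULT_SETTINGS:
    DEFAULT_SETTINGS = {"mq": {"enabled": False}, "agent": {}}
    for k in DEFAULT_SETTINGS.keys() - settings.keys():
        settings[k] = DEFAULT_SETTINGS[k]
    # settings now also contains "agent"; the existing "mq" section is left untouched.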