flowcept 0.9.10__py3-none-any.whl → 0.9.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +1 -1
- flowcept/cli.py +81 -2
- flowcept/commons/daos/mq_dao/mq_dao_base.py +10 -17
- flowcept/commons/utils.py +19 -1
- flowcept/configs.py +20 -11
- flowcept/flowcept_api/flowcept_controller.py +144 -9
- flowcept/flowceptor/adapters/base_interceptor.py +2 -1
- flowcept/instrumentation/flowcept_loop.py +1 -1
- flowcept/version.py +1 -1
- {flowcept-0.9.10.dist-info → flowcept-0.9.12.dist-info}/METADATA +18 -3
- {flowcept-0.9.10.dist-info → flowcept-0.9.12.dist-info}/RECORD +15 -15
- resources/sample_settings.yaml +6 -3
- {flowcept-0.9.10.dist-info → flowcept-0.9.12.dist-info}/WHEEL +0 -0
- {flowcept-0.9.10.dist-info → flowcept-0.9.12.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.10.dist-info → flowcept-0.9.12.dist-info}/licenses/LICENSE +0 -0
flowcept/__init__.py
CHANGED
flowcept/cli.py
CHANGED
|
@@ -459,23 +459,85 @@ def start_mongo() -> None:
|
|
|
459
459
|
databases:
|
|
460
460
|
mongodb:
|
|
461
461
|
- bin : str (required) path to the mongod executable
|
|
462
|
+
- db_path : str (required) path to the db data directory
|
|
462
463
|
- log_path : str, optional (adds --fork --logpath)
|
|
463
464
|
- lock_file_path : str, optional (adds --pidfilepath)
|
|
464
465
|
|
|
466
|
+
|
|
465
467
|
Builds and runs the startup command.
|
|
466
468
|
"""
|
|
469
|
+
import time
|
|
470
|
+
import socket
|
|
471
|
+
from flowcept.configs import MONGO_HOST, MONGO_PORT, MONGO_URI
|
|
472
|
+
|
|
473
|
+
def _port_open(host: str, port: int, timeout: float = 0.5) -> bool:
|
|
474
|
+
try:
|
|
475
|
+
with socket.create_connection((host, port), timeout=timeout):
|
|
476
|
+
return True
|
|
477
|
+
except OSError:
|
|
478
|
+
return False
|
|
479
|
+
|
|
480
|
+
def _await_mongo(host: str, port: int, uri: str | None, timeout: float = 20.0) -> bool:
|
|
481
|
+
"""Wait until MongoDB is accepting connections (and ping if pymongo is available)."""
|
|
482
|
+
deadline = time.time() + timeout
|
|
483
|
+
have_pymongo = False
|
|
484
|
+
try:
|
|
485
|
+
from pymongo import MongoClient # optional
|
|
486
|
+
|
|
487
|
+
have_pymongo = True
|
|
488
|
+
except Exception:
|
|
489
|
+
pass
|
|
490
|
+
|
|
491
|
+
while time.time() < deadline:
|
|
492
|
+
if not _port_open(host, port):
|
|
493
|
+
time.sleep(0.25)
|
|
494
|
+
continue
|
|
495
|
+
|
|
496
|
+
if not have_pymongo:
|
|
497
|
+
return True # port is open; assume OK
|
|
498
|
+
|
|
499
|
+
try:
|
|
500
|
+
from pymongo import MongoClient
|
|
501
|
+
|
|
502
|
+
client = MongoClient(uri or f"mongodb://{host}:{port}", serverSelectionTimeoutMS=800)
|
|
503
|
+
client.admin.command("ping")
|
|
504
|
+
return True
|
|
505
|
+
except Exception:
|
|
506
|
+
time.sleep(0.25)
|
|
507
|
+
|
|
508
|
+
return False
|
|
509
|
+
|
|
510
|
+
def _tail(path: str, lines: int = 40) -> str:
|
|
511
|
+
try:
|
|
512
|
+
with open(path, "rb") as f:
|
|
513
|
+
f.seek(0, os.SEEK_END)
|
|
514
|
+
size = f.tell()
|
|
515
|
+
block = 1024
|
|
516
|
+
data = b""
|
|
517
|
+
while size > 0 and data.count(b"\n") <= lines:
|
|
518
|
+
size = max(0, size - block)
|
|
519
|
+
f.seek(size)
|
|
520
|
+
data = f.read(min(block, size)) + data
|
|
521
|
+
return data.decode(errors="replace").splitlines()[-lines:]
|
|
522
|
+
except Exception:
|
|
523
|
+
return []
|
|
524
|
+
|
|
467
525
|
# Safe nested gets
|
|
468
526
|
settings = getattr(configs, "settings", {}) or {}
|
|
469
527
|
databases = settings.get("databases") or {}
|
|
470
528
|
mongodb = databases.get("mongodb") or {}
|
|
471
529
|
|
|
472
530
|
bin_path = mongodb.get("bin")
|
|
473
|
-
|
|
474
|
-
|
|
531
|
+
db_path = mongodb.get("db_path")
|
|
532
|
+
log_path = mongodb.get("log_path", None)
|
|
533
|
+
lock_file_path = mongodb.get("lock_file_path", None)
|
|
475
534
|
|
|
476
535
|
if not bin_path:
|
|
477
536
|
print("Error: settings['databases']['mongodb']['bin'] is required.")
|
|
478
537
|
return
|
|
538
|
+
if not db_path:
|
|
539
|
+
print("Error: settings['databases']['mongodb']['db_path'] is required.")
|
|
540
|
+
return
|
|
479
541
|
|
|
480
542
|
# Build command
|
|
481
543
|
parts = [shlex.quote(str(bin_path))]
|
|
@@ -483,12 +545,29 @@ def start_mongo() -> None:
|
|
|
483
545
|
parts += ["--fork", "--logpath", shlex.quote(str(log_path))]
|
|
484
546
|
if lock_file_path:
|
|
485
547
|
parts += ["--pidfilepath", shlex.quote(str(lock_file_path))]
|
|
548
|
+
if db_path:
|
|
549
|
+
parts += ["--dbpath", shlex.quote(str(db_path))]
|
|
486
550
|
|
|
487
551
|
cmd = " ".join(parts)
|
|
488
552
|
try:
|
|
553
|
+
# When log_path is set, --fork is passed, so mongod starts in the background and this call returns immediately
|
|
489
554
|
out = _run_command(cmd, check_output=True)
|
|
490
555
|
if out:
|
|
491
556
|
print(out)
|
|
557
|
+
print(f"mongod launched (logs: {log_path}). Waiting for readiness on {MONGO_HOST}:{MONGO_PORT} ...")
|
|
558
|
+
|
|
559
|
+
ok = _await_mongo(MONGO_HOST, MONGO_PORT, MONGO_URI, timeout=20.0)
|
|
560
|
+
if ok:
|
|
561
|
+
print("✅ MongoDB is up and responding.")
|
|
562
|
+
else:
|
|
563
|
+
print("❌ MongoDB did not become ready in time.")
|
|
564
|
+
if log_path:
|
|
565
|
+
last_lines = _tail(log_path, 60)
|
|
566
|
+
if last_lines:
|
|
567
|
+
print("---- mongod last log lines ----")
|
|
568
|
+
for line in last_lines:
|
|
569
|
+
print(line)
|
|
570
|
+
print("---- end ----")
|
|
492
571
|
except subprocess.CalledProcessError as e:
|
|
493
572
|
print(f"Failed to start MongoDB: {e}")
|
|
494
573
|
|
|
@@ -19,7 +19,6 @@ from flowcept.configs import (
|
|
|
19
19
|
MQ_TIMING,
|
|
20
20
|
KVDB_ENABLED,
|
|
21
21
|
MQ_ENABLED,
|
|
22
|
-
DUMP_BUFFER_PATH,
|
|
23
22
|
)
|
|
24
23
|
|
|
25
24
|
from flowcept.commons.utils import GenericJSONEncoder
|
|
@@ -96,22 +95,11 @@ class MQDao(object):
|
|
|
96
95
|
def bulk_publish(self, buffer):
|
|
97
96
|
"""Publish it."""
|
|
98
97
|
# self.logger.info(f"Going to flush {len(buffer)} to MQ...")
|
|
99
|
-
if
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
with open(DUMP_BUFFER_PATH, "wb", buffering=1_048_576) as f:
|
|
104
|
-
for obj in buffer:
|
|
105
|
-
obj.pop("data", None) # We are not going to store data in the buffer file.
|
|
106
|
-
f.write(orjson.dumps(obj))
|
|
107
|
-
f.write(b"\n")
|
|
108
|
-
self.logger.info(f"Saved Flowcept messages into {DUMP_BUFFER_PATH}.")
|
|
98
|
+
if MQ_CHUNK_SIZE > 1:
|
|
99
|
+
for chunk in chunked(buffer, MQ_CHUNK_SIZE):
|
|
100
|
+
self._bulk_publish(chunk)
|
|
109
101
|
else:
|
|
110
|
-
|
|
111
|
-
for chunk in chunked(buffer, MQ_CHUNK_SIZE):
|
|
112
|
-
self._bulk_publish(chunk)
|
|
113
|
-
else:
|
|
114
|
-
self._bulk_publish(buffer)
|
|
102
|
+
self._bulk_publish(buffer)
|
|
115
103
|
|
|
116
104
|
def register_time_based_thread_init(self, interceptor_instance_id: str, exec_bundle_id=None):
|
|
117
105
|
"""Register the time."""
|
|
@@ -174,6 +162,12 @@ class MQDao(object):
|
|
|
174
162
|
self.started = True
|
|
175
163
|
|
|
176
164
|
def _close_buffer(self):
|
|
165
|
+
if flowcept.configs.DUMP_BUFFER_ENABLED and flowcept.configs.DUMP_BUFFER_PATH is not None:
|
|
166
|
+
from flowcept.commons.utils import buffer_to_disk
|
|
167
|
+
|
|
168
|
+
_buf = self.buffer.current_buffer if isinstance(self.buffer, AutoflushBuffer) else self.buffer
|
|
169
|
+
buffer_to_disk(_buf, flowcept.configs.DUMP_BUFFER_PATH, self.logger)
|
|
170
|
+
|
|
177
171
|
if flowcept.configs.DB_FLUSH_MODE == "online":
|
|
178
172
|
if self._time_based_flushing_started:
|
|
179
173
|
self.buffer.stop()
|
|
@@ -181,7 +175,6 @@ class MQDao(object):
|
|
|
181
175
|
else:
|
|
182
176
|
self.logger.error("MQ time-based flushing is not started")
|
|
183
177
|
else:
|
|
184
|
-
self.bulk_publish(self.buffer)
|
|
185
178
|
self.buffer = list()
|
|
186
179
|
|
|
187
180
|
def _stop_timed(self, interceptor_instance_id: str, check_safe_stops: bool = True, bundle_exec_id: int = None):
|
flowcept/commons/utils.py
CHANGED
|
@@ -4,7 +4,7 @@ import argparse
|
|
|
4
4
|
from datetime import datetime, timedelta, timezone
|
|
5
5
|
import json
|
|
6
6
|
from time import time, sleep
|
|
7
|
-
from typing import Callable
|
|
7
|
+
from typing import Callable, List, Dict
|
|
8
8
|
import os
|
|
9
9
|
import platform
|
|
10
10
|
import subprocess
|
|
@@ -245,6 +245,24 @@ def get_current_config_values():
|
|
|
245
245
|
return _vars
|
|
246
246
|
|
|
247
247
|
|
|
248
|
+
def buffer_to_disk(buffer: List[Dict], path: str, logger):
|
|
249
|
+
"""
|
|
250
|
+
Append the in-memory buffer to a JSON Lines (JSONL) file on disk.
|
|
251
|
+
"""
|
|
252
|
+
if not buffer:
|
|
253
|
+
logger.warning("The buffer is currently empty.")
|
|
254
|
+
return
|
|
255
|
+
with open(path, "ab", buffering=1_048_576) as f:
|
|
256
|
+
for obj in buffer:
|
|
257
|
+
obj.pop("data", None) # We are not going to store data in the buffer file.
|
|
258
|
+
from orjson import orjson
|
|
259
|
+
|
|
260
|
+
f.write(orjson.dumps(obj))
|
|
261
|
+
f.write(b"\n")
|
|
262
|
+
|
|
263
|
+
logger.info(f"Saved Flowcept buffer into {path}.")
|
|
264
|
+
|
|
265
|
+
|
|
248
266
|
class GenericJSONDecoder(json.JSONDecoder):
|
|
249
267
|
"""JSON decoder class."""
|
|
250
268
|
|
flowcept/configs.py
CHANGED
|
@@ -11,7 +11,7 @@ PROJECT_NAME = "flowcept"
|
|
|
11
11
|
DEFAULT_SETTINGS = {
|
|
12
12
|
"version": __version__,
|
|
13
13
|
"log": {"log_file_level": "disable", "log_stream_level": "disable"},
|
|
14
|
-
"project": {"
|
|
14
|
+
"project": {"dump_buffer": {"enabled": True}},
|
|
15
15
|
"telemetry_capture": {},
|
|
16
16
|
"instrumentation": {},
|
|
17
17
|
"experiment": {},
|
|
@@ -27,7 +27,9 @@ DEFAULT_SETTINGS = {
|
|
|
27
27
|
"agent": {},
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
_TRUE_VALUES = {"1", "true", "yes", "y", "t"}
|
|
31
|
+
|
|
32
|
+
USE_DEFAULT = os.getenv("FLOWCEPT_USE_DEFAULT", "False").lower() in _TRUE_VALUES
|
|
31
33
|
|
|
32
34
|
if USE_DEFAULT:
|
|
33
35
|
settings = DEFAULT_SETTINGS.copy()
|
|
@@ -158,10 +160,21 @@ PERF_LOG = settings["project"].get("performance_logging", False)
|
|
|
158
160
|
JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
|
|
159
161
|
REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
|
|
160
162
|
ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
|
|
161
|
-
|
|
163
|
+
|
|
164
|
+
_DEFAULT_DUMP_BUFFER_ENABLED = DB_FLUSH_MODE == "offline"
|
|
165
|
+
DUMP_BUFFER_ENABLED = (
|
|
166
|
+
os.getenv(
|
|
167
|
+
"DUMP_BUFFER", str(settings["project"].get("dump_buffer", {}).get("enabled", _DEFAULT_DUMP_BUFFER_ENABLED))
|
|
168
|
+
)
|
|
169
|
+
.strip()
|
|
170
|
+
.lower()
|
|
171
|
+
in _TRUE_VALUES
|
|
172
|
+
)
|
|
173
|
+
DUMP_BUFFER_PATH = settings["project"].get("dump_buffer", {}).get("path", "flowcept_buffer.jsonl")
|
|
162
174
|
|
|
163
175
|
TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
|
|
164
|
-
TELEMETRY_ENABLED =
|
|
176
|
+
TELEMETRY_ENABLED = os.getenv("TELEMETRY_ENABLED", "true").strip().lower() in _TRUE_VALUES
|
|
177
|
+
TELEMETRY_ENABLED = TELEMETRY_ENABLED and (TELEMETRY_CAPTURE is not None) and (len(TELEMETRY_CAPTURE) > 0)
|
|
165
178
|
|
|
166
179
|
######################
|
|
167
180
|
# SYS METADATA #
|
|
@@ -235,13 +248,9 @@ INSTRUMENTATION = settings.get("instrumentation", {})
|
|
|
235
248
|
INSTRUMENTATION_ENABLED = INSTRUMENTATION.get("enabled", True)
|
|
236
249
|
|
|
237
250
|
AGENT = settings.get("agent", {})
|
|
238
|
-
AGENT_AUDIO =
|
|
239
|
-
"
|
|
240
|
-
|
|
241
|
-
"yes",
|
|
242
|
-
"y",
|
|
243
|
-
"t",
|
|
244
|
-
}
|
|
251
|
+
AGENT_AUDIO = (
|
|
252
|
+
os.getenv("AGENT_AUDIO", str(settings["agent"].get("audio_enabled", "false"))).strip().lower() in _TRUE_VALUES
|
|
253
|
+
)
|
|
245
254
|
AGENT_HOST = os.getenv("AGENT_HOST", settings["agent"].get("mcp_host", "localhost"))
|
|
246
255
|
AGENT_PORT = int(os.getenv("AGENT_PORT", settings["agent"].get("mcp_port", "8000")))
|
|
247
256
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Controller module."""
|
|
2
2
|
|
|
3
|
+
import os
|
|
3
4
|
from typing import List, Dict, Any
|
|
4
5
|
from uuid import uuid4
|
|
5
6
|
|
|
@@ -9,7 +10,7 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
|
9
10
|
WorkflowObject,
|
|
10
11
|
)
|
|
11
12
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
12
|
-
from flowcept.commons.utils import ClassProperty
|
|
13
|
+
from flowcept.commons.utils import ClassProperty, buffer_to_disk
|
|
13
14
|
from flowcept.configs import (
|
|
14
15
|
MQ_INSTANCES,
|
|
15
16
|
INSTRUMENTATION_ENABLED,
|
|
@@ -52,6 +53,7 @@ class Flowcept(object):
|
|
|
52
53
|
start_persistence=True,
|
|
53
54
|
check_safe_stops=True, # TODO add to docstring
|
|
54
55
|
save_workflow=True,
|
|
56
|
+
delete_buffer_file=True,
|
|
55
57
|
*args,
|
|
56
58
|
**kwargs,
|
|
57
59
|
):
|
|
@@ -90,6 +92,9 @@ class Flowcept(object):
|
|
|
90
92
|
save_workflow : bool, default=True
|
|
91
93
|
If True, a workflow object message is sent.
|
|
92
94
|
|
|
95
|
+
delete_buffer_file : bool, default=True
|
|
96
|
+
If True, deletes an existing buffer file; does nothing if the file doesn't exist.
|
|
97
|
+
|
|
93
98
|
Additional arguments (`*args`, `**kwargs`) are used for specific adapters.
|
|
94
99
|
For example, when using the Dask interceptor, the `dask_client` argument
|
|
95
100
|
should be provided in `kwargs` to enable saving the Dask workflow, which is recommended.
|
|
@@ -129,8 +134,11 @@ class Flowcept(object):
|
|
|
129
134
|
self.workflow_name = workflow_name
|
|
130
135
|
self.workflow_args = workflow_args
|
|
131
136
|
|
|
137
|
+
if delete_buffer_file:
|
|
138
|
+
Flowcept.delete_buffer_file()
|
|
139
|
+
|
|
132
140
|
def start(self):
|
|
133
|
-
"""Start
|
|
141
|
+
"""Start Flowcept Controller."""
|
|
134
142
|
if self.is_started or not self.enabled:
|
|
135
143
|
self.logger.warning("DB inserter may be already started or instrumentation is not set")
|
|
136
144
|
return self
|
|
@@ -170,11 +178,88 @@ class Flowcept(object):
|
|
|
170
178
|
self.logger.debug("Flowcept started successfully.")
|
|
171
179
|
return self
|
|
172
180
|
|
|
181
|
+
def get_buffer(self, return_df: bool = False):
|
|
182
|
+
"""
|
|
183
|
+
Retrieve the in-memory message buffer.
|
|
184
|
+
|
|
185
|
+
Parameters
|
|
186
|
+
----------
|
|
187
|
+
return_df : bool, optional
|
|
188
|
+
If False (default), return the raw buffer as a list of dictionaries.
|
|
189
|
+
If True, normalize the buffer into a pandas DataFrame with dotted
|
|
190
|
+
notation for nested keys. Requires ``pandas`` to be installed.
|
|
191
|
+
|
|
192
|
+
Returns
|
|
193
|
+
-------
|
|
194
|
+
list of dict or pandas.DataFrame
|
|
195
|
+
- If ``return_df=False``: the buffer as a list of dictionaries.
|
|
196
|
+
- If ``return_df=True``: the buffer as a normalized DataFrame.
|
|
197
|
+
|
|
198
|
+
Raises
|
|
199
|
+
------
|
|
200
|
+
ModuleNotFoundError
|
|
201
|
+
If ``return_df=True`` but ``pandas`` is not installed.
|
|
202
|
+
|
|
203
|
+
Examples
|
|
204
|
+
--------
|
|
205
|
+
>>> buf = flowcept.get_buffer()
|
|
206
|
+
>>> isinstance(buf, list)
|
|
207
|
+
True
|
|
208
|
+
|
|
209
|
+
>>> df = flowcept.get_buffer(return_df=True)
|
|
210
|
+
>>> "generated.attention" in df.columns
|
|
211
|
+
True
|
|
212
|
+
"""
|
|
213
|
+
if return_df:
|
|
214
|
+
try:
|
|
215
|
+
import pandas as pd
|
|
216
|
+
except ModuleNotFoundError as e:
|
|
217
|
+
raise ModuleNotFoundError("pandas is required when return_df=True. Please install pandas.") from e
|
|
218
|
+
return pd.json_normalize(self.buffer, sep=".")
|
|
219
|
+
return self.buffer
|
|
220
|
+
|
|
173
221
|
def _publish_buffer(self):
|
|
174
222
|
self._interceptor_instances[0]._mq_dao.bulk_publish(self.buffer)
|
|
175
223
|
|
|
224
|
+
def dump_buffer(self, path: str = None):
|
|
225
|
+
"""
|
|
226
|
+
Dump the current in-memory buffer to a JSON Lines (JSONL) file.
|
|
227
|
+
|
|
228
|
+
Each element of the buffer (a dictionary) is serialized as a single line
|
|
229
|
+
of JSON. If no path is provided, the default path from the settings file
|
|
230
|
+
is used.
|
|
231
|
+
|
|
232
|
+
Parameters
|
|
233
|
+
----------
|
|
234
|
+
path : str, optional
|
|
235
|
+
Destination file path for the JSONL output. If not provided,
|
|
236
|
+
defaults to ``DUMP_BUFFER_PATH`` as configured in the settings.
|
|
237
|
+
|
|
238
|
+
Returns
|
|
239
|
+
-------
|
|
240
|
+
None
|
|
241
|
+
The buffer is written to disk, no value is returned.
|
|
242
|
+
|
|
243
|
+
Notes
|
|
244
|
+
-----
|
|
245
|
+
- The buffer is expected to be a list of dictionaries.
|
|
246
|
+
- If a file already exists at the specified path, the buffer is appended to it (the file is opened in append mode).
|
|
247
|
+
- Logging is performed through the class logger.
|
|
248
|
+
|
|
249
|
+
Examples
|
|
250
|
+
--------
|
|
251
|
+
>>> flowcept.dump_buffer("buffer.jsonl")
|
|
252
|
+
# Writes buffer contents to buffer.jsonl
|
|
253
|
+
|
|
254
|
+
>>> flowcept.dump_buffer()
|
|
255
|
+
# Writes buffer contents to the default path defined in settings
|
|
256
|
+
"""
|
|
257
|
+
if path is None:
|
|
258
|
+
path = DUMP_BUFFER_PATH
|
|
259
|
+
buffer_to_disk(self.buffer, path, self.logger)
|
|
260
|
+
|
|
176
261
|
@staticmethod
|
|
177
|
-
def
|
|
262
|
+
def read_buffer_file(file_path: str | None = None, return_df: bool = False, normalize_df: bool = False):
|
|
178
263
|
"""
|
|
179
264
|
Read a JSON Lines (JSONL) file containing captured Flowcept messages.
|
|
180
265
|
|
|
@@ -187,12 +272,15 @@ class Flowcept(object):
|
|
|
187
272
|
Parameters
|
|
188
273
|
----------
|
|
189
274
|
file_path : str, optional
|
|
190
|
-
Path to the
|
|
275
|
+
Path to the buffer file. If not provided, defaults to the value of
|
|
191
276
|
``DUMP_BUFFER_PATH`` from the configuration. If neither is provided,
|
|
192
277
|
an assertion error is raised.
|
|
193
278
|
return_df : bool, default False
|
|
194
279
|
If True, return a normalized pandas DataFrame. If False, return the
|
|
195
280
|
parsed list of dictionaries.
|
|
281
|
+
normalize_df : bool, default False
|
|
282
|
+
If True, normalize the inner dicts (e.g., used, generated, custom_metadata) as individual columns in the
|
|
283
|
+
returned DataFrame.
|
|
196
284
|
|
|
197
285
|
Returns
|
|
198
286
|
-------
|
|
@@ -215,13 +303,13 @@ class Flowcept(object):
|
|
|
215
303
|
--------
|
|
216
304
|
Read messages as a list:
|
|
217
305
|
|
|
218
|
-
>>> msgs =
|
|
306
|
+
>>> msgs = Flowcept.read_buffer_file("offline_buffer.jsonl")
|
|
219
307
|
>>> len(msgs) > 0
|
|
220
308
|
True
|
|
221
309
|
|
|
222
310
|
Read messages as a normalized DataFrame:
|
|
223
311
|
|
|
224
|
-
>>> df =
|
|
312
|
+
>>> df = Flowcept.read_buffer_file("offline_buffer.jsonl", return_df=True)
|
|
225
313
|
>>> "generated.attention" in df.columns
|
|
226
314
|
True
|
|
227
315
|
"""
|
|
@@ -232,7 +320,7 @@ class Flowcept(object):
|
|
|
232
320
|
file_path = DUMP_BUFFER_PATH
|
|
233
321
|
assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
|
|
234
322
|
if not os.path.exists(file_path):
|
|
235
|
-
raise FileNotFoundError(f"
|
|
323
|
+
raise FileNotFoundError(f"Flowcept buffer file '{file_path}' was not found.")
|
|
236
324
|
|
|
237
325
|
with open(file_path, "rb") as f:
|
|
238
326
|
lines = [ln for ln in f.read().splitlines() if ln]
|
|
@@ -244,10 +332,57 @@ class Flowcept(object):
|
|
|
244
332
|
import pandas as pd
|
|
245
333
|
except ModuleNotFoundError as e:
|
|
246
334
|
raise ModuleNotFoundError("pandas is required when return_df=True. Please install pandas.") from e
|
|
247
|
-
|
|
335
|
+
if normalize_df:
|
|
336
|
+
return pd.json_normalize(buffer, sep=".")
|
|
337
|
+
else:
|
|
338
|
+
return pd.read_json(file_path, lines=True)
|
|
248
339
|
|
|
249
340
|
return buffer
|
|
250
341
|
|
|
342
|
+
@staticmethod
|
|
343
|
+
def delete_buffer_file(path: str = None):
|
|
344
|
+
"""
|
|
345
|
+
Delete the buffer file from disk if it exists.
|
|
346
|
+
|
|
347
|
+
If no path is provided, the default path from the settings file
|
|
348
|
+
is used. Logs when the file is removed or when the deletion fails.
|
|
349
|
+
|
|
350
|
+
Parameters
|
|
351
|
+
----------
|
|
352
|
+
path : str, optional
|
|
353
|
+
Path to the buffer JSONL file. If not provided,
|
|
354
|
+
defaults to ``DUMP_BUFFER_PATH`` as configured in the settings.
|
|
355
|
+
|
|
356
|
+
Returns
|
|
357
|
+
-------
|
|
358
|
+
None
|
|
359
|
+
The file is deleted from disk if it exists, no value is returned.
|
|
360
|
+
|
|
361
|
+
Notes
|
|
362
|
+
-----
|
|
363
|
+
- This operation only affects the file on disk. It does not clear
|
|
364
|
+
the in-memory buffer.
|
|
365
|
+
- Logging is performed through the class logger.
|
|
366
|
+
|
|
367
|
+
Examples
|
|
368
|
+
--------
|
|
369
|
+
>>> flowcept.delete_buffer_file("buffer.jsonl")
|
|
370
|
+
# Deletes buffer.jsonl if it exists
|
|
371
|
+
|
|
372
|
+
>>> flowcept.delete_buffer_file()
|
|
373
|
+
# Deletes the default buffer file defined in settings
|
|
374
|
+
"""
|
|
375
|
+
if path is None:
|
|
376
|
+
path = DUMP_BUFFER_PATH
|
|
377
|
+
|
|
378
|
+
try:
|
|
379
|
+
if os.path.exists(path):
|
|
380
|
+
os.remove(path)
|
|
381
|
+
FlowceptLogger().info(f"Buffer file deleted: {path}")
|
|
382
|
+
except Exception as e:
|
|
383
|
+
FlowceptLogger().error(f"Failed to delete buffer file: {path}")
|
|
384
|
+
FlowceptLogger().exception(e)
|
|
385
|
+
|
|
251
386
|
def save_workflow(self, interceptor: str, interceptor_instance: BaseInterceptor):
|
|
252
387
|
"""
|
|
253
388
|
Save the current workflow and send its metadata using the provided interceptor.
|
|
@@ -302,7 +437,7 @@ class Flowcept(object):
|
|
|
302
437
|
self._db_inserters.append(doc_inserter)
|
|
303
438
|
|
|
304
439
|
def stop(self):
|
|
305
|
-
"""Stop
|
|
440
|
+
"""Stop Flowcept controller."""
|
|
306
441
|
if not self.is_started or not self.enabled:
|
|
307
442
|
self.logger.warning("Flowcept is already stopped or may never have been started!")
|
|
308
443
|
return
|
|
@@ -10,6 +10,7 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
|
10
10
|
from flowcept.configs import (
|
|
11
11
|
ENRICH_MESSAGES,
|
|
12
12
|
TELEMETRY_ENABLED,
|
|
13
|
+
TELEMETRY_CAPTURE,
|
|
13
14
|
)
|
|
14
15
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
15
16
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
@@ -135,7 +136,7 @@ class BaseInterceptor(object):
|
|
|
135
136
|
# TODO :base-interceptor-refactor: :code-reorg: :usability:
|
|
136
137
|
raise Exception(f"This interceptor {id(self)} has never been started!")
|
|
137
138
|
workflow_obj.interceptor_ids = [self._interceptor_instance_id]
|
|
138
|
-
if self.telemetry_capture:
|
|
139
|
+
if self.telemetry_capture and TELEMETRY_CAPTURE.get("machine_info", False):
|
|
139
140
|
machine_info = self.telemetry_capture.capture_machine_info()
|
|
140
141
|
if workflow_obj.machine_info is None:
|
|
141
142
|
workflow_obj.machine_info = dict()
|
flowcept/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flowcept
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.12
|
|
4
4
|
Summary: Capture and query workflow provenance data using data observability
|
|
5
5
|
Author: Oak Ridge National Laboratory
|
|
6
6
|
License-Expression: MIT
|
|
@@ -149,11 +149,16 @@ Description-Content-Type: text/markdown
|
|
|
149
149
|
|
|
150
150
|
<p align="center">
|
|
151
151
|
<picture>
|
|
152
|
+
<!-- Dark theme -->
|
|
153
|
+
<source srcset="./docs/img/flowcept-logo-dark.png" media="(prefers-color-scheme: dark)" />
|
|
154
|
+
<!-- Light theme -->
|
|
155
|
+
<source srcset="./docs/img/flowcept-logo.png" media="(prefers-color-scheme: light)" />
|
|
156
|
+
<!-- Fallback -->
|
|
152
157
|
<img src="./docs/img/flowcept-logo.png" alt="Flowcept Logo" width="200"/>
|
|
153
158
|
</picture>
|
|
154
159
|
</p>
|
|
155
|
-
<h3 align="center">Lightweight Distributed Workflow Provenance</h3>
|
|
156
160
|
|
|
161
|
+
<h3 align="center">Lightweight Distributed Workflow Provenance</h3>
|
|
157
162
|
|
|
158
163
|
|
|
159
164
|
---
|
|
@@ -162,6 +167,7 @@ Flowcept captures and queries workflow provenance at runtime with minimal code c
|
|
|
162
167
|
|
|
163
168
|
---
|
|
164
169
|
|
|
170
|
+
|
|
165
171
|
[](https://flowcept.readthedocs.io/)
|
|
166
172
|
[](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml)
|
|
167
173
|
[](https://pypi.org/project/flowcept)
|
|
@@ -169,6 +175,15 @@ Flowcept captures and queries workflow provenance at runtime with minimal code c
|
|
|
169
175
|
[](https://github.com/ORNL/flowcept/actions/workflows/checks.yml)
|
|
170
176
|
[](LICENSE)
|
|
171
177
|
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
<h4 align="center">
|
|
182
|
+
<a href="https://flowcept.readthedocs.io/">Documentation</a> •
|
|
183
|
+
<a href="./docs/publications">Publications</a>
|
|
184
|
+
</h4>
|
|
185
|
+
|
|
186
|
+
|
|
172
187
|
---
|
|
173
188
|
|
|
174
189
|
# Quickstart
|
|
@@ -220,7 +235,7 @@ def main():
|
|
|
220
235
|
if __name__ == "__main__":
|
|
221
236
|
main()
|
|
222
237
|
|
|
223
|
-
prov_messages = Flowcept.
|
|
238
|
+
prov_messages = Flowcept.read_buffer_file()
|
|
224
239
|
assert len(prov_messages) == 2
|
|
225
240
|
print(json.dumps(prov_messages, indent=2))
|
|
226
241
|
```
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
flowcept/__init__.py,sha256=
|
|
2
|
-
flowcept/cli.py,sha256=
|
|
3
|
-
flowcept/configs.py,sha256=
|
|
4
|
-
flowcept/version.py,sha256=
|
|
1
|
+
flowcept/__init__.py,sha256=tvVZKyymdqv3qOsgpAyDppBlUiBc0ag4QF21IcS-mVk,2449
|
|
2
|
+
flowcept/cli.py,sha256=AU6EuawCboCv933Ghb1xFL74UMIJeVTzNocPOoaJS0Q,25696
|
|
3
|
+
flowcept/configs.py,sha256=DBkYx0CAaDSl8x2EJY1665PFY80eCp9PEriYH-BNwL4,8781
|
|
4
|
+
flowcept/version.py,sha256=oReJgxDE1qrMx6aSgAcaDwhbfLZibvBc13ImnrzFA-U,307
|
|
5
5
|
flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
|
|
6
6
|
flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
|
|
7
7
|
flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
|
|
@@ -33,7 +33,7 @@ flowcept/commons/flowcept_logger.py,sha256=0asRucrDMeRXvsdhuCmH6lWO7lAt_Z5o5uW7r
|
|
|
33
33
|
flowcept/commons/query_utils.py,sha256=3tyK5VYA10iDtmtzNwa8OQGn93DBxsu6rTjHDphftSc,2208
|
|
34
34
|
flowcept/commons/settings_factory.py,sha256=bMTjgXRfb5HsL2lPnLfem-9trqELbNWE04Ie7lSlxYM,1731
|
|
35
35
|
flowcept/commons/task_data_preprocess.py,sha256=-ceLexv2ZfZOAYF43DPagGwQPgt_L_lNKuK8ZCpnzXs,13914
|
|
36
|
-
flowcept/commons/utils.py,sha256=
|
|
36
|
+
flowcept/commons/utils.py,sha256=okCShkcuWhzznBtADDDusTdfPXO0W041b2f4Aog-7SE,9831
|
|
37
37
|
flowcept/commons/vocabulary.py,sha256=0psC4NulNFn88mjTcoT_aT4QxX8ljMFgTOF3FxzM40A,1118
|
|
38
38
|
flowcept/commons/daos/__init__.py,sha256=RO51svfHOg9naN676zuQwbj_RQ6IFHu-RALeefvtwwk,23
|
|
39
39
|
flowcept/commons/daos/keyvalue_dao.py,sha256=g7zgC9hVC1NTllwUAqGt44YqdqYUgAKgPlX8_G4BRGw,3599
|
|
@@ -43,7 +43,7 @@ flowcept/commons/daos/docdb_dao/docdb_dao_base.py,sha256=YbfSVJPwZGK2GBYkeapRC83
|
|
|
43
43
|
flowcept/commons/daos/docdb_dao/lmdb_dao.py,sha256=ZuCsdEhI2wGAmjAf82j-1t3tbR6YMmDeaJ_C3HcsLYo,10461
|
|
44
44
|
flowcept/commons/daos/docdb_dao/mongodb_dao.py,sha256=5x0un15uCDTcnuITOyOhvF9mKj_bUmF2du0AHQfjN9k,40055
|
|
45
45
|
flowcept/commons/daos/mq_dao/__init__.py,sha256=Xxm4FmbBUZDQ7XIAmSFbeKE_AdHsbgFmSuftvMWSykQ,21
|
|
46
|
-
flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=
|
|
46
|
+
flowcept/commons/daos/mq_dao/mq_dao_base.py,sha256=VXqXzesU01dCHE5i0urnYQppixUNGZbJMRmm4jSAcgM,9424
|
|
47
47
|
flowcept/commons/daos/mq_dao/mq_dao_kafka.py,sha256=kjZqPLIu5PaNeM4IDvOxkDRVGTd5UWwq3zhDvVirqW8,5067
|
|
48
48
|
flowcept/commons/daos/mq_dao/mq_dao_mofka.py,sha256=tRdMGYDzdeIJxad-B4-DE6u8Wzs61eTzOW4ojZrnTxs,4057
|
|
49
49
|
flowcept/commons/daos/mq_dao/mq_dao_redis.py,sha256=ejBMxImA-h2KuMEAk3l7aU0chCcObCbUXEOXM6L4Zhc,5571
|
|
@@ -54,7 +54,7 @@ flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfI
|
|
|
54
54
|
flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
|
|
55
55
|
flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
|
|
56
56
|
flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
|
|
57
|
-
flowcept/flowcept_api/flowcept_controller.py,sha256=
|
|
57
|
+
flowcept/flowcept_api/flowcept_controller.py,sha256=az1bktiL8_xs4pc97Zqgd1ezsg-cD0whf3XWA1ZN08Q,20652
|
|
58
58
|
flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
|
|
59
59
|
flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
|
|
60
60
|
flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
|
|
@@ -64,7 +64,7 @@ flowcept/flowcept_webserver/resources/task_messages_rsrc.py,sha256=0u68it2W-9NzU
|
|
|
64
64
|
flowcept/flowceptor/__init__.py,sha256=wVxRXUv07iNx6SMRRma2vqhR_GIcRl0re_WCYG65PUs,29
|
|
65
65
|
flowcept/flowceptor/telemetry_capture.py,sha256=CWyR8E1rTAjFbUFI9BxaGfJyDd2UbiK0uLGt4m8BnSU,13932
|
|
66
66
|
flowcept/flowceptor/adapters/__init__.py,sha256=SuZbSZVVQeBJ9zXW-M9jF09dw3XIjre3lSGrUO1Y8Po,27
|
|
67
|
-
flowcept/flowceptor/adapters/base_interceptor.py,sha256=
|
|
67
|
+
flowcept/flowceptor/adapters/base_interceptor.py,sha256=oqnD19TNqi1FHlwAsbyEkiLgywpm5HYLHT1gYlsOHTk,6589
|
|
68
68
|
flowcept/flowceptor/adapters/instrumentation_interceptor.py,sha256=DhK2bBnpghqPSeA62BUqRg6pl8zxuYrP33dK4x6PhRE,733
|
|
69
69
|
flowcept/flowceptor/adapters/interceptor_state_manager.py,sha256=xRzmi5YFKBEqNtX8F5s6XlMTRe27ml4BmQtBO4WtG2c,919
|
|
70
70
|
flowcept/flowceptor/adapters/brokers/__init__.py,sha256=mhQXVmh0JklvL93GUtJZLJnPRYX9Nmb8IqcyKJGQBzk,36
|
|
@@ -90,13 +90,13 @@ flowcept/flowceptor/consumers/agent/base_agent_context_manager.py,sha256=5fBPYs-
|
|
|
90
90
|
flowcept/instrumentation/__init__.py,sha256=M5bTmg80E4QyN91gUX3qfw_nbtJSXwGWcKxdZP3vJz0,34
|
|
91
91
|
flowcept/instrumentation/flowcept_agent_task.py,sha256=XN9JU4LODca0SgojUm4F5iU_V8tuWkOt1fAKcoOAG34,10757
|
|
92
92
|
flowcept/instrumentation/flowcept_decorator.py,sha256=X4Lp_FSsoL08K8ZhRM4mC0OjKupbQtbMQR8zxy3ezDY,1350
|
|
93
|
-
flowcept/instrumentation/flowcept_loop.py,sha256=
|
|
93
|
+
flowcept/instrumentation/flowcept_loop.py,sha256=nF7Sov-DCDapyYvS8zx-1ZFrnjc3CPg2VsjDaxFs0Cc,15667
|
|
94
94
|
flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
|
|
95
95
|
flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
|
|
96
96
|
flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
|
|
97
|
-
resources/sample_settings.yaml,sha256
|
|
98
|
-
flowcept-0.9.
|
|
99
|
-
flowcept-0.9.
|
|
100
|
-
flowcept-0.9.
|
|
101
|
-
flowcept-0.9.
|
|
102
|
-
flowcept-0.9.
|
|
97
|
+
resources/sample_settings.yaml,sha256=8IIhePR5hqHN1QaUcoHATH2Nrp6yl6DMRvoYDGOUWFQ,6881
|
|
98
|
+
flowcept-0.9.12.dist-info/METADATA,sha256=KjhFj4dpu0BhBs5AvJFkcJq7jNCWgxdlVaT5-Q3_boA,32896
|
|
99
|
+
flowcept-0.9.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
100
|
+
flowcept-0.9.12.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
|
|
101
|
+
flowcept-0.9.12.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
|
|
102
|
+
flowcept-0.9.12.dist-info/RECORD,,
|
resources/sample_settings.yaml
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
flowcept_version: 0.9.
|
|
1
|
+
flowcept_version: 0.9.12 # Version of the Flowcept package. This settings file is compatible with this version.
|
|
2
2
|
|
|
3
3
|
project:
|
|
4
4
|
debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
|
|
5
5
|
json_serializer: default # JSON serialization mode: default or complex. If "complex", Flowcept will deal with complex python dicts that may contain JSON unserializable values
|
|
6
6
|
replace_non_json_serializable: true # Replace values that can't be JSON serialized
|
|
7
7
|
performance_logging: false # Enable performance logging if true. Particularly useful for MQ flushes.
|
|
8
|
-
enrich_messages: true # Add extra metadata to task messages, such as IP addresses
|
|
8
|
+
enrich_messages: true # Add extra metadata to task messages, such as IP addresses of the node that executed the task, UTC timestamps, GitHub repo metadata.
|
|
9
9
|
db_flush_mode: online # Mode for flushing DB entries: "online" or "offline". If online, flushes to the DB will happen before the workflow ends.
|
|
10
|
-
|
|
10
|
+
dump_buffer: # This is particularly useful if you need to run completely offline. If you omit this, even offline, buffer data will not be persisted.
|
|
11
|
+
enabled: false
|
|
12
|
+
path: flowcept_buffer.jsonl
|
|
11
13
|
|
|
12
14
|
log:
|
|
13
15
|
log_path: "default" # Path for log file output; "default" will write the log in the directory where the main executable is running from.
|
|
@@ -106,6 +108,7 @@ databases:
|
|
|
106
108
|
db: flowcept
|
|
107
109
|
create_collection_index: true # Whether flowcept should create collection indices if they haven't been created yet. This is done only at the Flowcept start up.
|
|
108
110
|
# bin: /usr/bin/mongod
|
|
111
|
+
# db_path:
|
|
109
112
|
# log_path: /var/log/mongodb/mongod.log
|
|
110
113
|
# lock_file_path: /var/run/mongod.pid
|
|
111
114
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|