flowcept 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +5 -0
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/cli.py +41 -42
- flowcept/commons/autoflush_buffer.py +5 -0
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +4 -1
- flowcept/commons/daos/mq_dao/mq_dao_base.py +1 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +86 -6
- flowcept/commons/flowcept_dataclasses/workflow_object.py +41 -1
- flowcept/configs.py +1 -1
- flowcept/flowcept_api/flowcept_controller.py +9 -1
- flowcept/flowceptor/consumers/base_consumer.py +5 -0
- flowcept/flowceptor/consumers/consumer_utils.py +11 -3
- flowcept/flowceptor/telemetry_capture.py +2 -1
- flowcept/instrumentation/task_capture.py +16 -16
- flowcept/version.py +1 -1
- flowcept-0.9.2.dist-info/METADATA +589 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.2.dist-info}/RECORD +21 -21
- resources/sample_settings.yaml +2 -2
- flowcept-0.9.1.dist-info/METADATA +0 -439
- {flowcept-0.9.1.dist-info → flowcept-0.9.2.dist-info}/WHEEL +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.2.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.2.dist-info}/licenses/LICENSE +0 -0
flowcept/__init__.py
CHANGED
|
@@ -16,6 +16,11 @@ def __getattr__(name):
|
|
|
16
16
|
|
|
17
17
|
return WorkflowObject
|
|
18
18
|
|
|
19
|
+
elif name == "TaskObject":
|
|
20
|
+
from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
|
|
21
|
+
|
|
22
|
+
return TaskObject
|
|
23
|
+
|
|
19
24
|
elif name == "flowcept_task":
|
|
20
25
|
from flowcept.instrumentation.flowcept_task import flowcept_task
|
|
21
26
|
|
|
@@ -24,8 +24,8 @@ ROUTING_PROMPT = (
|
|
|
24
24
|
"Given the following user message, classify it into one of the following routes:\n"
|
|
25
25
|
"- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
|
|
26
26
|
"- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
|
|
27
|
+
"- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
|
|
27
28
|
"- historical_prov_query: if the user wants to query historical provenance data\n"
|
|
28
|
-
"- in_context_query: if the user appears to ask questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
|
|
29
29
|
"- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
|
|
30
30
|
"- unknown: if you don't know.\n"
|
|
31
31
|
"Respond with only the route label."
|
flowcept/cli.py
CHANGED
|
@@ -101,17 +101,17 @@ def version():
|
|
|
101
101
|
print(f"Flowcept {__version__}")
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
def stream_messages(
|
|
104
|
+
def stream_messages(messages_file_path: Optional[str] = None, keys_to_show: List[str] = None):
|
|
105
105
|
"""
|
|
106
106
|
Listen to Flowcept's message stream and optionally echo/save messages.
|
|
107
107
|
|
|
108
108
|
Parameters.
|
|
109
109
|
----------
|
|
110
|
-
print_messages : bool, optional
|
|
111
|
-
If True, print each decoded message to stdout.
|
|
112
110
|
messages_file_path : str, optional
|
|
113
111
|
If provided, append each message as JSON (one per line) to this file.
|
|
114
112
|
If the file already exists, a new timestamped file is created instead.
|
|
113
|
+
keys_to_show : List[str], optional
|
|
114
|
+
List of object keys to show in the prints. Use comma-separated list: --keys-to-show 'activity_id','workflow_id'
|
|
115
115
|
"""
|
|
116
116
|
# Local imports to avoid changing module-level deps
|
|
117
117
|
from flowcept.configs import MQ_TYPE
|
|
@@ -123,10 +123,7 @@ def stream_messages(print_messages: bool = False, messages_file_path: Optional[s
|
|
|
123
123
|
import os
|
|
124
124
|
import json
|
|
125
125
|
from datetime import datetime
|
|
126
|
-
import
|
|
127
|
-
import msgpack
|
|
128
|
-
from flowcept.configs import MQ_HOST, MQ_PORT, MQ_CHANNEL, KVDB_URI
|
|
129
|
-
from flowcept.commons.daos.mq_dao.mq_dao_redis import MQDaoRedis
|
|
126
|
+
from flowcept.flowceptor.consumers.base_consumer import BaseConsumer
|
|
130
127
|
|
|
131
128
|
def _timestamped_path_if_exists(path: Optional[str]) -> Optional[str]:
|
|
132
129
|
if not path:
|
|
@@ -150,53 +147,53 @@ def stream_messages(print_messages: bool = False, messages_file_path: Optional[s
|
|
|
150
147
|
|
|
151
148
|
return json.dumps(obj, ensure_ascii=False, separators=(",", ":"), default=_default)
|
|
152
149
|
|
|
153
|
-
# Prepare output file (JSONL)
|
|
154
150
|
out_fh = None
|
|
155
151
|
if messages_file_path:
|
|
156
152
|
out_path = _timestamped_path_if_exists(messages_file_path)
|
|
157
153
|
out_fh = open(out_path, "w", encoding="utf-8", buffering=1) # line-buffered
|
|
158
154
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
pubsub.subscribe(MQ_CHANNEL)
|
|
163
|
-
|
|
164
|
-
print(f"Listening for messages on channel '{MQ_CHANNEL}'... (Ctrl+C to exit)")
|
|
165
|
-
|
|
166
|
-
try:
|
|
167
|
-
for message in pubsub.listen():
|
|
168
|
-
if not message or message.get("type") in MQDaoRedis.MESSAGE_TYPES_IGNORE:
|
|
169
|
-
continue
|
|
170
|
-
|
|
171
|
-
data = message.get("data")
|
|
172
|
-
if not isinstance(data, (bytes, bytearray)):
|
|
173
|
-
print(f"Skipping message with unexpected data type: {type(data)} - {data}")
|
|
174
|
-
continue
|
|
155
|
+
class MyConsumer(BaseConsumer):
|
|
156
|
+
def __init__(self):
|
|
157
|
+
super().__init__()
|
|
175
158
|
|
|
159
|
+
def message_handler(self, msg_obj: Dict) -> bool:
|
|
176
160
|
try:
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
161
|
+
if keys_to_show is not None:
|
|
162
|
+
obj_to_print = {}
|
|
163
|
+
for k in keys_to_show:
|
|
164
|
+
v = msg_obj.get(k, None)
|
|
165
|
+
if v is not None:
|
|
166
|
+
obj_to_print[k] = v
|
|
167
|
+
if not obj_to_print:
|
|
168
|
+
obj_to_print = msg_obj
|
|
169
|
+
else:
|
|
170
|
+
obj_to_print = msg_obj
|
|
171
|
+
|
|
172
|
+
print(_json_dumps(obj_to_print))
|
|
183
173
|
|
|
184
174
|
if out_fh is not None:
|
|
185
|
-
out_fh.write(_json_dumps(
|
|
175
|
+
out_fh.write(_json_dumps(obj_to_print))
|
|
186
176
|
out_fh.write("\n")
|
|
187
|
-
|
|
177
|
+
except KeyboardInterrupt:
|
|
178
|
+
print("\nGracefully interrupted, shutting down...")
|
|
179
|
+
return False
|
|
188
180
|
except Exception as e:
|
|
189
|
-
print(
|
|
181
|
+
print(e)
|
|
182
|
+
return False
|
|
183
|
+
finally:
|
|
184
|
+
try:
|
|
185
|
+
if out_fh:
|
|
186
|
+
out_fh.close()
|
|
187
|
+
except Exception as e:
|
|
188
|
+
print(e)
|
|
189
|
+
return False
|
|
190
190
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
pubsub.close()
|
|
198
|
-
except Exception:
|
|
199
|
-
pass
|
|
191
|
+
return True
|
|
192
|
+
|
|
193
|
+
m = f"Printing only the keys {keys_to_show}" if keys_to_show is not None else ""
|
|
194
|
+
print(f"Listening for messages.{m} Ctrl+C to exit")
|
|
195
|
+
consumer = MyConsumer()
|
|
196
|
+
consumer.start(daemon=False)
|
|
200
197
|
|
|
201
198
|
|
|
202
199
|
def start_consumption_services(bundle_exec_id: str = None, check_safe_stops: bool = False, consumers: List[str] = None):
|
|
@@ -700,3 +697,5 @@ def main(): # noqa: D103
|
|
|
700
697
|
if __name__ == "__main__":
|
|
701
698
|
main()
|
|
702
699
|
# check_services()
|
|
700
|
+
|
|
701
|
+
__doc__ = None
|
|
@@ -47,6 +47,11 @@ class AutoflushBuffer:
|
|
|
47
47
|
if len(buffer) >= self._max_size:
|
|
48
48
|
self._swap_event.set()
|
|
49
49
|
|
|
50
|
+
@property
|
|
51
|
+
def current_buffer(self):
|
|
52
|
+
"""Return the currently active buffer (read-only)."""
|
|
53
|
+
return self._buffers[self._current_buffer_index]
|
|
54
|
+
|
|
50
55
|
def time_based_flush(self):
|
|
51
56
|
"""Time flush."""
|
|
52
57
|
while not self._stop_event.is_set():
|
|
@@ -56,7 +56,10 @@ class LMDBDAO(DocumentDBDAO):
|
|
|
56
56
|
t0 = 0
|
|
57
57
|
if PERF_LOG:
|
|
58
58
|
t0 = time()
|
|
59
|
-
indexed_buffer = curate_dict_task_messages(
|
|
59
|
+
indexed_buffer = curate_dict_task_messages(
|
|
60
|
+
docs, indexing_key, t0, convert_times=False, keys_to_drop=["data"]
|
|
61
|
+
)
|
|
62
|
+
|
|
60
63
|
with self._env.begin(write=True, db=self._tasks_db) as txn:
|
|
61
64
|
for key, value in indexed_buffer.items():
|
|
62
65
|
k, v = key.encode(), json.dumps(value).encode()
|
|
@@ -102,6 +102,7 @@ class MQDao(object):
|
|
|
102
102
|
|
|
103
103
|
with open(DUMP_BUFFER_PATH, "wb", buffering=1_048_576) as f:
|
|
104
104
|
for obj in buffer:
|
|
105
|
+
obj.pop("data", None) # We are not going to store data in the buffer file.
|
|
105
106
|
f.write(orjson.dumps(obj))
|
|
106
107
|
f.write(b"\n")
|
|
107
108
|
self.logger.info(f"Saved Flowcept messages into {DUMP_BUFFER_PATH}.")
|
|
@@ -16,45 +16,125 @@ from flowcept.configs import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class TaskObject:
|
|
19
|
-
"""Task class.
|
|
19
|
+
"""Task object class.
|
|
20
|
+
|
|
21
|
+
Represents a single provenance task in Flowcept, including inputs, outputs,
|
|
22
|
+
execution metadata, telemetry, and environment details.
|
|
23
|
+
"""
|
|
20
24
|
|
|
21
25
|
type = "task"
|
|
26
|
+
"""Constant type label for this object ("task")."""
|
|
27
|
+
|
|
22
28
|
subtype: AnyStr = None
|
|
23
|
-
|
|
29
|
+
"""Optional subtype of the task (e.g., iteration, ML step, custom)."""
|
|
30
|
+
|
|
31
|
+
task_id: AnyStr = None
|
|
32
|
+
"""Unique identifier of the task."""
|
|
33
|
+
|
|
24
34
|
utc_timestamp: float = None
|
|
35
|
+
"""UTC timestamp when the task object was created."""
|
|
36
|
+
|
|
25
37
|
adapter_id: AnyStr = None
|
|
38
|
+
"""Identifier of the adapter that produced this task (if any)."""
|
|
39
|
+
|
|
26
40
|
user: AnyStr = None
|
|
41
|
+
"""User who executed or triggered the task."""
|
|
42
|
+
|
|
27
43
|
data: Any = None
|
|
28
|
-
|
|
44
|
+
"""Arbitrary raw data payload associated with the task. It is good practice to add custom_metadata associated with
|
|
45
|
+
`data`, especially if it contains file contents.
|
|
46
|
+
In that case, `custom_metadata` should contain the keys "file_type", "file_content", "file_name", "extension".
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
used: Dict[AnyStr, Any] = None
|
|
50
|
+
"""Inputs consumed by the task (parameters, files, resources)."""
|
|
51
|
+
|
|
29
52
|
campaign_id: AnyStr = None
|
|
30
|
-
|
|
53
|
+
"""Campaign identifier grouping related tasks together."""
|
|
54
|
+
|
|
55
|
+
generated: Dict[AnyStr, Any] = None
|
|
56
|
+
"""Outputs produced by the task (results, artifacts, files)."""
|
|
57
|
+
|
|
31
58
|
submitted_at: float = None
|
|
59
|
+
"""Timestamp when the task was submitted."""
|
|
60
|
+
|
|
32
61
|
started_at: float = None
|
|
62
|
+
"""Timestamp when the task execution started."""
|
|
63
|
+
|
|
33
64
|
ended_at: float = None
|
|
34
|
-
|
|
65
|
+
"""Timestamp when the task execution ended."""
|
|
66
|
+
|
|
67
|
+
registered_at: float = None
|
|
68
|
+
"""Timestamp when the task was registered by the DocInserter."""
|
|
69
|
+
|
|
35
70
|
telemetry_at_start: Telemetry = None
|
|
71
|
+
"""Telemetry snapshot captured at the start of the task."""
|
|
72
|
+
|
|
36
73
|
telemetry_at_end: Telemetry = None
|
|
74
|
+
"""Telemetry snapshot captured at the end of the task."""
|
|
75
|
+
|
|
37
76
|
workflow_name: AnyStr = None
|
|
77
|
+
"""Name of the workflow this task belongs to."""
|
|
78
|
+
|
|
38
79
|
workflow_id: AnyStr = None
|
|
80
|
+
"""Identifier of the workflow this task belongs to."""
|
|
81
|
+
|
|
39
82
|
parent_task_id: AnyStr = None
|
|
83
|
+
"""Identifier of the parent task, if this task is nested or dependent."""
|
|
84
|
+
|
|
40
85
|
activity_id: AnyStr = None
|
|
41
|
-
|
|
86
|
+
"""Activity name (usually the function name) associated with the task."""
|
|
87
|
+
|
|
88
|
+
group_id: AnyStr = None
|
|
89
|
+
"""Grouping identifier, often used to link loop iterations together."""
|
|
90
|
+
|
|
42
91
|
status: Status = None
|
|
92
|
+
"""Execution status of the task (e.g., FINISHED, ERROR)."""
|
|
93
|
+
|
|
43
94
|
stdout: Union[AnyStr, Dict] = None
|
|
95
|
+
"""Captured standard output from the task, if available."""
|
|
96
|
+
|
|
44
97
|
stderr: Union[AnyStr, Dict] = None
|
|
98
|
+
"""Captured standard error from the task, if available."""
|
|
99
|
+
|
|
45
100
|
custom_metadata: Dict[AnyStr, Any] = None
|
|
101
|
+
"""Custom metadata dictionary provided by the developer/user."""
|
|
102
|
+
|
|
46
103
|
mq_host: str = None
|
|
104
|
+
"""Message queue host associated with the task."""
|
|
105
|
+
|
|
47
106
|
environment_id: AnyStr = None
|
|
107
|
+
"""Identifier of the environment where the task executed."""
|
|
108
|
+
|
|
48
109
|
node_name: AnyStr = None
|
|
110
|
+
"""Node name in a distributed system or HPC cluster."""
|
|
111
|
+
|
|
49
112
|
login_name: AnyStr = None
|
|
113
|
+
"""Login name of the user in the execution environment."""
|
|
114
|
+
|
|
50
115
|
public_ip: AnyStr = None
|
|
116
|
+
"""Public IP address of the machine executing the task."""
|
|
117
|
+
|
|
51
118
|
private_ip: AnyStr = None
|
|
119
|
+
"""Private IP address of the machine executing the task."""
|
|
120
|
+
|
|
52
121
|
hostname: AnyStr = None
|
|
122
|
+
"""Hostname of the machine executing the task."""
|
|
123
|
+
|
|
53
124
|
address: AnyStr = None
|
|
125
|
+
"""Optional network address associated with the task."""
|
|
126
|
+
|
|
54
127
|
dependencies: List = None
|
|
128
|
+
"""List of task IDs this task depends on."""
|
|
129
|
+
|
|
55
130
|
dependents: List = None
|
|
131
|
+
"""List of task IDs that depend on this task."""
|
|
132
|
+
|
|
56
133
|
tags: List = None
|
|
134
|
+
"""User-defined tags attached to the task."""
|
|
135
|
+
|
|
57
136
|
agent_id: str = None
|
|
137
|
+
"""Identifier of the agent responsible for executing this task (if any)."""
|
|
58
138
|
|
|
59
139
|
_DEFAULT_ENRICH_VALUES = {
|
|
60
140
|
"node_name": NODE_NAME,
|
|
@@ -19,27 +19,67 @@ from flowcept.configs import (
|
|
|
19
19
|
# Not a dataclass because a dataclass stores keys even when there's no value,
|
|
20
20
|
# adding unnecessary overhead.
|
|
21
21
|
class WorkflowObject:
|
|
22
|
-
"""Workflow class.
|
|
22
|
+
"""Workflow object class.
|
|
23
|
+
|
|
24
|
+
Represents metadata and provenance details for a workflow execution.
|
|
25
|
+
"""
|
|
23
26
|
|
|
24
27
|
workflow_id: AnyStr = None
|
|
28
|
+
"""Unique identifier for the workflow."""
|
|
29
|
+
|
|
25
30
|
parent_workflow_id: AnyStr = None
|
|
31
|
+
"""Identifier of the parent workflow, if this workflow is nested or derived."""
|
|
32
|
+
|
|
26
33
|
machine_info: Dict = None
|
|
34
|
+
"""System or hardware information where the workflow is executed."""
|
|
35
|
+
|
|
27
36
|
conf: Dict = None
|
|
37
|
+
"""Workflow configuration parameters, such as hyperparameters or runtime options."""
|
|
38
|
+
|
|
28
39
|
flowcept_settings: Dict = None
|
|
40
|
+
"""Snapshot of Flowcept’s active settings used for this workflow."""
|
|
41
|
+
|
|
29
42
|
flowcept_version: AnyStr = None
|
|
43
|
+
"""Version of Flowcept used during execution."""
|
|
44
|
+
|
|
30
45
|
utc_timestamp: float = None
|
|
46
|
+
"""Timestamp (UTC, in seconds) when the workflow object was created."""
|
|
47
|
+
|
|
31
48
|
user: AnyStr = None
|
|
49
|
+
"""User who launched or owns the workflow run."""
|
|
50
|
+
|
|
32
51
|
campaign_id: AnyStr = None
|
|
52
|
+
"""Identifier for grouping workflows into a campaign or experiment."""
|
|
53
|
+
|
|
33
54
|
adapter_id: AnyStr = None
|
|
55
|
+
"""Identifier of the adapter (e.g., Dask, MLflow) that triggered workflow capture."""
|
|
56
|
+
|
|
34
57
|
interceptor_ids: List[AnyStr] = None
|
|
58
|
+
"""List of interceptors applied to this workflow (e.g., instrumentation, telemetry)."""
|
|
59
|
+
|
|
35
60
|
name: AnyStr = None
|
|
61
|
+
"""Descriptive name for the workflow."""
|
|
62
|
+
|
|
36
63
|
custom_metadata: Dict = None
|
|
64
|
+
"""User-defined metadata dictionary with additional annotations."""
|
|
65
|
+
|
|
37
66
|
environment_id: str = None
|
|
67
|
+
"""Identifier for the runtime environment (e.g., conda env, container)."""
|
|
68
|
+
|
|
38
69
|
sys_name: str = None
|
|
70
|
+
"""Logical system or facility name (e.g., HPC system name, cluster identifier)."""
|
|
71
|
+
|
|
39
72
|
extra_metadata: str = None
|
|
73
|
+
"""Optional free-form metadata for extensions not covered by other fields."""
|
|
74
|
+
|
|
40
75
|
used: Dict = None
|
|
76
|
+
"""Inputs consumed by the workflow (datasets, arguments, or configuration values)."""
|
|
77
|
+
|
|
41
78
|
code_repository: Dict = None
|
|
79
|
+
"""Details of the code repository (URL, commit hash, branch) used to run the workflow."""
|
|
80
|
+
|
|
42
81
|
generated: Dict = None
|
|
82
|
+
"""Outputs generated by the workflow (artifacts, models, or results)."""
|
|
43
83
|
|
|
44
84
|
def __init__(self, workflow_id=None, name=None, used=None, generated=None):
|
|
45
85
|
self.workflow_id = workflow_id
|
flowcept/configs.py
CHANGED
|
@@ -158,7 +158,7 @@ PERF_LOG = settings["project"].get("performance_logging", False)
|
|
|
158
158
|
JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
|
|
159
159
|
REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
|
|
160
160
|
ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
|
|
161
|
-
DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path",
|
|
161
|
+
DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", "flowcept_messages.jsonl")
|
|
162
162
|
|
|
163
163
|
TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
|
|
164
164
|
TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)
|
|
@@ -4,6 +4,7 @@ import os.path
|
|
|
4
4
|
from typing import List, Dict
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
|
+
from flowcept.commons.autoflush_buffer import AutoflushBuffer
|
|
7
8
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
8
9
|
from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
9
10
|
WorkflowObject,
|
|
@@ -30,6 +31,7 @@ class Flowcept(object):
|
|
|
30
31
|
# TODO: rename current_workflow_id to workflow_id. This will be a major refactor
|
|
31
32
|
current_workflow_id = None
|
|
32
33
|
campaign_id = None
|
|
34
|
+
buffer = None
|
|
33
35
|
|
|
34
36
|
@ClassProperty
|
|
35
37
|
def db(cls):
|
|
@@ -155,7 +157,11 @@ class Flowcept(object):
|
|
|
155
157
|
interceptor_inst = BaseInterceptor.build(interceptor)
|
|
156
158
|
interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
|
|
157
159
|
self._interceptor_instances.append(interceptor_inst)
|
|
158
|
-
|
|
160
|
+
if isinstance(interceptor_inst._mq_dao.buffer, AutoflushBuffer):
|
|
161
|
+
Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer.current_buffer
|
|
162
|
+
else:
|
|
163
|
+
Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer
|
|
164
|
+
|
|
159
165
|
if self._should_save_workflow and not self._workflow_saved:
|
|
160
166
|
self.save_workflow(interceptor, interceptor_inst)
|
|
161
167
|
|
|
@@ -296,6 +302,8 @@ class Flowcept(object):
|
|
|
296
302
|
self.logger.info("Stopping DB Inserters...")
|
|
297
303
|
for db_inserter in self._db_inserters:
|
|
298
304
|
db_inserter.stop(bundle_exec_id=self._bundle_exec_id)
|
|
305
|
+
|
|
306
|
+
Flowcept.buffer = self.buffer = None
|
|
299
307
|
self.is_started = False
|
|
300
308
|
self.logger.debug("All stopped!")
|
|
301
309
|
|
|
@@ -4,6 +4,7 @@ from typing import Callable, Dict, Tuple, Optional
|
|
|
4
4
|
|
|
5
5
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
6
6
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
7
|
+
from flowcept.configs import MQ_ENABLED
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class BaseConsumer(object):
|
|
@@ -16,7 +17,11 @@ class BaseConsumer(object):
|
|
|
16
17
|
|
|
17
18
|
def __init__(self):
|
|
18
19
|
"""Initialize the message queue DAO and logger."""
|
|
20
|
+
if not MQ_ENABLED:
|
|
21
|
+
raise Exception("MQ is disabled in the settings. You cannot consume messages.")
|
|
22
|
+
|
|
19
23
|
self._mq_dao = MQDao.build()
|
|
24
|
+
|
|
20
25
|
self.logger = FlowceptLogger()
|
|
21
26
|
self._main_thread: Optional[Thread] = None
|
|
22
27
|
|
|
@@ -11,7 +11,7 @@ from flowcept.commons.vocabulary import Status
|
|
|
11
11
|
UTC_TZ = ZoneInfo("UTC")
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
14
|
+
def curate_task_msg(task_msg_dict: dict, convert_times=True, keys_to_drop: List = None):
|
|
15
15
|
"""Curate a task message."""
|
|
16
16
|
# Converting any arg to kwarg in the form {"arg1": val1, "arg2: val2}
|
|
17
17
|
for field in TaskObject.get_dict_field_names():
|
|
@@ -45,6 +45,10 @@ def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
|
45
45
|
if "used" in task_msg_dict and task_msg_dict["used"].get("workflow_id", None):
|
|
46
46
|
task_msg_dict["workflow_id"] = task_msg_dict["used"].pop("workflow_id")
|
|
47
47
|
|
|
48
|
+
if keys_to_drop is not None:
|
|
49
|
+
for k in keys_to_drop:
|
|
50
|
+
task_msg_dict.pop(k, None)
|
|
51
|
+
|
|
48
52
|
if convert_times:
|
|
49
53
|
for time_field in TaskObject.get_time_field_names():
|
|
50
54
|
if time_field in task_msg_dict:
|
|
@@ -97,7 +101,11 @@ def convert_keys_to_strings(obj):
|
|
|
97
101
|
|
|
98
102
|
|
|
99
103
|
def curate_dict_task_messages(
|
|
100
|
-
doc_list: List[Dict],
|
|
104
|
+
doc_list: List[Dict],
|
|
105
|
+
indexing_key: str,
|
|
106
|
+
utc_time_at_insertion: float = 0,
|
|
107
|
+
convert_times=True,
|
|
108
|
+
keys_to_drop: List = None,
|
|
101
109
|
):
|
|
102
110
|
"""Remove duplicates.
|
|
103
111
|
|
|
@@ -134,7 +142,7 @@ def curate_dict_task_messages(
|
|
|
134
142
|
if utc_time_at_insertion > 0:
|
|
135
143
|
doc["utc_time_at_insertion"] = utc_time_at_insertion
|
|
136
144
|
|
|
137
|
-
curate_task_msg(doc, convert_times)
|
|
145
|
+
curate_task_msg(doc, convert_times, keys_to_drop)
|
|
138
146
|
indexing_key_value = doc[indexing_key]
|
|
139
147
|
|
|
140
148
|
if indexing_key_value not in indexed_buffer:
|
|
@@ -258,7 +258,8 @@ class TelemetryCapture:
|
|
|
258
258
|
disk.disk_usage = psutil.disk_usage("/")._asdict()
|
|
259
259
|
|
|
260
260
|
platform_info = platform.uname()._asdict()
|
|
261
|
-
|
|
261
|
+
network_info_raw = psutil.net_if_addrs()
|
|
262
|
+
network_info = {ifname: [addr._asdict() for addr in addrs] for ifname, addrs in network_info_raw.items()}
|
|
262
263
|
processor_info = cpuinfo.get_cpu_info()
|
|
263
264
|
|
|
264
265
|
gpu_info = None
|
|
@@ -22,21 +22,6 @@ class FlowceptTask(object):
|
|
|
22
22
|
and metadata. It integrates with the Flowcept API and Instrumentation Interceptor to
|
|
23
23
|
log task-specific details.
|
|
24
24
|
|
|
25
|
-
Parameters
|
|
26
|
-
----------
|
|
27
|
-
task_id : str, optional
|
|
28
|
-
Unique identifier for the task. If not provided, it defaults to the current timestamp.
|
|
29
|
-
workflow_id : str, optional
|
|
30
|
-
ID of the workflow to which this task belongs. Defaults to the current workflow ID from
|
|
31
|
-
Flowcept.
|
|
32
|
-
campaign_id : str, optional
|
|
33
|
-
ID of the campaign to which this task belongs. Defaults to the current campaign ID from
|
|
34
|
-
Flowcept.
|
|
35
|
-
used : Dict, optional
|
|
36
|
-
Metadata about the resources or data used during the task execution.
|
|
37
|
-
custom_metadata : Dict, optional
|
|
38
|
-
User-defined metadata associated with the task.
|
|
39
|
-
|
|
40
25
|
Methods
|
|
41
26
|
-------
|
|
42
27
|
__enter__()
|
|
@@ -65,6 +50,7 @@ class FlowceptTask(object):
|
|
|
65
50
|
subtype: str = None,
|
|
66
51
|
custom_metadata: Dict = None,
|
|
67
52
|
generated: Dict = None,
|
|
53
|
+
started_at: float = None,
|
|
68
54
|
ended_at: float = None,
|
|
69
55
|
stdout: str = None,
|
|
70
56
|
stderr: str = None,
|
|
@@ -90,12 +76,16 @@ class FlowceptTask(object):
|
|
|
90
76
|
Describes the specific activity this task captures.
|
|
91
77
|
used : Dict, optional
|
|
92
78
|
Metadata about resources or data used during the task.
|
|
79
|
+
data: Any, optional
|
|
80
|
+
Any raw data associated to this task.
|
|
93
81
|
subtype : str, optional
|
|
94
82
|
Optional string categorizing the task subtype.
|
|
95
83
|
custom_metadata : Dict, optional
|
|
96
84
|
Additional user-defined metadata to associate with the task.
|
|
97
85
|
generated : Dict, optional
|
|
98
86
|
Output data generated during the task execution.
|
|
87
|
+
started_at : float, optional
|
|
88
|
+
Timestamp indicating when the task started.
|
|
99
89
|
ended_at : float, optional
|
|
100
90
|
Timestamp indicating when the task ended.
|
|
101
91
|
stdout : str, optional
|
|
@@ -117,7 +107,7 @@ class FlowceptTask(object):
|
|
|
117
107
|
self._task.telemetry_at_start = tel
|
|
118
108
|
|
|
119
109
|
self._task.activity_id = activity_id
|
|
120
|
-
self._task.started_at = time()
|
|
110
|
+
self._task.started_at = started_at or time()
|
|
121
111
|
self._task.task_id = task_id or self._gen_task_id()
|
|
122
112
|
self._task.workflow_id = workflow_id or Flowcept.current_workflow_id
|
|
123
113
|
self._task.campaign_id = campaign_id or Flowcept.campaign_id
|
|
@@ -159,6 +149,8 @@ class FlowceptTask(object):
|
|
|
159
149
|
ended_at: float = None,
|
|
160
150
|
stdout: str = None,
|
|
161
151
|
stderr: str = None,
|
|
152
|
+
data: Any = None,
|
|
153
|
+
custom_metadata: Dict = None,
|
|
162
154
|
status: Status = Status.FINISHED,
|
|
163
155
|
):
|
|
164
156
|
"""
|
|
@@ -172,6 +164,10 @@ class FlowceptTask(object):
|
|
|
172
164
|
----------
|
|
173
165
|
generated : Dict, optional
|
|
174
166
|
Metadata or data generated during the task's execution. Defaults to None.
|
|
167
|
+
data: Any, optional
|
|
168
|
+
Any raw data associated to this task.
|
|
169
|
+
custom_metadata : Dict, optional
|
|
170
|
+
Additional user-defined metadata to associate with the task.
|
|
175
171
|
ended_at : float, optional
|
|
176
172
|
Timestamp indicating when the task ended. If not provided, defaults to the current time.
|
|
177
173
|
stdout : str, optional
|
|
@@ -191,6 +187,10 @@ class FlowceptTask(object):
|
|
|
191
187
|
if TELEMETRY_ENABLED:
|
|
192
188
|
tel = self._interceptor.telemetry_capture.capture()
|
|
193
189
|
self._task.telemetry_at_end = tel
|
|
190
|
+
if data:
|
|
191
|
+
self._task.data = data
|
|
192
|
+
if custom_metadata:
|
|
193
|
+
self._task.custom_metadata = custom_metadata
|
|
194
194
|
self._task.ended_at = ended_at or time()
|
|
195
195
|
self._task.status = status
|
|
196
196
|
self._task.stderr = stderr
|
flowcept/version.py
CHANGED