flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +5 -0
- flowcept/agents/flowcept_ctx_manager.py +10 -6
- flowcept/agents/gui/gui_utils.py +52 -1
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/agents/prompts/in_memory_query_prompts.py +36 -17
- flowcept/agents/tools/general_tools.py +1 -1
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +19 -3
- flowcept/cli.py +41 -42
- flowcept/commons/autoflush_buffer.py +5 -0
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +4 -1
- flowcept/commons/daos/mq_dao/mq_dao_base.py +1 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +86 -6
- flowcept/commons/flowcept_dataclasses/workflow_object.py +41 -1
- flowcept/commons/task_data_preprocess.py +8 -2
- flowcept/configs.py +1 -1
- flowcept/flowcept_api/flowcept_controller.py +9 -1
- flowcept/flowceptor/consumers/base_consumer.py +5 -0
- flowcept/flowceptor/consumers/consumer_utils.py +11 -3
- flowcept/flowceptor/telemetry_capture.py +2 -1
- flowcept/instrumentation/task_capture.py +16 -16
- flowcept/version.py +1 -1
- flowcept-0.9.3.dist-info/METADATA +589 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/RECORD +27 -27
- resources/sample_settings.yaml +2 -2
- flowcept-0.9.1.dist-info/METADATA +0 -439
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/WHEEL +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -19,27 +19,67 @@ from flowcept.configs import (
|
|
|
19
19
|
# Not a dataclass because a dataclass stores keys even when there's no value,
|
|
20
20
|
# adding unnecessary overhead.
|
|
21
21
|
class WorkflowObject:
|
|
22
|
-
"""Workflow class.
|
|
22
|
+
"""Workflow object class.
|
|
23
|
+
|
|
24
|
+
Represents metadata and provenance details for a workflow execution.
|
|
25
|
+
"""
|
|
23
26
|
|
|
24
27
|
workflow_id: AnyStr = None
|
|
28
|
+
"""Unique identifier for the workflow."""
|
|
29
|
+
|
|
25
30
|
parent_workflow_id: AnyStr = None
|
|
31
|
+
"""Identifier of the parent workflow, if this workflow is nested or derived."""
|
|
32
|
+
|
|
26
33
|
machine_info: Dict = None
|
|
34
|
+
"""System or hardware information where the workflow is executed."""
|
|
35
|
+
|
|
27
36
|
conf: Dict = None
|
|
37
|
+
"""Workflow configuration parameters, such as hyperparameters or runtime options."""
|
|
38
|
+
|
|
28
39
|
flowcept_settings: Dict = None
|
|
40
|
+
"""Snapshot of Flowcept’s active settings used for this workflow."""
|
|
41
|
+
|
|
29
42
|
flowcept_version: AnyStr = None
|
|
43
|
+
"""Version of Flowcept used during execution."""
|
|
44
|
+
|
|
30
45
|
utc_timestamp: float = None
|
|
46
|
+
"""Timestamp (UTC, in seconds) when the workflow object was created."""
|
|
47
|
+
|
|
31
48
|
user: AnyStr = None
|
|
49
|
+
"""User who launched or owns the workflow run."""
|
|
50
|
+
|
|
32
51
|
campaign_id: AnyStr = None
|
|
52
|
+
"""Identifier for grouping workflows into a campaign or experiment."""
|
|
53
|
+
|
|
33
54
|
adapter_id: AnyStr = None
|
|
55
|
+
"""Identifier of the adapter (e.g., Dask, MLflow) that triggered workflow capture."""
|
|
56
|
+
|
|
34
57
|
interceptor_ids: List[AnyStr] = None
|
|
58
|
+
"""List of interceptors applied to this workflow (e.g., instrumentation, telemetry)."""
|
|
59
|
+
|
|
35
60
|
name: AnyStr = None
|
|
61
|
+
"""Descriptive name for the workflow."""
|
|
62
|
+
|
|
36
63
|
custom_metadata: Dict = None
|
|
64
|
+
"""User-defined metadata dictionary with additional annotations."""
|
|
65
|
+
|
|
37
66
|
environment_id: str = None
|
|
67
|
+
"""Identifier for the runtime environment (e.g., conda env, container)."""
|
|
68
|
+
|
|
38
69
|
sys_name: str = None
|
|
70
|
+
"""Logical system or facility name (e.g., HPC system name, cluster identifier)."""
|
|
71
|
+
|
|
39
72
|
extra_metadata: str = None
|
|
73
|
+
"""Optional free-form metadata for extensions not covered by other fields."""
|
|
74
|
+
|
|
40
75
|
used: Dict = None
|
|
76
|
+
"""Inputs consumed by the workflow (datasets, arguments, or configuration values)."""
|
|
77
|
+
|
|
41
78
|
code_repository: Dict = None
|
|
79
|
+
"""Details of the code repository (URL, commit hash, branch) used to run the workflow."""
|
|
80
|
+
|
|
42
81
|
generated: Dict = None
|
|
82
|
+
"""Outputs generated by the workflow (artifacts, models, or results)."""
|
|
43
83
|
|
|
44
84
|
def __init__(self, workflow_id=None, name=None, used=None, generated=None):
|
|
45
85
|
self.workflow_id = workflow_id
|
|
@@ -137,7 +137,6 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
|
|
|
137
137
|
"agent_id",
|
|
138
138
|
"campaign_id",
|
|
139
139
|
"subtype",
|
|
140
|
-
"custom_metadata",
|
|
141
140
|
]:
|
|
142
141
|
value = _safe_get(task, key)
|
|
143
142
|
if value is not None:
|
|
@@ -146,7 +145,14 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
|
|
|
146
145
|
else:
|
|
147
146
|
task_summary[key] = value
|
|
148
147
|
|
|
149
|
-
|
|
148
|
+
# Adding image column if data is image. This is to handle special cases when there is an image associated to
|
|
149
|
+
# a provenance task.
|
|
150
|
+
if "data" in task:
|
|
151
|
+
if "custom_metadata" in task:
|
|
152
|
+
if "image" in task["custom_metadata"].get("mime_type", ""):
|
|
153
|
+
task_summary["image"] = task["data"]
|
|
154
|
+
|
|
155
|
+
# Special handling for timestamp field
|
|
150
156
|
try:
|
|
151
157
|
time_keys = ["started_at", "ended_at"]
|
|
152
158
|
for time_key in time_keys:
|
flowcept/configs.py
CHANGED
|
@@ -158,7 +158,7 @@ PERF_LOG = settings["project"].get("performance_logging", False)
|
|
|
158
158
|
JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
|
|
159
159
|
REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
|
|
160
160
|
ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
|
|
161
|
-
DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path",
|
|
161
|
+
DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", "flowcept_messages.jsonl")
|
|
162
162
|
|
|
163
163
|
TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
|
|
164
164
|
TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)
|
|
@@ -4,6 +4,7 @@ import os.path
|
|
|
4
4
|
from typing import List, Dict
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
|
+
from flowcept.commons.autoflush_buffer import AutoflushBuffer
|
|
7
8
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
8
9
|
from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
9
10
|
WorkflowObject,
|
|
@@ -30,6 +31,7 @@ class Flowcept(object):
|
|
|
30
31
|
# TODO: rename current_workflow_id to workflow_id. This will be a major refactor
|
|
31
32
|
current_workflow_id = None
|
|
32
33
|
campaign_id = None
|
|
34
|
+
buffer = None
|
|
33
35
|
|
|
34
36
|
@ClassProperty
|
|
35
37
|
def db(cls):
|
|
@@ -155,7 +157,11 @@ class Flowcept(object):
|
|
|
155
157
|
interceptor_inst = BaseInterceptor.build(interceptor)
|
|
156
158
|
interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
|
|
157
159
|
self._interceptor_instances.append(interceptor_inst)
|
|
158
|
-
|
|
160
|
+
if isinstance(interceptor_inst._mq_dao.buffer, AutoflushBuffer):
|
|
161
|
+
Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer.current_buffer
|
|
162
|
+
else:
|
|
163
|
+
Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer
|
|
164
|
+
|
|
159
165
|
if self._should_save_workflow and not self._workflow_saved:
|
|
160
166
|
self.save_workflow(interceptor, interceptor_inst)
|
|
161
167
|
|
|
@@ -296,6 +302,8 @@ class Flowcept(object):
|
|
|
296
302
|
self.logger.info("Stopping DB Inserters...")
|
|
297
303
|
for db_inserter in self._db_inserters:
|
|
298
304
|
db_inserter.stop(bundle_exec_id=self._bundle_exec_id)
|
|
305
|
+
|
|
306
|
+
Flowcept.buffer = self.buffer = None
|
|
299
307
|
self.is_started = False
|
|
300
308
|
self.logger.debug("All stopped!")
|
|
301
309
|
|
|
@@ -4,6 +4,7 @@ from typing import Callable, Dict, Tuple, Optional
|
|
|
4
4
|
|
|
5
5
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
6
6
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
7
|
+
from flowcept.configs import MQ_ENABLED
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class BaseConsumer(object):
|
|
@@ -16,7 +17,11 @@ class BaseConsumer(object):
|
|
|
16
17
|
|
|
17
18
|
def __init__(self):
|
|
18
19
|
"""Initialize the message queue DAO and logger."""
|
|
20
|
+
if not MQ_ENABLED:
|
|
21
|
+
raise Exception("MQ is disabled in the settings. You cannot consume messages.")
|
|
22
|
+
|
|
19
23
|
self._mq_dao = MQDao.build()
|
|
24
|
+
|
|
20
25
|
self.logger = FlowceptLogger()
|
|
21
26
|
self._main_thread: Optional[Thread] = None
|
|
22
27
|
|
|
@@ -11,7 +11,7 @@ from flowcept.commons.vocabulary import Status
|
|
|
11
11
|
UTC_TZ = ZoneInfo("UTC")
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
14
|
+
def curate_task_msg(task_msg_dict: dict, convert_times=True, keys_to_drop: List = None):
|
|
15
15
|
"""Curate a task message."""
|
|
16
16
|
# Converting any arg to kwarg in the form {"arg1": val1, "arg2: val2}
|
|
17
17
|
for field in TaskObject.get_dict_field_names():
|
|
@@ -45,6 +45,10 @@ def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
|
45
45
|
if "used" in task_msg_dict and task_msg_dict["used"].get("workflow_id", None):
|
|
46
46
|
task_msg_dict["workflow_id"] = task_msg_dict["used"].pop("workflow_id")
|
|
47
47
|
|
|
48
|
+
if keys_to_drop is not None:
|
|
49
|
+
for k in keys_to_drop:
|
|
50
|
+
task_msg_dict.pop(k, None)
|
|
51
|
+
|
|
48
52
|
if convert_times:
|
|
49
53
|
for time_field in TaskObject.get_time_field_names():
|
|
50
54
|
if time_field in task_msg_dict:
|
|
@@ -97,7 +101,11 @@ def convert_keys_to_strings(obj):
|
|
|
97
101
|
|
|
98
102
|
|
|
99
103
|
def curate_dict_task_messages(
|
|
100
|
-
doc_list: List[Dict],
|
|
104
|
+
doc_list: List[Dict],
|
|
105
|
+
indexing_key: str,
|
|
106
|
+
utc_time_at_insertion: float = 0,
|
|
107
|
+
convert_times=True,
|
|
108
|
+
keys_to_drop: List = None,
|
|
101
109
|
):
|
|
102
110
|
"""Remove duplicates.
|
|
103
111
|
|
|
@@ -134,7 +142,7 @@ def curate_dict_task_messages(
|
|
|
134
142
|
if utc_time_at_insertion > 0:
|
|
135
143
|
doc["utc_time_at_insertion"] = utc_time_at_insertion
|
|
136
144
|
|
|
137
|
-
curate_task_msg(doc, convert_times)
|
|
145
|
+
curate_task_msg(doc, convert_times, keys_to_drop)
|
|
138
146
|
indexing_key_value = doc[indexing_key]
|
|
139
147
|
|
|
140
148
|
if indexing_key_value not in indexed_buffer:
|
|
@@ -258,7 +258,8 @@ class TelemetryCapture:
|
|
|
258
258
|
disk.disk_usage = psutil.disk_usage("/")._asdict()
|
|
259
259
|
|
|
260
260
|
platform_info = platform.uname()._asdict()
|
|
261
|
-
|
|
261
|
+
network_info_raw = psutil.net_if_addrs()
|
|
262
|
+
network_info = {ifname: [addr._asdict() for addr in addrs] for ifname, addrs in network_info_raw.items()}
|
|
262
263
|
processor_info = cpuinfo.get_cpu_info()
|
|
263
264
|
|
|
264
265
|
gpu_info = None
|
|
@@ -22,21 +22,6 @@ class FlowceptTask(object):
|
|
|
22
22
|
and metadata. It integrates with the Flowcept API and Instrumentation Interceptor to
|
|
23
23
|
log task-specific details.
|
|
24
24
|
|
|
25
|
-
Parameters
|
|
26
|
-
----------
|
|
27
|
-
task_id : str, optional
|
|
28
|
-
Unique identifier for the task. If not provided, it defaults to the current timestamp.
|
|
29
|
-
workflow_id : str, optional
|
|
30
|
-
ID of the workflow to which this task belongs. Defaults to the current workflow ID from
|
|
31
|
-
Flowcept.
|
|
32
|
-
campaign_id : str, optional
|
|
33
|
-
ID of the campaign to which this task belongs. Defaults to the current campaign ID from
|
|
34
|
-
Flowcept.
|
|
35
|
-
used : Dict, optional
|
|
36
|
-
Metadata about the resources or data used during the task execution.
|
|
37
|
-
custom_metadata : Dict, optional
|
|
38
|
-
User-defined metadata associated with the task.
|
|
39
|
-
|
|
40
25
|
Methods
|
|
41
26
|
-------
|
|
42
27
|
__enter__()
|
|
@@ -65,6 +50,7 @@ class FlowceptTask(object):
|
|
|
65
50
|
subtype: str = None,
|
|
66
51
|
custom_metadata: Dict = None,
|
|
67
52
|
generated: Dict = None,
|
|
53
|
+
started_at: float = None,
|
|
68
54
|
ended_at: float = None,
|
|
69
55
|
stdout: str = None,
|
|
70
56
|
stderr: str = None,
|
|
@@ -90,12 +76,16 @@ class FlowceptTask(object):
|
|
|
90
76
|
Describes the specific activity this task captures.
|
|
91
77
|
used : Dict, optional
|
|
92
78
|
Metadata about resources or data used during the task.
|
|
79
|
+
data: Any, optional
|
|
80
|
+
Any raw data associated to this task.
|
|
93
81
|
subtype : str, optional
|
|
94
82
|
Optional string categorizing the task subtype.
|
|
95
83
|
custom_metadata : Dict, optional
|
|
96
84
|
Additional user-defined metadata to associate with the task.
|
|
97
85
|
generated : Dict, optional
|
|
98
86
|
Output data generated during the task execution.
|
|
87
|
+
started_at : float, optional
|
|
88
|
+
Timestamp indicating when the task started.
|
|
99
89
|
ended_at : float, optional
|
|
100
90
|
Timestamp indicating when the task ended.
|
|
101
91
|
stdout : str, optional
|
|
@@ -117,7 +107,7 @@ class FlowceptTask(object):
|
|
|
117
107
|
self._task.telemetry_at_start = tel
|
|
118
108
|
|
|
119
109
|
self._task.activity_id = activity_id
|
|
120
|
-
self._task.started_at = time()
|
|
110
|
+
self._task.started_at = started_at or time()
|
|
121
111
|
self._task.task_id = task_id or self._gen_task_id()
|
|
122
112
|
self._task.workflow_id = workflow_id or Flowcept.current_workflow_id
|
|
123
113
|
self._task.campaign_id = campaign_id or Flowcept.campaign_id
|
|
@@ -159,6 +149,8 @@ class FlowceptTask(object):
|
|
|
159
149
|
ended_at: float = None,
|
|
160
150
|
stdout: str = None,
|
|
161
151
|
stderr: str = None,
|
|
152
|
+
data: Any = None,
|
|
153
|
+
custom_metadata: Dict = None,
|
|
162
154
|
status: Status = Status.FINISHED,
|
|
163
155
|
):
|
|
164
156
|
"""
|
|
@@ -172,6 +164,10 @@ class FlowceptTask(object):
|
|
|
172
164
|
----------
|
|
173
165
|
generated : Dict, optional
|
|
174
166
|
Metadata or data generated during the task's execution. Defaults to None.
|
|
167
|
+
data: Any, optional
|
|
168
|
+
Any raw data associated to this task.
|
|
169
|
+
custom_metadata : Dict, optional
|
|
170
|
+
Additional user-defined metadata to associate with the task.
|
|
175
171
|
ended_at : float, optional
|
|
176
172
|
Timestamp indicating when the task ended. If not provided, defaults to the current time.
|
|
177
173
|
stdout : str, optional
|
|
@@ -191,6 +187,10 @@ class FlowceptTask(object):
|
|
|
191
187
|
if TELEMETRY_ENABLED:
|
|
192
188
|
tel = self._interceptor.telemetry_capture.capture()
|
|
193
189
|
self._task.telemetry_at_end = tel
|
|
190
|
+
if data:
|
|
191
|
+
self._task.data = data
|
|
192
|
+
if custom_metadata:
|
|
193
|
+
self._task.custom_metadata = custom_metadata
|
|
194
194
|
self._task.ended_at = ended_at or time()
|
|
195
195
|
self._task.status = status
|
|
196
196
|
self._task.stderr = stderr
|
flowcept/version.py
CHANGED