flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,27 +19,67 @@ from flowcept.configs import (
19
19
  # Not a dataclass because a dataclass stores keys even when there's no value,
20
20
  # adding unnecessary overhead.
21
21
  class WorkflowObject:
22
- """Workflow class."""
22
+ """Workflow object class.
23
+
24
+ Represents metadata and provenance details for a workflow execution.
25
+ """
23
26
 
24
27
  workflow_id: AnyStr = None
28
+ """Unique identifier for the workflow."""
29
+
25
30
  parent_workflow_id: AnyStr = None
31
+ """Identifier of the parent workflow, if this workflow is nested or derived."""
32
+
26
33
  machine_info: Dict = None
34
+ """System or hardware information where the workflow is executed."""
35
+
27
36
  conf: Dict = None
37
+ """Workflow configuration parameters, such as hyperparameters or runtime options."""
38
+
28
39
  flowcept_settings: Dict = None
40
+ """Snapshot of Flowcept’s active settings used for this workflow."""
41
+
29
42
  flowcept_version: AnyStr = None
43
+ """Version of Flowcept used during execution."""
44
+
30
45
  utc_timestamp: float = None
46
+ """Timestamp (UTC, in seconds) when the workflow object was created."""
47
+
31
48
  user: AnyStr = None
49
+ """User who launched or owns the workflow run."""
50
+
32
51
  campaign_id: AnyStr = None
52
+ """Identifier for grouping workflows into a campaign or experiment."""
53
+
33
54
  adapter_id: AnyStr = None
55
+ """Identifier of the adapter (e.g., Dask, MLflow) that triggered workflow capture."""
56
+
34
57
  interceptor_ids: List[AnyStr] = None
58
+ """List of interceptors applied to this workflow (e.g., instrumentation, telemetry)."""
59
+
35
60
  name: AnyStr = None
61
+ """Descriptive name for the workflow."""
62
+
36
63
  custom_metadata: Dict = None
64
+ """User-defined metadata dictionary with additional annotations."""
65
+
37
66
  environment_id: str = None
67
+ """Identifier for the runtime environment (e.g., conda env, container)."""
68
+
38
69
  sys_name: str = None
70
+ """Logical system or facility name (e.g., HPC system name, cluster identifier)."""
71
+
39
72
  extra_metadata: str = None
73
+ """Optional free-form metadata for extensions not covered by other fields."""
74
+
40
75
  used: Dict = None
76
+ """Inputs consumed by the workflow (datasets, arguments, or configuration values)."""
77
+
41
78
  code_repository: Dict = None
79
+ """Details of the code repository (URL, commit hash, branch) used to run the workflow."""
80
+
42
81
  generated: Dict = None
82
+ """Outputs generated by the workflow (artifacts, models, or results)."""
43
83
 
44
84
  def __init__(self, workflow_id=None, name=None, used=None, generated=None):
45
85
  self.workflow_id = workflow_id
@@ -137,7 +137,6 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
137
137
  "agent_id",
138
138
  "campaign_id",
139
139
  "subtype",
140
- "custom_metadata",
141
140
  ]:
142
141
  value = _safe_get(task, key)
143
142
  if value is not None:
@@ -146,7 +145,14 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
146
145
  else:
147
146
  task_summary[key] = value
148
147
 
149
- # Special handling for timestamp field
148
+ # Adding image column if data is image. This is to handle special cases when there is an image associated to
149
+ # a provenance task.
150
+ if "data" in task:
151
+ if "custom_metadata" in task:
152
+ if "image" in task["custom_metadata"].get("mime_type", ""):
153
+ task_summary["image"] = task["data"]
154
+
155
+ # Special handling for timestamp field
150
156
  try:
151
157
  time_keys = ["started_at", "ended_at"]
152
158
  for time_key in time_keys:
flowcept/configs.py CHANGED
@@ -158,7 +158,7 @@ PERF_LOG = settings["project"].get("performance_logging", False)
158
158
  JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
159
159
  REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
160
160
  ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
161
- DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", None)
161
+ DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", "flowcept_messages.jsonl")
162
162
 
163
163
  TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
164
164
  TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)
@@ -4,6 +4,7 @@ import os.path
4
4
  from typing import List, Dict
5
5
  from uuid import uuid4
6
6
 
7
+ from flowcept.commons.autoflush_buffer import AutoflushBuffer
7
8
  from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
8
9
  from flowcept.commons.flowcept_dataclasses.workflow_object import (
9
10
  WorkflowObject,
@@ -30,6 +31,7 @@ class Flowcept(object):
30
31
  # TODO: rename current_workflow_id to workflow_id. This will be a major refactor
31
32
  current_workflow_id = None
32
33
  campaign_id = None
34
+ buffer = None
33
35
 
34
36
  @ClassProperty
35
37
  def db(cls):
@@ -155,7 +157,11 @@ class Flowcept(object):
155
157
  interceptor_inst = BaseInterceptor.build(interceptor)
156
158
  interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
157
159
  self._interceptor_instances.append(interceptor_inst)
158
- self.buffer = interceptor_inst._mq_dao.buffer
160
+ if isinstance(interceptor_inst._mq_dao.buffer, AutoflushBuffer):
161
+ Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer.current_buffer
162
+ else:
163
+ Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer
164
+
159
165
  if self._should_save_workflow and not self._workflow_saved:
160
166
  self.save_workflow(interceptor, interceptor_inst)
161
167
 
@@ -296,6 +302,8 @@ class Flowcept(object):
296
302
  self.logger.info("Stopping DB Inserters...")
297
303
  for db_inserter in self._db_inserters:
298
304
  db_inserter.stop(bundle_exec_id=self._bundle_exec_id)
305
+
306
+ Flowcept.buffer = self.buffer = None
299
307
  self.is_started = False
300
308
  self.logger.debug("All stopped!")
301
309
 
@@ -4,6 +4,7 @@ from typing import Callable, Dict, Tuple, Optional
4
4
 
5
5
  from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
6
6
  from flowcept.commons.flowcept_logger import FlowceptLogger
7
+ from flowcept.configs import MQ_ENABLED
7
8
 
8
9
 
9
10
  class BaseConsumer(object):
@@ -16,7 +17,11 @@ class BaseConsumer(object):
16
17
 
17
18
  def __init__(self):
18
19
  """Initialize the message queue DAO and logger."""
20
+ if not MQ_ENABLED:
21
+ raise Exception("MQ is disabled in the settings. You cannot consume messages.")
22
+
19
23
  self._mq_dao = MQDao.build()
24
+
20
25
  self.logger = FlowceptLogger()
21
26
  self._main_thread: Optional[Thread] = None
22
27
 
@@ -11,7 +11,7 @@ from flowcept.commons.vocabulary import Status
11
11
  UTC_TZ = ZoneInfo("UTC")
12
12
 
13
13
 
14
- def curate_task_msg(task_msg_dict: dict, convert_times=True):
14
+ def curate_task_msg(task_msg_dict: dict, convert_times=True, keys_to_drop: List = None):
15
15
  """Curate a task message."""
16
16
  # Converting any arg to kwarg in the form {"arg1": val1, "arg2: val2}
17
17
  for field in TaskObject.get_dict_field_names():
@@ -45,6 +45,10 @@ def curate_task_msg(task_msg_dict: dict, convert_times=True):
45
45
  if "used" in task_msg_dict and task_msg_dict["used"].get("workflow_id", None):
46
46
  task_msg_dict["workflow_id"] = task_msg_dict["used"].pop("workflow_id")
47
47
 
48
+ if keys_to_drop is not None:
49
+ for k in keys_to_drop:
50
+ task_msg_dict.pop(k, None)
51
+
48
52
  if convert_times:
49
53
  for time_field in TaskObject.get_time_field_names():
50
54
  if time_field in task_msg_dict:
@@ -97,7 +101,11 @@ def convert_keys_to_strings(obj):
97
101
 
98
102
 
99
103
  def curate_dict_task_messages(
100
- doc_list: List[Dict], indexing_key: str, utc_time_at_insertion: float = 0, convert_times=True
104
+ doc_list: List[Dict],
105
+ indexing_key: str,
106
+ utc_time_at_insertion: float = 0,
107
+ convert_times=True,
108
+ keys_to_drop: List = None,
101
109
  ):
102
110
  """Remove duplicates.
103
111
 
@@ -134,7 +142,7 @@ def curate_dict_task_messages(
134
142
  if utc_time_at_insertion > 0:
135
143
  doc["utc_time_at_insertion"] = utc_time_at_insertion
136
144
 
137
- curate_task_msg(doc, convert_times)
145
+ curate_task_msg(doc, convert_times, keys_to_drop)
138
146
  indexing_key_value = doc[indexing_key]
139
147
 
140
148
  if indexing_key_value not in indexed_buffer:
@@ -258,7 +258,8 @@ class TelemetryCapture:
258
258
  disk.disk_usage = psutil.disk_usage("/")._asdict()
259
259
 
260
260
  platform_info = platform.uname()._asdict()
261
- network_info = psutil.net_if_addrs()
261
+ network_info_raw = psutil.net_if_addrs()
262
+ network_info = {ifname: [addr._asdict() for addr in addrs] for ifname, addrs in network_info_raw.items()}
262
263
  processor_info = cpuinfo.get_cpu_info()
263
264
 
264
265
  gpu_info = None
@@ -22,21 +22,6 @@ class FlowceptTask(object):
22
22
  and metadata. It integrates with the Flowcept API and Instrumentation Interceptor to
23
23
  log task-specific details.
24
24
 
25
- Parameters
26
- ----------
27
- task_id : str, optional
28
- Unique identifier for the task. If not provided, it defaults to the current timestamp.
29
- workflow_id : str, optional
30
- ID of the workflow to which this task belongs. Defaults to the current workflow ID from
31
- Flowcept.
32
- campaign_id : str, optional
33
- ID of the campaign to which this task belongs. Defaults to the current campaign ID from
34
- Flowcept.
35
- used : Dict, optional
36
- Metadata about the resources or data used during the task execution.
37
- custom_metadata : Dict, optional
38
- User-defined metadata associated with the task.
39
-
40
25
  Methods
41
26
  -------
42
27
  __enter__()
@@ -65,6 +50,7 @@ class FlowceptTask(object):
65
50
  subtype: str = None,
66
51
  custom_metadata: Dict = None,
67
52
  generated: Dict = None,
53
+ started_at: float = None,
68
54
  ended_at: float = None,
69
55
  stdout: str = None,
70
56
  stderr: str = None,
@@ -90,12 +76,16 @@ class FlowceptTask(object):
90
76
  Describes the specific activity this task captures.
91
77
  used : Dict, optional
92
78
  Metadata about resources or data used during the task.
79
+ data: Any, optional
80
+ Any raw data associated to this task.
93
81
  subtype : str, optional
94
82
  Optional string categorizing the task subtype.
95
83
  custom_metadata : Dict, optional
96
84
  Additional user-defined metadata to associate with the task.
97
85
  generated : Dict, optional
98
86
  Output data generated during the task execution.
87
+ started_at : float, optional
88
+ Timestamp indicating when the task started.
99
89
  ended_at : float, optional
100
90
  Timestamp indicating when the task ended.
101
91
  stdout : str, optional
@@ -117,7 +107,7 @@ class FlowceptTask(object):
117
107
  self._task.telemetry_at_start = tel
118
108
 
119
109
  self._task.activity_id = activity_id
120
- self._task.started_at = time()
110
+ self._task.started_at = started_at or time()
121
111
  self._task.task_id = task_id or self._gen_task_id()
122
112
  self._task.workflow_id = workflow_id or Flowcept.current_workflow_id
123
113
  self._task.campaign_id = campaign_id or Flowcept.campaign_id
@@ -159,6 +149,8 @@ class FlowceptTask(object):
159
149
  ended_at: float = None,
160
150
  stdout: str = None,
161
151
  stderr: str = None,
152
+ data: Any = None,
153
+ custom_metadata: Dict = None,
162
154
  status: Status = Status.FINISHED,
163
155
  ):
164
156
  """
@@ -172,6 +164,10 @@ class FlowceptTask(object):
172
164
  ----------
173
165
  generated : Dict, optional
174
166
  Metadata or data generated during the task's execution. Defaults to None.
167
+ data: Any, optional
168
+ Any raw data associated to this task.
169
+ custom_metadata : Dict, optional
170
+ Additional user-defined metadata to associate with the task.
175
171
  ended_at : float, optional
176
172
  Timestamp indicating when the task ended. If not provided, defaults to the current time.
177
173
  stdout : str, optional
@@ -191,6 +187,10 @@ class FlowceptTask(object):
191
187
  if TELEMETRY_ENABLED:
192
188
  tel = self._interceptor.telemetry_capture.capture()
193
189
  self._task.telemetry_at_end = tel
190
+ if data:
191
+ self._task.data = data
192
+ if custom_metadata:
193
+ self._task.custom_metadata = custom_metadata
194
194
  self._task.ended_at = ended_at or time()
195
195
  self._task.status = status
196
196
  self._task.stderr = stderr
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
4
4
  # The expected format is: <Major>.<Minor>.<Patch>
5
5
  # This file is supposed to be automatically modified by the CI Bot.
6
6
  # See .github/workflows/version_bumper.py
7
- __version__ = "0.9.1"
7
+ __version__ = "0.9.3"