PyPI - flowcept - Versions diffs - 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl - Mend

flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

flowcept/__init__.py +5 -0
flowcept/agents/flowcept_ctx_manager.py +10 -6
flowcept/agents/gui/gui_utils.py +52 -1
flowcept/agents/prompts/general_prompts.py +1 -1
flowcept/agents/prompts/in_memory_query_prompts.py +36 -17
flowcept/agents/tools/general_tools.py +1 -1
flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +19 -3
flowcept/cli.py +41 -42
flowcept/commons/autoflush_buffer.py +5 -0
flowcept/commons/daos/docdb_dao/lmdb_dao.py +4 -1
flowcept/commons/daos/mq_dao/mq_dao_base.py +1 -0
flowcept/commons/flowcept_dataclasses/task_object.py +86 -6
flowcept/commons/flowcept_dataclasses/workflow_object.py +41 -1
flowcept/commons/task_data_preprocess.py +8 -2
flowcept/configs.py +1 -1
flowcept/flowcept_api/flowcept_controller.py +9 -1
flowcept/flowceptor/consumers/base_consumer.py +5 -0
flowcept/flowceptor/consumers/consumer_utils.py +11 -3
flowcept/flowceptor/telemetry_capture.py +2 -1
flowcept/instrumentation/task_capture.py +16 -16
flowcept/version.py +1 -1
flowcept-0.9.3.dist-info/METADATA +589 -0
{flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/RECORD +27 -27
resources/sample_settings.yaml +2 -2
flowcept-0.9.1.dist-info/METADATA +0 -439
{flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/WHEEL +0 -0
{flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/entry_points.txt +0 -0
{flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/licenses/LICENSE +0 -0

flowcept/commons/flowcept_dataclasses/workflow_object.py CHANGED Viewed

@@ -19,27 +19,67 @@ from flowcept.configs import (
 # Not a dataclass because a dataclass stores keys even when there's no value,
 # adding unnecessary overhead.
 class WorkflowObject:
-    """Workflow class."""
+    """Workflow object class.
+    Represents metadata and provenance details for a workflow execution.
+    """
     workflow_id: AnyStr = None
+    """Unique identifier for the workflow."""
     parent_workflow_id: AnyStr = None
+    """Identifier of the parent workflow, if this workflow is nested or derived."""
     machine_info: Dict = None
+    """System or hardware information where the workflow is executed."""
     conf: Dict = None
+    """Workflow configuration parameters, such as hyperparameters or runtime options."""
     flowcept_settings: Dict = None
+    """Snapshot of Flowcept’s active settings used for this workflow."""
     flowcept_version: AnyStr = None
+    """Version of Flowcept used during execution."""
     utc_timestamp: float = None
+    """Timestamp (UTC, in seconds) when the workflow object was created."""
     user: AnyStr = None
+    """User who launched or owns the workflow run."""
     campaign_id: AnyStr = None
+    """Identifier for grouping workflows into a campaign or experiment."""
     adapter_id: AnyStr = None
+    """Identifier of the adapter (e.g., Dask, MLflow) that triggered workflow capture."""
     interceptor_ids: List[AnyStr] = None
+    """List of interceptors applied to this workflow (e.g., instrumentation, telemetry)."""
     name: AnyStr = None
+    """Descriptive name for the workflow."""
     custom_metadata: Dict = None
+    """User-defined metadata dictionary with additional annotations."""
     environment_id: str = None
+    """Identifier for the runtime environment (e.g., conda env, container)."""
     sys_name: str = None
+    """Logical system or facility name (e.g., HPC system name, cluster identifier)."""
     extra_metadata: str = None
+    """Optional free-form metadata for extensions not covered by other fields."""
     used: Dict = None
+    """Inputs consumed by the workflow (datasets, arguments, or configuration values)."""
     code_repository: Dict = None
+    """Details of the code repository (URL, commit hash, branch) used to run the workflow."""
     generated: Dict = None
+    """Outputs generated by the workflow (artifacts, models, or results)."""
     def __init__(self, workflow_id=None, name=None, used=None, generated=None):
         self.workflow_id = workflow_id

flowcept/commons/task_data_preprocess.py CHANGED Viewed

@@ -137,7 +137,6 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
         "agent_id",
         "campaign_id",
         "subtype",
-        "custom_metadata",
     ]:
         value = _safe_get(task, key)
         if value is not None:
@@ -146,7 +145,14 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
             else:
                 task_summary[key] = value
-        # Special handling for timestamp field
+    # Adding image column if data is image. This is to handle special cases when there is an image associated to
+    # a provenance task.
+    if "data" in task:
+        if "custom_metadata" in task:
+            if "image" in task["custom_metadata"].get("mime_type", ""):
+                task_summary["image"] = task["data"]
+    # Special handling for timestamp field
     try:
         time_keys = ["started_at", "ended_at"]
         for time_key in time_keys:

flowcept/configs.py CHANGED Viewed

@@ -158,7 +158,7 @@ PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
-DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", None)
+DUMP_BUFFER_PATH = settings["project"].get("dump_buffer_path", "flowcept_messages.jsonl")
 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
 TELEMETRY_ENABLED = TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE)

flowcept/flowcept_api/flowcept_controller.py CHANGED Viewed

@@ -4,6 +4,7 @@ import os.path
 from typing import List, Dict
 from uuid import uuid4
+from flowcept.commons.autoflush_buffer import AutoflushBuffer
 from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
 from flowcept.commons.flowcept_dataclasses.workflow_object import (
     WorkflowObject,
@@ -30,6 +31,7 @@ class Flowcept(object):
     # TODO: rename current_workflow_id to workflow_id. This will be a major refactor
     current_workflow_id = None
     campaign_id = None
+    buffer = None
     @ClassProperty
     def db(cls):
@@ -155,7 +157,11 @@ class Flowcept(object):
                 interceptor_inst = BaseInterceptor.build(interceptor)
                 interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
                 self._interceptor_instances.append(interceptor_inst)
-                self.buffer = interceptor_inst._mq_dao.buffer
+                if isinstance(interceptor_inst._mq_dao.buffer, AutoflushBuffer):
+                    Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer.current_buffer
+                else:
+                    Flowcept.buffer = self.buffer = interceptor_inst._mq_dao.buffer
                 if self._should_save_workflow and not self._workflow_saved:
                     self.save_workflow(interceptor, interceptor_inst)
@@ -296,6 +302,8 @@ class Flowcept(object):
             self.logger.info("Stopping DB Inserters...")
             for db_inserter in self._db_inserters:
                 db_inserter.stop(bundle_exec_id=self._bundle_exec_id)
+        Flowcept.buffer = self.buffer = None
         self.is_started = False
         self.logger.debug("All stopped!")

flowcept/flowceptor/consumers/base_consumer.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Callable, Dict, Tuple, Optional
 from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
 from flowcept.commons.flowcept_logger import FlowceptLogger
+from flowcept.configs import MQ_ENABLED
 class BaseConsumer(object):
@@ -16,7 +17,11 @@ class BaseConsumer(object):
     def __init__(self):
         """Initialize the message queue DAO and logger."""
+        if not MQ_ENABLED:
+            raise Exception("MQ is disabled in the settings. You cannot consume messages.")
         self._mq_dao = MQDao.build()
         self.logger = FlowceptLogger()
         self._main_thread: Optional[Thread] = None

flowcept/flowceptor/consumers/consumer_utils.py CHANGED Viewed

@@ -11,7 +11,7 @@ from flowcept.commons.vocabulary import Status
 UTC_TZ = ZoneInfo("UTC")
-def curate_task_msg(task_msg_dict: dict, convert_times=True):
+def curate_task_msg(task_msg_dict: dict, convert_times=True, keys_to_drop: List = None):
     """Curate a task message."""
     # Converting any arg to kwarg in the form {"arg1": val1, "arg2: val2}
     for field in TaskObject.get_dict_field_names():
@@ -45,6 +45,10 @@ def curate_task_msg(task_msg_dict: dict, convert_times=True):
     if "used" in task_msg_dict and task_msg_dict["used"].get("workflow_id", None):
         task_msg_dict["workflow_id"] = task_msg_dict["used"].pop("workflow_id")
+    if keys_to_drop is not None:
+        for k in keys_to_drop:
+            task_msg_dict.pop(k, None)
     if convert_times:
         for time_field in TaskObject.get_time_field_names():
             if time_field in task_msg_dict:
@@ -97,7 +101,11 @@ def convert_keys_to_strings(obj):
 def curate_dict_task_messages(
-    doc_list: List[Dict], indexing_key: str, utc_time_at_insertion: float = 0, convert_times=True
+    doc_list: List[Dict],
+    indexing_key: str,
+    utc_time_at_insertion: float = 0,
+    convert_times=True,
+    keys_to_drop: List = None,
 ):
     """Remove duplicates.
@@ -134,7 +142,7 @@ def curate_dict_task_messages(
         if utc_time_at_insertion > 0:
             doc["utc_time_at_insertion"] = utc_time_at_insertion
-        curate_task_msg(doc, convert_times)
+        curate_task_msg(doc, convert_times, keys_to_drop)
         indexing_key_value = doc[indexing_key]
         if indexing_key_value not in indexed_buffer:

flowcept/flowceptor/telemetry_capture.py CHANGED Viewed

@@ -258,7 +258,8 @@ class TelemetryCapture:
             disk.disk_usage = psutil.disk_usage("/")._asdict()
             platform_info = platform.uname()._asdict()
-            network_info = psutil.net_if_addrs()
+            network_info_raw = psutil.net_if_addrs()
+            network_info = {ifname: [addr._asdict() for addr in addrs] for ifname, addrs in network_info_raw.items()}
             processor_info = cpuinfo.get_cpu_info()
             gpu_info = None

flowcept/instrumentation/task_capture.py CHANGED Viewed

@@ -22,21 +22,6 @@ class FlowceptTask(object):
     and metadata. It integrates with the Flowcept API and Instrumentation Interceptor to
     log task-specific details.
-    Parameters
-    ----------
-    task_id : str, optional
-        Unique identifier for the task. If not provided, it defaults to the current timestamp.
-    workflow_id : str, optional
-        ID of the workflow to which this task belongs. Defaults to the current workflow ID from
-        Flowcept.
-    campaign_id : str, optional
-        ID of the campaign to which this task belongs. Defaults to the current campaign ID from
-        Flowcept.
-    used : Dict, optional
-        Metadata about the resources or data used during the task execution.
-    custom_metadata : Dict, optional
-        User-defined metadata associated with the task.
     Methods
     -------
     __enter__()
@@ -65,6 +50,7 @@ class FlowceptTask(object):
         subtype: str = None,
         custom_metadata: Dict = None,
         generated: Dict = None,
+        started_at: float = None,
         ended_at: float = None,
         stdout: str = None,
         stderr: str = None,
@@ -90,12 +76,16 @@ class FlowceptTask(object):
             Describes the specific activity this task captures.
         used : Dict, optional
             Metadata about resources or data used during the task.
+        data: Any, optional
+            Any raw data associated to this task.
         subtype : str, optional
             Optional string categorizing the task subtype.
         custom_metadata : Dict, optional
             Additional user-defined metadata to associate with the task.
         generated : Dict, optional
             Output data generated during the task execution.
+        started_at : float, optional
+            Timestamp indicating when the task started.
         ended_at : float, optional
             Timestamp indicating when the task ended.
         stdout : str, optional
@@ -117,7 +107,7 @@ class FlowceptTask(object):
             self._task.telemetry_at_start = tel
         self._task.activity_id = activity_id
-        self._task.started_at = time()
+        self._task.started_at = started_at or time()
         self._task.task_id = task_id or self._gen_task_id()
         self._task.workflow_id = workflow_id or Flowcept.current_workflow_id
         self._task.campaign_id = campaign_id or Flowcept.campaign_id
@@ -159,6 +149,8 @@ class FlowceptTask(object):
         ended_at: float = None,
         stdout: str = None,
         stderr: str = None,
+        data: Any = None,
+        custom_metadata: Dict = None,
         status: Status = Status.FINISHED,
     ):
         """
@@ -172,6 +164,10 @@ class FlowceptTask(object):
         ----------
         generated : Dict, optional
             Metadata or data generated during the task's execution. Defaults to None.
+        data: Any, optional
+            Any raw data associated to this task.
+        custom_metadata : Dict, optional
+            Additional user-defined metadata to associate with the task.
         ended_at : float, optional
             Timestamp indicating when the task ended. If not provided, defaults to the current time.
         stdout : str, optional
@@ -191,6 +187,10 @@ class FlowceptTask(object):
         if TELEMETRY_ENABLED:
             tel = self._interceptor.telemetry_capture.capture()
             self._task.telemetry_at_end = tel
+        if data:
+            self._task.data = data
+        if custom_metadata:
+            self._task.custom_metadata = custom_metadata
         self._task.ended_at = ended_at or time()
         self._task.status = status
         self._task.stderr = stderr

flowcept/version.py CHANGED Viewed

@@ -4,4 +4,4 @@
 # The expected format is: <Major>.<Minor>.<Patch>
 # This file is supposed to be automatically modified by the CI Bot.
 # See .github/workflows/version_bumper.py
-__version__ = "0.9.1"
+__version__ = "0.9.3"

flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl