datatailr 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datatailr might be problematic.
- datatailr/__init__.py +1 -35
- datatailr/acl.py +35 -3
- datatailr/blob.py +13 -13
- datatailr/build/image.py +38 -2
- datatailr/dt_json.py +32 -0
- datatailr/errors.py +17 -0
- datatailr/group.py +18 -14
- datatailr/logging.py +21 -10
- datatailr/sbin/datatailr_run.py +147 -0
- datatailr/sbin/datatailr_run_app.py +37 -0
- datatailr/sbin/{run_job.py → datatailr_run_batch.py} +5 -20
- datatailr/sbin/datatailr_run_excel.py +34 -0
- datatailr/sbin/datatailr_run_service.py +34 -0
- datatailr/scheduler/__init__.py +24 -8
- datatailr/scheduler/arguments_cache.py +71 -43
- datatailr/scheduler/base.py +195 -79
- datatailr/scheduler/batch.py +141 -19
- datatailr/scheduler/batch_decorator.py +53 -24
- datatailr/scheduler/constants.py +1 -1
- datatailr/scheduler/schedule.py +117 -0
- datatailr/scheduler/utils.py +3 -1
- datatailr/user.py +21 -21
- datatailr/utils.py +20 -0
- datatailr/wrapper.py +0 -6
- {datatailr-0.1.6.dist-info → datatailr-0.1.10.dist-info}/METADATA +37 -4
- datatailr-0.1.10.dist-info/RECORD +32 -0
- datatailr-0.1.10.dist-info/entry_points.txt +6 -0
- datatailr-0.1.10.dist-info/top_level.txt +1 -0
- datatailr-0.1.6.dist-info/RECORD +0 -29
- datatailr-0.1.6.dist-info/entry_points.txt +0 -2
- datatailr-0.1.6.dist-info/top_level.txt +0 -2
- test_module/__init__.py +0 -17
- test_module/test_submodule.py +0 -38
- {datatailr-0.1.6.dist-info → datatailr-0.1.10.dist-info}/WHEEL +0 -0
- {datatailr-0.1.6.dist-info → datatailr-0.1.10.dist-info}/licenses/LICENSE +0 -0
datatailr/sbin/datatailr_run_app.py
ADDED
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import os
+import sys
+import runpy
+from importlib.resources import files
+
+from datatailr.logging import DatatailrLogger
+
+
+logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+def run():
+    logger.info("Starting Datatailr app...")
+    entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
+    if entrypoint is None or ":" not in entrypoint:
+        raise ValueError(
+            "Environment variable 'DATATAILR_ENTRYPOINT' is not in the format 'module_name:file_name'."
+        )
+
+    module_name, file_name = entrypoint.split(":")
+
+    script = files(module_name).joinpath(file_name)
+    sys.argv = ["streamlit", "run", str(script), *sys.argv[1:]]
+    logger.info(f"Running entrypoint: {entrypoint}")
+    runpy.run_module("streamlit", run_name="__main__")
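For orientation, a hedged sketch of how this new app runner is driven: DATATAILR_ENTRYPOINT names a file shipped inside an importable package, and run() rewrites sys.argv so Streamlit executes that file in-process (the package and file names below are hypothetical):

    # Hypothetical values, mirroring the run() logic in the hunk above.
    import os

    os.environ["DATATAILR_ENTRYPOINT"] = "my_dashboards:home.py"  # 'module_name:file_name'
    # run() resolves files("my_dashboards").joinpath("home.py") and then executes
    # the equivalent of: streamlit run <site-packages>/my_dashboards/home.py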
datatailr/sbin/{run_job.py → datatailr_run_batch.py}
RENAMED
@@ -12,19 +12,19 @@
 
 import importlib
 import os
-import pickle
 
-from datatailr import dt__Blob
 from datatailr.logging import DatatailrLogger
 
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
 
 
-def main():
+def run():
+    logger.info("Running Datatailr batch job")
     entry_point = os.environ.get("DATATAILR_BATCH_ENTRYPOINT")
     batch_run_id = os.environ.get("DATATAILR_BATCH_RUN_ID")
     batch_id = os.environ.get("DATATAILR_BATCH_ID")
     job_id = os.environ.get("DATATAILR_JOB_ID")
+    logger.info(f"Batch run ID: {batch_run_id}, Batch ID: {batch_id}, Job ID: {job_id}")
 
     if entry_point is None:
         raise ValueError(
@@ -44,20 +44,5 @@ def main():
         raise ValueError(
             f"The function '{func_name}' in module '{module_name}' is not callable."
         )
-
-    with open(result_path, "wb") as f:
-        pickle.dump(result, f)
-    blob = dt__Blob()
-    blob.cp(result_path, "blob://")
-    logger.info(f"{result_path} copied to blob storage.")
-
-
-if __name__ == "__main__":
-    try:
-        logger.debug("Starting job execution...")
-        main()
-        logger.debug("Job executed successfully.")
-    except Exception as e:
-        logger.error(f"Error during job execution: {e}")
-        raise
+    function()
+    logger.info("Datatailr batch job completed successfully.")
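The surviving context implies the batch runner resolves DATATAILR_BATCH_ENTRYPOINT to a module and a function name, verifies the attribute is callable, and now simply invokes it in-process. A minimal sketch of that resolution, assuming a "module:function" format (the actual parsing code falls outside the visible hunks, so the separator is an assumption):

    # Assumed entrypoint format "module_name:func_name"; only the callable check
    # is confirmed by the diff context above.
    import importlib
    import os

    entry_point = os.environ["DATATAILR_BATCH_ENTRYPOINT"]  # e.g. "my_pkg.jobs:daily_load"
    module_name, func_name = entry_point.split(":")
    function = getattr(importlib.import_module(module_name), func_name)
    if not callable(function):
        raise ValueError(f"The function '{func_name}' in module '{module_name}' is not callable.")
    function()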
datatailr/sbin/datatailr_run_excel.py
ADDED
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import os
+import subprocess
+
+from datatailr.logging import DatatailrLogger
+
+logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+def run():
+    logger.info("Starting Datatailr excel add-in...")
+    entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
+    hostname = os.environ.get("DATATAILR_HOST")
+
+    if entrypoint is None:
+        raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
+
+    if hostname is None:
+        raise ValueError("Environment variable 'DATATAILR_HOST' is not set.")
+
+    entrypoint = f'./dt-excel.sh -n -H "{hostname}" -p 8080 "{entrypoint}"'
+    logger.info(f"Running entrypoint: {entrypoint}")
+    subprocess.run(entrypoint, shell=True)
datatailr/sbin/datatailr_run_service.py
ADDED
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import os
+import importlib
+
+from datatailr.logging import DatatailrLogger
+
+logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+def run():
+    logger.info("Starting Datatailr service...")
+    entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
+    port = os.environ.get("DATATAILR_SERVICE_PORT")
+
+    if entrypoint is None:
+        raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
+
+    if port is None:
+        raise ValueError("Environment variable 'DATATAILR_SERVICE_PORT' is not set.")
+
+    entrypoint_module = importlib.import_module(entrypoint)
+    entrypoint_module.__service_main__(int(port))
+    logger.info(f"Running entrypoint: {entrypoint}")
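The service runner's contract is that the module named by DATATAILR_ENTRYPOINT exposes a __service_main__(port) hook, which it imports and calls with the integer port. A minimal sketch of a compliant module; only the hook name and signature come from the diff, the HTTP server is a hypothetical stand-in:

    # my_service.py -- hypothetical service module for datatailr_run_service.
    from http.server import HTTPServer, SimpleHTTPRequestHandler


    def __service_main__(port: int) -> None:
        # Serve until the platform stops the container.
        HTTPServer(("", port), SimpleHTTPRequestHandler).serve_forever()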
datatailr/scheduler/__init__.py
CHANGED
@@ -8,31 +8,47 @@
 # of this file, in parts or full, via any medium is strictly prohibited.
 # *************************************************************************
 
-
+r"""
+Datatailr Scheduler Module
+==========================
+
+The `datatailr.scheduler` module provides a framework for scheduling and managing batch jobs.
+
+The main job types are:
+_______________________
+
+- **Batch**: Represents a batch job that can be scheduled and executed.
+  The job can include multiple tasks which can be run in parallel or sequentially.
+- **Service**: Represents a service job that runs continuously.
+- **App**: Represents a web app or a dashboard, which can be built using one of the supported frameworks,
+  such as `Streamlit <https://streamlit.io/>`_, `Dash <https://dash.plotly.com/>`_, or `Panel <https://panel.holoviz.org/>`_.
+- **Excel**: Represents an Excel add-in.
+"""
+
+from datatailr.errors import BatchJobError
 from datatailr.scheduler.base import (
-    ACL,
     EntryPoint,
     Environment,
     Job,
     JobType,
     Resources,
-
+    set_allow_unsafe_scheduling,
 )
 from datatailr.scheduler.batch import Batch, BatchJob, DuplicateJobNameError
-from datatailr.scheduler.batch_decorator import batch_decorator as
+from datatailr.scheduler.batch_decorator import batch_decorator as batch_job
+from datatailr.scheduler.schedule import Schedule
 
 __all__ = [
     "Job",
     "JobType",
     "Environment",
-    "User",
     "Resources",
-    "ACL",
     "EntryPoint",
     "Batch",
     "BatchJob",
-    "
-    "DatatailrError",
+    "batch_job",
    "BatchJobError",
    "DuplicateJobNameError",
+    "set_allow_unsafe_scheduling",
+    "Schedule",
 ]
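Per the re-exports above, the public names are importable straight from datatailr.scheduler; a hedged sketch of the new surface:

    # Names taken from __all__ above.
    from datatailr.scheduler import Batch, Schedule, batch_job, set_allow_unsafe_scheduling
    # batch_job is the batch_decorator alias introduced in this version; its exact
    # call signature lives in batch_decorator.py and is not shown in this diff.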
datatailr/scheduler/arguments_cache.py
CHANGED
@@ -21,17 +21,26 @@ and the inner dictionaries contain the arguments.
 This module is for internal use of the datatailr package.
 """
 
-from
+from datatailr.dt_json import json, decode_json
+import os
 import pickle
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 from datatailr import is_dt_installed, Blob
-from datatailr.
+from datatailr.errors import DatatailrError
 
 
 __BLOB_STORAGE__ = Blob()
 
 
+class CacheNotFoundError(DatatailrError):
+    """Custom error for cache operations."""
+
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message
+
+
 class ArgumentsCache:
     def __init__(self, use_persistent_cache: bool = is_dt_installed()):
         """
@@ -40,11 +49,12 @@ class ArgumentsCache:
         :param use_persistent_cache: If True, use the persistent cache backend. Otherwise, use in-memory cache.
         """
         self.use_persistent_cache = use_persistent_cache
-        self.
-
-
+        if not self.use_persistent_cache:
+            # Create a temp folder, for local caching
+            os.makedirs("/tmp/datatailr/batch/arguments", exist_ok=True)
+            os.makedirs("/tmp/datatailr/batch/results", exist_ok=True)
 
-    def add_arguments(self,
+    def add_arguments(self, batch_id: str, arguments: Dict[str, Any]):
         """
         Add arguments to the cache for a specific job and batch run.
 
@@ -52,13 +62,16 @@ class ArgumentsCache:
         :param job_name: Name of the job.
         :param arguments: Dictionary of arguments to store.
         """
-
-
+        path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
+        if self.use_persistent_cache:
             self._add_to_persistent_cache(path, arguments)
         else:
-
+            with open(path, "wb") as f:
+                pickle.dump(arguments, f)
 
-    def get_arguments(
+    def get_arguments(
+        self, batch_id: str, job: str, batch_run_id: Optional[str]
+    ) -> Dict[str, Any]:
         """
         Retrieve arguments from the cache for a specific job and batch run.
 
@@ -66,27 +79,37 @@ class ArgumentsCache:
         :param job_name: Name of the job.
         :return: Dictionary of arguments.
         """
+        path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-
-
-
-
-                f"Expected a dictionary for arguments, got {type(arg_keys)}"
-            )
+            try:
+                arg_keys = self._get_from_persistent_cache(path)
+            except RuntimeError:
+                return {}
         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if not os.path.exists(path):
+                raise CacheNotFoundError(
+                    f"Cache file not found: {path}. Ensure that the arguments have been cached."
+                )
+            with open(path, "rb") as f:
+                try:
+                    arg_keys = pickle.load(f)
+                except EOFError:
+                    return {}
+        if not isinstance(arg_keys, dict):
+            raise TypeError(
+                f"Expected a dictionary for arguments, got {type(arg_keys)}"
+            )
+        if batch_run_id is None:
+            return arg_keys[job]
+        arguments_mapping = decode_json(
+            os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
+        )
+        arguments_mapping = {value: key for key, value in arguments_mapping.items()}
+        args = {
+            arguments_mapping.get(name, name): self.get_result(batch_run_id, value)
+            for name, value in arg_keys[job].items()
+        }
+        return args
 
     def add_result(self, batch_run_id: str, job: str, result: Any):
         """
@@ -96,13 +119,14 @@ class ArgumentsCache:
         :param job: Name of the job.
         :param result: Result of the batch job.
         """
+        path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-            path = f"{batch_run_id}/{job}/result"
             self._add_to_persistent_cache(path, result)
         else:
-
+            with open(path, "wb") as f:
+                pickle.dump(result, f)
 
-    def get_result(self, batch_run_id: str, job:
+    def get_result(self, batch_run_id: str, job: Any) -> Any:
         """
         Retrieve the result of a batch job from the cache.
 
@@ -110,10 +134,17 @@ class ArgumentsCache:
         :param job: Name of the job.
         :return: Result of the batch job.
         """
+        path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
         if self.use_persistent_cache and isinstance(job, str):
-            path = f"{batch_run_id}/{job}/result"
             return self._get_from_persistent_cache(path)
-
+        else:
+            if not os.path.exists(path):
+                return job
+            with open(path, "rb") as f:
+                try:
+                    return pickle.load(f)
+                except EOFError:
+                    return None
 
     def _add_to_persistent_cache(self, path: str, blob: Any):
         """
@@ -124,9 +155,8 @@ class ArgumentsCache:
         :raises TypeError: If the blob cannot be pickled.
 
         """
-
-
-            )
+        path = path.replace("/tmp/", "")
+        __BLOB_STORAGE__.put_blob(path, json.dumps(blob))
 
     def _get_from_persistent_cache(self, path: str) -> Any:
         """
@@ -134,8 +164,6 @@ class ArgumentsCache:
 
         :param path: Path in the Blob storage where the blob is stored.
         """
-
-
-
-        except (TypeError, EOFError):
-            return {}
+        path = path.replace("/tmp/", "")
+        data = __BLOB_STORAGE__.get_blob(path)
+        return json.loads(data)