datatailr 0.1.15__tar.gz → 0.1.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datatailr might be problematic.
Files changed (38)
  1. {datatailr-0.1.15/src/datatailr.egg-info → datatailr-0.1.17}/PKG-INFO +3 -3
  2. {datatailr-0.1.15 → datatailr-0.1.17}/README.md +2 -2
  3. {datatailr-0.1.15 → datatailr-0.1.17}/pyproject.toml +1 -1
  4. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/build/image.py +80 -43
  5. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/base.py +51 -11
  6. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/schedule.py +1 -1
  7. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/user.py +4 -3
  8. {datatailr-0.1.15 → datatailr-0.1.17/src/datatailr.egg-info}/PKG-INFO +3 -3
  9. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run.py +5 -1
  10. {datatailr-0.1.15 → datatailr-0.1.17}/LICENSE +0 -0
  11. {datatailr-0.1.15 → datatailr-0.1.17}/setup.cfg +0 -0
  12. {datatailr-0.1.15 → datatailr-0.1.17}/setup.py +0 -0
  13. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/__init__.py +0 -0
  14. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/acl.py +0 -0
  15. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/blob.py +0 -0
  16. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/build/__init__.py +0 -0
  17. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/dt_json.py +0 -0
  18. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/errors.py +0 -0
  19. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/group.py +0 -0
  20. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/logging.py +0 -0
  21. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/__init__.py +0 -0
  22. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/arguments_cache.py +0 -0
  23. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/batch.py +0 -0
  24. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/batch_decorator.py +0 -0
  25. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/constants.py +0 -0
  26. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/utils.py +0 -0
  27. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/utils.py +0 -0
  28. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/version.py +0 -0
  29. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/wrapper.py +0 -0
  30. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/SOURCES.txt +0 -0
  31. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/dependency_links.txt +0 -0
  32. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/entry_points.txt +0 -0
  33. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/requires.txt +0 -0
  34. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/top_level.txt +0 -0
  35. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_app.py +0 -0
  36. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_batch.py +0 -0
  37. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_excel.py +0 -0
  38. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_service.py +0 -0
{datatailr-0.1.15/src/datatailr.egg-info → datatailr-0.1.17}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.15
+Version: 0.1.17
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT
@@ -84,9 +84,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
README.md

@@ -47,9 +47,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "datatailr"
-version = "0.1.15"
+version = "0.1.17"
 description = "Ready-to-Use Platform That Drives Business Insights"
 readme = "README.md"
 requires-python = ">=3.9"
src/datatailr/build/image.py

@@ -10,6 +10,7 @@
 
 import json
 import os
+import re
 from typing import Optional
 
 from datatailr import ACL, User
@@ -25,6 +26,7 @@ class Image:
     def __init__(
         self,
         acl: Optional[ACL] = None,
+        python_version: str = "3.12",
         python_requirements: str | list[str] = "",
         build_script_pre: str = "",
         build_script_post: str = "",
@@ -33,69 +35,103 @@ class Image:
         path_to_repo: Optional[str] = None,
         path_to_module: Optional[str] = None,
     ):
-        if acl is None:
-            signed_user = User.signed_user()
-            if signed_user is None:
-                raise ValueError(
-                    "ACL cannot be None. Please provide a valid ACL or ensure a user is signed in."
-                )
-        elif not isinstance(acl, ACL):
-            raise TypeError("acl must be an instance of ACL.")
-        self.acl = acl or ACL(signed_user)
-
-        if isinstance(python_requirements, str) and os.path.isfile(python_requirements):
-            with open(python_requirements, "r") as f:
-                python_requirements = f.read()
-        elif isinstance(python_requirements, list):
-            python_requirements = "\n".join(python_requirements)
-        if not isinstance(python_requirements, str):
+        self.python_version = python_version
+        self.acl = acl
+        self.python_requirements = python_requirements
+        self.build_script_pre = build_script_pre
+        self.build_script_post = build_script_post
+        self.branch_name = branch_name
+        self.commit_hash = commit_hash
+        self.path_to_repo = path_to_repo
+        self.path_to_module = path_to_module
+
+    def __repr__(self):
+        return f"Image(acl={self.acl},)"
+
+    @property
+    def python_version(self):
+        return self._python_version
+
+    @python_version.setter
+    def python_version(self, value: str):
+        if not isinstance(value, str):
+            raise TypeError("python_version must be a string.")
+        if not re.match(r"^\d+\.\d+(\.\d+)?$", value):
+            raise ValueError("Invalid python_version format. Expected format: X.Y[.Z]")
+        self._python_version = value
+
+    @property
+    def python_requirements(self):
+        return self._python_requirements
+
+    @python_requirements.setter
+    def python_requirements(self, value: str | list[str]):
+        if isinstance(value, str) and os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        elif isinstance(value, list):
+            value = "\n".join(value)
+        if not isinstance(value, str):
             raise TypeError(
                 "python_requirements must be a string or a file path to a requirements file."
             )
-        self.python_requirements = python_requirements
+        self._python_requirements = value
+
+    @property
+    def build_script_pre(self):
+        return self._build_script_pre
 
-        if os.path.isfile(build_script_pre):
-            with open(build_script_pre, "r") as f:
-                build_script_pre = f.read()
-        if not isinstance(build_script_pre, str):
+    @build_script_pre.setter
+    def build_script_pre(self, value: str):
+        if not isinstance(value, str):
            raise TypeError(
                 "build_script_pre must be a string or a file path to a script file."
             )
-        self.build_script_pre = build_script_pre
+        if os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        self._build_script_pre = value
+
+    @property
+    def build_script_post(self):
+        return self._build_script_post
 
-        if os.path.isfile(build_script_post):
-            with open(build_script_post, "r") as f:
-                build_script_post = f.read()
-        if not isinstance(build_script_post, str):
+    @build_script_post.setter
+    def build_script_post(self, value: str):
+        if not isinstance(value, str):
            raise TypeError(
                 "build_script_post must be a string or a file path to a script file."
             )
-        self.build_script_post = build_script_post
-        self.branch_name = branch_name
-        self.commit_hash = commit_hash
-        self.path_to_repo = path_to_repo
-        self.path_to_module = path_to_module
+        if os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        self._build_script_post = value
 
-    def __repr__(self):
-        return f"Image(acl={self.acl},)"
+    @property
+    def acl(self):
+        return self._acl
+
+    @acl.setter
+    def acl(self, value: Optional[ACL]):
+        if value is None:
+            signed_user = User.signed_user()
+            if signed_user is None:
+                raise ValueError(
+                    "ACL cannot be None. Please provide a valid ACL or ensure a user is signed in."
+                )
+        elif not isinstance(value, ACL):
+            raise TypeError("acl must be an instance of ACL.")
+        self._acl = value or ACL(signed_user)
 
     def update(self, **kwargs):
         for key, value in kwargs.items():
-            if key == "acl" and not isinstance(value, ACL):
-                raise TypeError("acl must be an instance of ACL.")
-            elif key == "python_requirements" and not isinstance(value, str):
-                raise TypeError("python_requirements must be a string.")
-            elif key == "build_script_pre" and not isinstance(value, str):
-                raise TypeError("build_script_pre must be a string.")
-            elif key == "build_script_post" and not isinstance(value, str):
-                raise TypeError("build_script_post must be a string.")
-            elif (
+            if (
                 key in ["branch_name", "commit_hash", "path_to_repo", "path_to_module"]
                 and value is not None
                 and not isinstance(value, str)
             ):
                 raise TypeError(f"{key} must be a string or None.")
-            if key not in self.__dict__:
+            if not hasattr(self, key):
                 raise AttributeError(
                     f"'{self.__class__.__name__}' object has no attribute '{key}'"
                 )
@@ -107,6 +143,7 @@ class Image:
         """
         return {
             "acl": self.acl.to_dict(),
+            "python_version": self.python_version,
             "python_requirements": self.python_requirements,
             "build_script_pre": self.build_script_pre,
             "build_script_post": self.build_script_post,
src/datatailr/scheduler/base.py

@@ -78,12 +78,6 @@ class Resources:
     cpu: float = DEFAULT_TASK_CPU
 
 
-# TODO: create a dt_run script that will:
-# 1. create user and group if not exists
-# 2. set the correct path
-# 3. run the job based on its type
-
-
 class EntryPoint:
     """
     Represents an entry point for a DataTailr job.
@@ -139,12 +133,13 @@ class Job:
         environment: Optional[Environment] = Environment.DEV,
         image: Optional[Image] = None,
         run_as: Optional[Union[str, User]] = None,
-        resources: Resources = Resources(memory="100m", cpu=1),
+        resources: Resources = Resources(memory="128m", cpu=0.25),
         acl: Optional[ACL] = None,
+        python_version: str = "3.12",
         python_requirements: str = "",
         build_script_pre: str = "",
         build_script_post: str = "",
-        type: JobType = JobType.UNKNOWN,
+        type: Optional[JobType] = JobType.UNKNOWN,
         entrypoint: Optional[EntryPoint] = None,
         update_existing: bool = False,
     ):
@@ -173,12 +168,13 @@ class Job:
         if image is None:
             image = Image(
                 acl=self.acl,
+                python_version=python_version,
                 python_requirements=python_requirements,
                 build_script_pre=build_script_pre,
                 build_script_post=build_script_post,
             )
         self.image = image
-        self.type = type
+        self.type = type if entrypoint is None else entrypoint.type
         self.entrypoint = entrypoint
         self.__id = str(uuid.uuid4())
 
@@ -229,6 +225,8 @@ class Job:
             if isinstance(self.run_as, User)
             else self.run_as,
             "acl": self.acl.to_dict(),
+            "memory": self.resources.memory,
+            "cpu": self.resources.cpu,
         }
         if self.type != JobType.BATCH:
             job_dict["entrypoint"] = str(self.entrypoint) if self.entrypoint else None
@@ -338,6 +336,8 @@ class Job:
             __client__.run(f"file://{temp_file_name}", **self.get_schedule_args())
         elif command == "save":
             __client__.save(f"file://{temp_file_name}", **self.get_schedule_args())
+        elif command == "start":
+            __client__.start(self.name, environment=self.environment)
         else:
             raise ValueError(f"Unknown command: {command}")
         os.remove(temp_file_name)
@@ -350,12 +350,52 @@ class Job:
         """
         Save the job to the DataTailr platform.
         If the job already exists, it will be updated.
+        The repository state is verified and an image is prepared for execution.
         """
         return self.__run_command__("save")
 
     def run(self) -> Tuple[bool, str]:
         """
-        Run the job. This method should be implemented to execute the job logic.
-        It verifies the repository state and prepares the job for execution.
+        Run the job.
+        This is equivalent to running job.save() and then job.start().
         """
         return self.__run_command__("run")
+
+    def start(self) -> Tuple[bool, str]:
+        """
+        Start the job. This will start the job execution on a schedule for batches if a schedule was specified.
+        For other types of jobs and for batches without a schedule the job will be run immediately.
+        """
+        return self.__run_command__("start")
+
+    def promote(
+        self,
+        from_environment: Optional[Environment] = None,
+        version: Optional[str | int] = None,
+    ) -> Tuple[bool, str]:
+        """
+        Promote the job to the next environment.
+        This method is used to promote a version of the job from one environment to the next one.
+        If none of the environments to promote from are specified, it defaults to promote from all environments.
+        """
+        promote_kwargs = {}
+        if version is not None:
+            promote_kwargs["version"] = str(version)
+        if from_environment is not None:
+            promote_kwargs["environment"] = str(from_environment)
+        try:
+            __client__.promote(self.name, **promote_kwargs)
+            return True, f"Job '{self.name}' promoted successfully."
+        except Exception as e:
+            logger.error(f"Error promoting job '{self.name}': {e}")
+            return False, str(e)
+
+    def versions(self, environment: Optional[Environment] = None) -> list[str] | None:
+        """
+        List all versions of the job in the specified environment.
+        If no environment is specified, it lists versions across all environments.
+        """
+        command_kwargs = {}
+        if environment is not None:
+            command_kwargs["environment"] = str(environment)
+        return __client__.versions(self.name, **command_kwargs)
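The new `start`, `promote`, and `versions` methods round out the job lifecycle, and `run()` is now documented as `save()` followed by `start()`. Below is a hedged sketch of how that lifecycle might be driven, assuming `Job` and `Environment` are re-exported from `datatailr.scheduler` (the import path is an assumption) and that `job` has already been constructed with a valid configuration; actually running it requires a configured DataTailr environment.

```python
from datatailr.scheduler import Environment, Job  # assumed import path


def release(job: Job) -> None:
    """Save a job, start it, then promote it and inspect its versions."""
    ok, message = job.save()   # verifies the repository state and prepares an image
    if not ok:
        raise RuntimeError(f"save failed: {message}")

    ok, message = job.start()  # batches with a schedule are scheduled; everything else runs now
    if not ok:
        raise RuntimeError(f"start failed: {message}")

    # Promote a specific version out of DEV; both arguments are optional.
    ok, message = job.promote(from_environment=Environment.DEV, version=1)
    print(message)

    # List the versions available in DEV (omit the argument for all environments).
    print(job.versions(environment=Environment.DEV))
```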
src/datatailr/scheduler/schedule.py

@@ -105,7 +105,7 @@ class Schedule:
         match = re.match(r"^(.*?)\s*\((.*?)\)$", result)
         if match:
             cron_expression, schedule_expression = match.groups()
-            self.cron_expression = cron_expression.strip()
+            self.cron_expression = "0 " + cron_expression.strip()
             self.schedule_expression = schedule_expression.strip()
             self.__is_set__ = True
 
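The only behavioural change here is the `"0 "` prefix on the parsed cron expression, which adds one leading field to whatever the backend returned (for example a seconds or minutes column, depending on the cron dialect in use). A self-contained sketch of the parsing step, using a hypothetical `result` string in the `<cron> (<description>)` shape the regex appears to expect:

```python
import re

# Hypothetical backend result in the "<cron expression> (<readable schedule>)" shape.
result = "30 9 * * MON-FRI (every weekday at 09:30)"

match = re.match(r"^(.*?)\s*\((.*?)\)$", result)
if match:
    cron_expression, schedule_expression = match.groups()
    cron_expression = "0 " + cron_expression.strip()  # 0.1.17 prepends a leading field
    schedule_expression = schedule_expression.strip()
    print(cron_expression)      # 0 30 9 * * MON-FRI
    print(schedule_expression)  # every weekday at 09:30
```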
src/datatailr/user.py

@@ -170,7 +170,7 @@ class User:
                 raise Warning(
                     "Password is not required for system users. It will be ignored."
                 )
-            User.__client__.add(
+            new_user = __client__.add(
                 name,
                 first_name=first_name,
                 last_name=last_name,
@@ -179,7 +179,7 @@ class User:
                 system=is_system_user,
             )
         else:
-            __client__.add(
+            new_user = __client__.add(
                 name,
                 first_name=first_name,
                 last_name=last_name,
@@ -188,7 +188,8 @@ class User:
                 primary_group=primary_group,
                 system=is_system_user,
             )
-        return User.get(name)
+
+        return new_user
 
     @staticmethod
     def exists(name: str) -> bool:
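Both branches now keep the object returned by `__client__.add(...)` and return it directly, instead of re-fetching with `User.get(name)`. A hedged sketch of the calling side follows; the method name `User.add` and its exact signature are not visible in this hunk and are assumptions based on the keyword names shown, and the argument values are hypothetical.

```python
from datatailr import User  # User is importable from the package root, as seen in image.py

# Hypothetical call; only the keyword names mirror the ones in the diff.
new_user = User.add(
    "jdoe",
    first_name="Jane",
    last_name="Doe",
)
# In 0.1.15 this returned User.get("jdoe"); in 0.1.17 it is whatever the
# underlying __client__.add(...) call returned.
print(new_user)
```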
{datatailr-0.1.15 → datatailr-0.1.17/src/datatailr.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.15
+Version: 0.1.17
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT
@@ -84,9 +84,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
src/sbin/datatailr_run.py

@@ -82,6 +82,7 @@ def run_command_as_user(command: str, user: str, env_vars: dict):
     Run a command as a specific user with the given environment variables.
     """
     env_vars.update({"PATH": get_env_var("PATH")})
+    env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH")})
     env_vars_str = " ".join(f"{key}='{value}'" for key, value in env_vars.items())
     full_command = f"sudo -u {user} {env_vars_str} {command}"
     logger.debug(f"Running command: {full_command}")
@@ -94,7 +95,6 @@ def main():
 
     job_name = get_env_var("DATATAILR_JOB_NAME")
     job_id = get_env_var("DATATAILR_JOB_ID")
-    entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
 
     if job_type == "batch":
         run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
@@ -102,6 +102,7 @@ def main():
         job_argument_mapping = get_env_var(
             "DATATAILR_JOB_ARGUMENT_MAPPING", encode_json({})
         )
+        entrypoint = get_env_var("DATATAILR_BATCH_ENTRYPOINT")
         env = {
             "DATATAILR_BATCH_RUN_ID": run_id,
             "DATATAILR_BATCH_ID": batch_id,
@@ -112,6 +113,7 @@ def main():
         run_command_as_user("datatailr_run_batch", user, env)
     elif job_type == "service":
         port = get_env_var("DATATAILR_SERVICE_PORT")
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
@@ -120,6 +122,7 @@ def main():
         }
         run_command_as_user("datatailr_run_service", user, env)
     elif job_type == "app":
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
@@ -128,6 +131,7 @@ def main():
         run_command_as_user("datatailr_run_app", user, env)
     elif job_type == "excel":
         host = get_env_var("DATATAILR_HOST")
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,