datatailr 0.1.8__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datatailr might be problematic. Click here for more details.

Files changed (38) hide show
  1. {datatailr-0.1.8/src/datatailr.egg-info → datatailr-0.1.11}/PKG-INFO +43 -1
  2. {datatailr-0.1.8 → datatailr-0.1.11}/README.md +42 -0
  3. {datatailr-0.1.8 → datatailr-0.1.11}/pyproject.toml +3 -1
  4. {datatailr-0.1.8 → datatailr-0.1.11}/setup.py +2 -0
  5. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/group.py +1 -3
  6. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/logging.py +4 -10
  7. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/base.py +2 -12
  8. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/batch.py +3 -3
  9. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/user.py +1 -14
  10. {datatailr-0.1.8 → datatailr-0.1.11/src/datatailr.egg-info}/PKG-INFO +43 -1
  11. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/SOURCES.txt +3 -1
  12. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/entry_points.txt +2 -0
  13. {datatailr-0.1.8 → datatailr-0.1.11}/src/sbin/datatailr_run.py +4 -0
  14. datatailr-0.1.11/src/sbin/datatailr_run_app.py +37 -0
  15. datatailr-0.1.11/src/sbin/datatailr_run_excel.py +34 -0
  16. datatailr-0.1.8/src/sbin/datatailr_run_app.py → datatailr-0.1.11/src/sbin/datatailr_run_service.py +8 -2
  17. {datatailr-0.1.8 → datatailr-0.1.11}/LICENSE +0 -0
  18. {datatailr-0.1.8 → datatailr-0.1.11}/setup.cfg +0 -0
  19. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/__init__.py +0 -0
  20. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/acl.py +0 -0
  21. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/blob.py +0 -0
  22. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/build/__init__.py +0 -0
  23. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/build/image.py +0 -0
  24. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/dt_json.py +0 -0
  25. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/errors.py +0 -0
  26. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/__init__.py +0 -0
  27. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/arguments_cache.py +0 -0
  28. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/batch_decorator.py +0 -0
  29. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/constants.py +0 -0
  30. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/schedule.py +0 -0
  31. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/utils.py +0 -0
  32. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/utils.py +0 -0
  33. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/version.py +0 -0
  34. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/wrapper.py +0 -0
  35. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/dependency_links.txt +0 -0
  36. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/requires.txt +0 -0
  37. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/top_level.txt +0 -0
  38. {datatailr-0.1.8 → datatailr-0.1.11}/src/sbin/datatailr_run_batch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.8
3
+ Version: 0.1.11
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
104
104
  Running this code will create a graph of jobs and execute it.
105
105
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
106
106
 
107
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
108
+
109
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
110
+
111
+ ### Execution at Scale
112
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
113
+
114
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
115
+
116
+
117
+ ```python
118
+ # my_module.py
119
+
120
+ from datatailr.scheduler import batch, Batch
121
+
122
+ @batch()
123
+ def func_no_args() -> str:
124
+ return "no_args"
125
+
126
+
127
+ @batch()
128
+ def func_with_args(a: int, b: float) -> str:
129
+ return f"args: {a}, {b}"
130
+ ```
131
+
132
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
133
+
134
+ ```python
135
+ from my_module import func_no_args, func_with_args
136
+ from datatailr.scheduler import Batch, Schedule
137
+
138
+ schedule = Schedule(at_hour=0)
139
+
140
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
141
+ for n in range(2):
142
+ res1 = func_no_args().alias(f"func_{n}")
143
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
144
+ ```
145
+
146
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
147
+ The DAG in the example above will be scheduled to run daily at 00:00.
148
+
107
149
  ___
108
150
  Visit [our website](https://www.datatailr.com/) for more!
@@ -67,5 +67,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
67
67
  Running this code will create a graph of jobs and execute it.
68
68
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
69
69
 
70
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
71
+
72
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
73
+
74
+ ### Execution at Scale
75
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
76
+
77
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
78
+
79
+
80
+ ```python
81
+ # my_module.py
82
+
83
+ from datatailr.scheduler import batch, Batch
84
+
85
+ @batch()
86
+ def func_no_args() -> str:
87
+ return "no_args"
88
+
89
+
90
+ @batch()
91
+ def func_with_args(a: int, b: float) -> str:
92
+ return f"args: {a}, {b}"
93
+ ```
94
+
95
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
96
+
97
+ ```python
98
+ from my_module import func_no_args, func_with_args
99
+ from datatailr.scheduler import Batch, Schedule
100
+
101
+ schedule = Schedule(at_hour=0)
102
+
103
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
104
+ for n in range(2):
105
+ res1 = func_no_args().alias(f"func_{n}")
106
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
107
+ ```
108
+
109
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
110
+ The DAG in the example above will be scheduled to run daily at 00:00.
111
+
70
112
  ___
71
113
  Visit [our website](https://www.datatailr.com/) for more!
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "datatailr"
7
- version = "0.1.8"
7
+ version = "0.1.11"
8
8
  description = "Ready-to-Use Platform That Drives Business Insights"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -37,6 +37,8 @@ documentation = "https://docs.datatailr.com/"
37
37
  datatailr_run = "datatailr.sbin.datatailr_run:main"
38
38
  datatailr_run_batch = "datatailr.sbin.datatailr_run_batch:run"
39
39
  datatailr_run_app = "datatailr.sbin.datatailr_run_app:run"
40
+ datatailr_run_excel = "datatailr.sbin.datatailr_run_excel:run"
41
+ datatailr_run_service = "datatailr.sbin.datatailr_run_service:run"
40
42
 
41
43
  [project.optional-dependencies]
42
44
  dev = [
@@ -13,6 +13,8 @@ setup(
13
13
  "src/sbin/datatailr_run.py",
14
14
  "src/sbin/datatailr_run_batch.py",
15
15
  "src/sbin/datatailr_run_app.py",
16
+ "src/sbin/datatailr_run_excel.py",
17
+ "src/sbin/datatailr_run_service.py",
16
18
  ],
17
19
  )
18
20
  ],
@@ -10,7 +10,7 @@
10
10
 
11
11
  from typing import Optional, Union
12
12
 
13
- from datatailr.wrapper import dt__Group, mock_cli_tool
13
+ from datatailr.wrapper import dt__Group
14
14
 
15
15
 
16
16
  # Datatailr Group API Client
@@ -75,8 +75,6 @@ class Group:
75
75
  def __refresh__(self):
76
76
  if not self.name:
77
77
  raise ValueError("Name is not set. Cannot refresh group.")
78
- if isinstance(__client__, mock_cli_tool):
79
- return
80
78
  group = __client__.get(self.name)
81
79
  if group:
82
80
  self.__name = group["name"]
@@ -14,7 +14,7 @@ from logging import StreamHandler
14
14
  from logging.handlers import RotatingFileHandler
15
15
  from typing import Optional
16
16
  from datatailr import User
17
- from datatailr.wrapper import dt__Tag, mock_cli_tool
17
+ from datatailr.wrapper import dt__Tag
18
18
 
19
19
 
20
20
  def get_log_level() -> int:
@@ -34,15 +34,9 @@ def get_log_level() -> int:
34
34
 
35
35
 
36
36
  tag = dt__Tag()
37
- if isinstance(tag, mock_cli_tool):
38
- node_name = "local"
39
- node_ip = "0.0.0.0"
40
- job_name = "local_job"
41
-
42
- else:
43
- node_name = tag.get("node_name")
44
- node_ip = tag.get("node_ip")
45
- job_name = os.getenv("DATATAILR_JOB_NAME", "unknown_job")
37
+ node_name = tag.get("node_name") or "local"
38
+ node_ip = tag.get("node_ip")
39
+ job_name = os.getenv("DATATAILR_JOB_NAME", "unknown_job")
46
40
 
47
41
  try:
48
42
  user = User.signed_user().name
@@ -15,7 +15,6 @@ import importlib
15
15
  import inspect
16
16
  import json
17
17
  import os
18
- import subprocess
19
18
  import tempfile
20
19
  import uuid
21
20
  from dataclasses import dataclass
@@ -139,7 +138,7 @@ class Job:
139
138
  name: str,
140
139
  environment: Optional[Environment] = Environment.DEV,
141
140
  image: Optional[Image] = None,
142
- run_as: Optional[Union[str, User]] = User.signed_user(),
141
+ run_as: Optional[Union[str, User]] = None,
143
142
  resources: Resources = Resources(memory="100m", cpu=1),
144
143
  acl: Optional[ACL] = None,
145
144
  python_requirements: str = "",
@@ -293,16 +292,7 @@ class Job:
293
292
  "Please commit your changes before running the job."
294
293
  )
295
294
 
296
- remote_commit = (
297
- subprocess.run(
298
- ("remote_commit = $(git ls-remote origin HEAD)"),
299
- shell=True,
300
- capture_output=True,
301
- text=True,
302
- )
303
- .stdout.strip()
304
- .split("\t")[0]
305
- )
295
+ remote_commit = run_shell_command("git ls-remote origin HEAD")[0].split("\t")[0]
306
296
 
307
297
  if local_commit != remote_commit:
308
298
  raise RepoValidationError(
@@ -302,7 +302,7 @@ class Batch(Job):
302
302
  environment: Optional[Environment] = Environment.DEV,
303
303
  schedule: Optional[Schedule] = None,
304
304
  image: Optional[Image] = None,
305
- run_as: Optional[Union[str, User]] = User.signed_user(),
305
+ run_as: Optional[Union[str, User]] = None,
306
306
  resources: Resources = Resources(memory="100m", cpu=1),
307
307
  acl: Optional[ACL] = None,
308
308
  local_run: bool = False,
@@ -440,9 +440,9 @@ class Batch(Job):
440
440
  def get_schedule_args(self) -> Dict[str, Any]:
441
441
  if isinstance(self.__schedule, Schedule):
442
442
  args = {
443
- "at_minute": self.__schedule.at_minutes,
443
+ "at_minutes": self.__schedule.at_minutes,
444
444
  "every_minute": self.__schedule.every_minute,
445
- "at_hour": self.__schedule.at_hours,
445
+ "at_hours": self.__schedule.at_hours,
446
446
  "every_hour": self.__schedule.every_hour,
447
447
  "weekdays": self.__schedule.weekdays,
448
448
  "day_of_month": self.__schedule.day_of_month,
@@ -9,10 +9,9 @@
9
9
  # *************************************************************************
10
10
 
11
11
  from __future__ import annotations
12
- import sys
13
12
  from typing import Optional
14
13
 
15
- from datatailr.wrapper import dt__User, mock_cli_tool
14
+ from datatailr.wrapper import dt__User
16
15
 
17
16
  # Datatailr User API Client
18
17
  __client__ = dt__User()
@@ -93,10 +92,6 @@ class User:
93
92
  def __refresh__(self):
94
93
  if not self.name:
95
94
  raise ValueError("Name is not set. Cannot refresh user.")
96
- if isinstance(__client__, mock_cli_tool) or any(
97
- "unit" in arg for arg in sys.argv
98
- ):
99
- return
100
95
  user = __client__.get(self.name)
101
96
  if user:
102
97
  self.__name = user["name"]
@@ -149,14 +144,6 @@ class User:
149
144
 
150
145
  @staticmethod
151
146
  def signed_user() -> User:
152
- if isinstance(__client__, mock_cli_tool) or any(
153
- "unit" in arg for arg in sys.argv
154
- ):
155
- user = User(name="test_user")
156
- user.__expiry__ = "mock_expiry"
157
- user.__signature__ = "mock_signature"
158
- return user
159
-
160
147
  user_signature_and_expiry = __client__.signed_user()
161
148
  if user_signature_and_expiry:
162
149
  user = User(name=user_signature_and_expiry["name"])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.8
3
+ Version: 0.1.11
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
104
104
  Running this code will create a graph of jobs and execute it.
105
105
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
106
106
 
107
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
108
+
109
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
110
+
111
+ ### Execution at Scale
112
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
113
+
114
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
115
+
116
+
117
+ ```python
118
+ # my_module.py
119
+
120
+ from datatailr.scheduler import batch, Batch
121
+
122
+ @batch()
123
+ def func_no_args() -> str:
124
+ return "no_args"
125
+
126
+
127
+ @batch()
128
+ def func_with_args(a: int, b: float) -> str:
129
+ return f"args: {a}, {b}"
130
+ ```
131
+
132
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
133
+
134
+ ```python
135
+ from my_module import func_no_args, func_with_args
136
+ from datatailr.scheduler import Batch, Schedule
137
+
138
+ schedule = Schedule(at_hour=0)
139
+
140
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
141
+ for n in range(2):
142
+ res1 = func_no_args().alias(f"func_{n}")
143
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
144
+ ```
145
+
146
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
147
+ The DAG in the example above will be scheduled to run daily at 00:00.
148
+
107
149
  ___
108
150
  Visit [our website](https://www.datatailr.com/) for more!
@@ -31,4 +31,6 @@ src/datatailr/scheduler/schedule.py
31
31
  src/datatailr/scheduler/utils.py
32
32
  src/sbin/datatailr_run.py
33
33
  src/sbin/datatailr_run_app.py
34
- src/sbin/datatailr_run_batch.py
34
+ src/sbin/datatailr_run_batch.py
35
+ src/sbin/datatailr_run_excel.py
36
+ src/sbin/datatailr_run_service.py
@@ -2,3 +2,5 @@
2
2
  datatailr_run = datatailr.sbin.datatailr_run:main
3
3
  datatailr_run_app = datatailr.sbin.datatailr_run_app:run
4
4
  datatailr_run_batch = datatailr.sbin.datatailr_run_batch:run
5
+ datatailr_run_excel = datatailr.sbin.datatailr_run_excel:run
6
+ datatailr_run_service = datatailr.sbin.datatailr_run_service:run
@@ -111,10 +111,12 @@ def main():
111
111
  }
112
112
  run_command_as_user("datatailr_run_batch", user, env)
113
113
  elif job_type == "service":
114
+ port = get_env_var("DATATAILR_SERVICE_PORT")
114
115
  env = {
115
116
  "DATATAILR_JOB_NAME": job_name,
116
117
  "DATATAILR_JOB_ID": job_id,
117
118
  "DATATAILR_ENTRYPOINT": entrypoint,
119
+ "DATATAILR_SERVICE_PORT": port,
118
120
  }
119
121
  run_command_as_user("datatailr_run_service", user, env)
120
122
  elif job_type == "app":
@@ -125,10 +127,12 @@ def main():
125
127
  }
126
128
  run_command_as_user("datatailr_run_app", user, env)
127
129
  elif job_type == "excel":
130
+ host = get_env_var("DATATAILR_HOST")
128
131
  env = {
129
132
  "DATATAILR_JOB_NAME": job_name,
130
133
  "DATATAILR_JOB_ID": job_id,
131
134
  "DATATAILR_ENTRYPOINT": entrypoint,
135
+ "DATATAILR_HOST": host,
132
136
  }
133
137
  run_command_as_user("datatailr_run_excel", user, env)
134
138
  elif job_type == "IDE":
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # *************************************************************************
4
+ #
5
+ # Copyright (c) 2025 - Datatailr Inc.
6
+ # All Rights Reserved.
7
+ #
8
+ # This file is part of Datatailr and subject to the terms and conditions
9
+ # defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
10
+ # of this file, in parts or full, via any medium is strictly prohibited.
11
+ # *************************************************************************
12
+
13
+ import os
14
+ import sys
15
+ import runpy
16
+ from importlib.resources import files
17
+
18
+ from datatailr.logging import DatatailrLogger
19
+
20
+
21
+ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
22
+
23
+
24
+ def run():
25
+ logger.info("Starting Datatailr app...")
26
+ entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
27
+ if entrypoint is None or ":" not in entrypoint:
28
+ raise ValueError(
29
+ "Environment variable 'DATATAILR_ENTRYPOINT' is not in the format 'module_name:file_name'."
30
+ )
31
+
32
+ module_name, file_name = entrypoint.split(":")
33
+
34
+ script = files(module_name).joinpath(file_name)
35
+ sys.argv = ["streamlit", "run", str(script), *sys.argv[1:]]
36
+ logger.info(f"Running entrypoint: {entrypoint}")
37
+ runpy.run_module("streamlit", run_name="__main__")
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # *************************************************************************
4
+ #
5
+ # Copyright (c) 2025 - Datatailr Inc.
6
+ # All Rights Reserved.
7
+ #
8
+ # This file is part of Datatailr and subject to the terms and conditions
9
+ # defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
10
+ # of this file, in parts or full, via any medium is strictly prohibited.
11
+ # *************************************************************************
12
+
13
+ import os
14
+ import subprocess
15
+
16
+ from datatailr.logging import DatatailrLogger
17
+
18
+ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
19
+
20
+
21
+ def run():
22
+ logger.info("Starting Datatailr excel add-in...")
23
+ entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
24
+ hostname = os.environ.get("DATATAILR_HOST")
25
+
26
+ if entrypoint is None:
27
+ raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
28
+
29
+ if hostname is None:
30
+ raise ValueError("Environment variable 'DATATAILR_HOST' is not set.")
31
+
32
+ entrypoint = f'./dt-excel.sh -n -H "{hostname}" -p 8080 "{entrypoint}"'
33
+ logger.info(f"Running entrypoint: {entrypoint}")
34
+ subprocess.run(entrypoint, shell=True)
@@ -11,6 +11,7 @@
11
11
  # *************************************************************************
12
12
 
13
13
  import os
14
+ import importlib
14
15
 
15
16
  from datatailr.logging import DatatailrLogger
16
17
 
@@ -18,11 +19,16 @@ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
18
19
 
19
20
 
20
21
  def run():
21
- logger.info("Starting Datatailr app...")
22
+ logger.info("Starting Datatailr service...")
22
23
  entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
24
+ port = os.environ.get("DATATAILR_SERVICE_PORT")
23
25
 
24
26
  if entrypoint is None:
25
27
  raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
26
28
 
27
- os.system(entrypoint)
29
+ if port is None:
30
+ raise ValueError("Environment variable 'DATATAILR_SERVICE_PORT' is not set.")
31
+
32
+ entrypoint_module = importlib.import_module(entrypoint)
28
33
  logger.info(f"Running entrypoint: {entrypoint}")
34
+ entrypoint_module.__service_main__(int(port))
File without changes
File without changes