datatailr 0.1.8__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datatailr might be problematic. Click here for more details.

Files changed (38) hide show
  1. {datatailr-0.1.8/src/datatailr.egg-info → datatailr-0.1.11}/PKG-INFO +43 -1
  2. {datatailr-0.1.8 → datatailr-0.1.11}/README.md +42 -0
  3. {datatailr-0.1.8 → datatailr-0.1.11}/pyproject.toml +3 -1
  4. {datatailr-0.1.8 → datatailr-0.1.11}/setup.py +2 -0
  5. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/group.py +1 -3
  6. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/logging.py +4 -10
  7. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/base.py +2 -12
  8. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/batch.py +3 -3
  9. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/user.py +1 -14
  10. {datatailr-0.1.8 → datatailr-0.1.11/src/datatailr.egg-info}/PKG-INFO +43 -1
  11. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/SOURCES.txt +3 -1
  12. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/entry_points.txt +2 -0
  13. {datatailr-0.1.8 → datatailr-0.1.11}/src/sbin/datatailr_run.py +4 -0
  14. datatailr-0.1.11/src/sbin/datatailr_run_app.py +37 -0
  15. datatailr-0.1.11/src/sbin/datatailr_run_excel.py +34 -0
  16. datatailr-0.1.8/src/sbin/datatailr_run_app.py → datatailr-0.1.11/src/sbin/datatailr_run_service.py +8 -2
  17. {datatailr-0.1.8 → datatailr-0.1.11}/LICENSE +0 -0
  18. {datatailr-0.1.8 → datatailr-0.1.11}/setup.cfg +0 -0
  19. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/__init__.py +0 -0
  20. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/acl.py +0 -0
  21. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/blob.py +0 -0
  22. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/build/__init__.py +0 -0
  23. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/build/image.py +0 -0
  24. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/dt_json.py +0 -0
  25. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/errors.py +0 -0
  26. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/__init__.py +0 -0
  27. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/arguments_cache.py +0 -0
  28. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/batch_decorator.py +0 -0
  29. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/constants.py +0 -0
  30. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/schedule.py +0 -0
  31. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/scheduler/utils.py +0 -0
  32. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/utils.py +0 -0
  33. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/version.py +0 -0
  34. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr/wrapper.py +0 -0
  35. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/dependency_links.txt +0 -0
  36. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/requires.txt +0 -0
  37. {datatailr-0.1.8 → datatailr-0.1.11}/src/datatailr.egg-info/top_level.txt +0 -0
  38. {datatailr-0.1.8 → datatailr-0.1.11}/src/sbin/datatailr_run_batch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.8
3
+ Version: 0.1.11
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
104
104
  Running this code will create a graph of jobs and execute it.
105
105
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
106
106
 
107
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
108
+
109
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
110
+
111
+ ### Execution at Scale
112
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
113
+
114
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
115
+
116
+
117
+ ```python
118
+ # my_module.py
119
+
120
+ from datatailr.scheduler import batch, Batch
121
+
122
+ @batch()
123
+ def func_no_args() -> str:
124
+ return "no_args"
125
+
126
+
127
+ @batch()
128
+ def func_with_args(a: int, b: float) -> str:
129
+ return f"args: {a}, {b}"
130
+ ```
131
+
132
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
133
+
134
+ ```python
135
+ from my_module import func_no_args, func_with_args
136
+ from datatailr.scheduler import Batch, Schedule
137
+
138
+ schedule = Schedule(at_hour=0)
139
+
140
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
141
+ for n in range(2):
142
+ res1 = func_no_args().alias(f"func_{n}")
143
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
144
+ ```
145
+
146
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
147
+ The DAG in the example above will be scheduled to run daily at 00:00.
148
+
107
149
  ___
108
150
  Visit [our website](https://www.datatailr.com/) for more!
@@ -67,5 +67,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
67
67
  Running this code will create a graph of jobs and execute it.
68
68
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
69
69
 
70
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
71
+
72
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
73
+
74
+ ### Execution at Scale
75
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
76
+
77
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
78
+
79
+
80
+ ```python
81
+ # my_module.py
82
+
83
+ from datatailr.scheduler import batch, Batch
84
+
85
+ @batch()
86
+ def func_no_args() -> str:
87
+ return "no_args"
88
+
89
+
90
+ @batch()
91
+ def func_with_args(a: int, b: float) -> str:
92
+ return f"args: {a}, {b}"
93
+ ```
94
+
95
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
96
+
97
+ ```python
98
+ from my_module import func_no_args, func_with_args
99
+ from datatailr.scheduler import Batch, Schedule
100
+
101
+ schedule = Schedule(at_hour=0)
102
+
103
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
104
+ for n in range(2):
105
+ res1 = func_no_args().alias(f"func_{n}")
106
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
107
+ ```
108
+
109
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
110
+ The DAG in the example above will be scheduled to run daily at 00:00.
111
+
70
112
  ___
71
113
  Visit [our website](https://www.datatailr.com/) for more!
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "datatailr"
7
- version = "0.1.8"
7
+ version = "0.1.11"
8
8
  description = "Ready-to-Use Platform That Drives Business Insights"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -37,6 +37,8 @@ documentation = "https://docs.datatailr.com/"
37
37
  datatailr_run = "datatailr.sbin.datatailr_run:main"
38
38
  datatailr_run_batch = "datatailr.sbin.datatailr_run_batch:run"
39
39
  datatailr_run_app = "datatailr.sbin.datatailr_run_app:run"
40
+ datatailr_run_excel = "datatailr.sbin.datatailr_run_excel:run"
41
+ datatailr_run_service = "datatailr.sbin.datatailr_run_service:run"
40
42
 
41
43
  [project.optional-dependencies]
42
44
  dev = [
@@ -13,6 +13,8 @@ setup(
13
13
  "src/sbin/datatailr_run.py",
14
14
  "src/sbin/datatailr_run_batch.py",
15
15
  "src/sbin/datatailr_run_app.py",
16
+ "src/sbin/datatailr_run_excel.py",
17
+ "src/sbin/datatailr_run_service.py",
16
18
  ],
17
19
  )
18
20
  ],
@@ -10,7 +10,7 @@
10
10
 
11
11
  from typing import Optional, Union
12
12
 
13
- from datatailr.wrapper import dt__Group, mock_cli_tool
13
+ from datatailr.wrapper import dt__Group
14
14
 
15
15
 
16
16
  # Datatailr Group API Client
@@ -75,8 +75,6 @@ class Group:
75
75
  def __refresh__(self):
76
76
  if not self.name:
77
77
  raise ValueError("Name is not set. Cannot refresh group.")
78
- if isinstance(__client__, mock_cli_tool):
79
- return
80
78
  group = __client__.get(self.name)
81
79
  if group:
82
80
  self.__name = group["name"]
@@ -14,7 +14,7 @@ from logging import StreamHandler
14
14
  from logging.handlers import RotatingFileHandler
15
15
  from typing import Optional
16
16
  from datatailr import User
17
- from datatailr.wrapper import dt__Tag, mock_cli_tool
17
+ from datatailr.wrapper import dt__Tag
18
18
 
19
19
 
20
20
  def get_log_level() -> int:
@@ -34,15 +34,9 @@ def get_log_level() -> int:
34
34
 
35
35
 
36
36
  tag = dt__Tag()
37
- if isinstance(tag, mock_cli_tool):
38
- node_name = "local"
39
- node_ip = "0.0.0.0"
40
- job_name = "local_job"
41
-
42
- else:
43
- node_name = tag.get("node_name")
44
- node_ip = tag.get("node_ip")
45
- job_name = os.getenv("DATATAILR_JOB_NAME", "unknown_job")
37
+ node_name = tag.get("node_name") or "local"
38
+ node_ip = tag.get("node_ip")
39
+ job_name = os.getenv("DATATAILR_JOB_NAME", "unknown_job")
46
40
 
47
41
  try:
48
42
  user = User.signed_user().name
@@ -15,7 +15,6 @@ import importlib
15
15
  import inspect
16
16
  import json
17
17
  import os
18
- import subprocess
19
18
  import tempfile
20
19
  import uuid
21
20
  from dataclasses import dataclass
@@ -139,7 +138,7 @@ class Job:
139
138
  name: str,
140
139
  environment: Optional[Environment] = Environment.DEV,
141
140
  image: Optional[Image] = None,
142
- run_as: Optional[Union[str, User]] = User.signed_user(),
141
+ run_as: Optional[Union[str, User]] = None,
143
142
  resources: Resources = Resources(memory="100m", cpu=1),
144
143
  acl: Optional[ACL] = None,
145
144
  python_requirements: str = "",
@@ -293,16 +292,7 @@ class Job:
293
292
  "Please commit your changes before running the job."
294
293
  )
295
294
 
296
- remote_commit = (
297
- subprocess.run(
298
- ("remote_commit = $(git ls-remote origin HEAD)"),
299
- shell=True,
300
- capture_output=True,
301
- text=True,
302
- )
303
- .stdout.strip()
304
- .split("\t")[0]
305
- )
295
+ remote_commit = run_shell_command("git ls-remote origin HEAD")[0].split("\t")[0]
306
296
 
307
297
  if local_commit != remote_commit:
308
298
  raise RepoValidationError(
@@ -302,7 +302,7 @@ class Batch(Job):
302
302
  environment: Optional[Environment] = Environment.DEV,
303
303
  schedule: Optional[Schedule] = None,
304
304
  image: Optional[Image] = None,
305
- run_as: Optional[Union[str, User]] = User.signed_user(),
305
+ run_as: Optional[Union[str, User]] = None,
306
306
  resources: Resources = Resources(memory="100m", cpu=1),
307
307
  acl: Optional[ACL] = None,
308
308
  local_run: bool = False,
@@ -440,9 +440,9 @@ class Batch(Job):
440
440
  def get_schedule_args(self) -> Dict[str, Any]:
441
441
  if isinstance(self.__schedule, Schedule):
442
442
  args = {
443
- "at_minute": self.__schedule.at_minutes,
443
+ "at_minutes": self.__schedule.at_minutes,
444
444
  "every_minute": self.__schedule.every_minute,
445
- "at_hour": self.__schedule.at_hours,
445
+ "at_hours": self.__schedule.at_hours,
446
446
  "every_hour": self.__schedule.every_hour,
447
447
  "weekdays": self.__schedule.weekdays,
448
448
  "day_of_month": self.__schedule.day_of_month,
@@ -9,10 +9,9 @@
9
9
  # *************************************************************************
10
10
 
11
11
  from __future__ import annotations
12
- import sys
13
12
  from typing import Optional
14
13
 
15
- from datatailr.wrapper import dt__User, mock_cli_tool
14
+ from datatailr.wrapper import dt__User
16
15
 
17
16
  # Datatailr User API Client
18
17
  __client__ = dt__User()
@@ -93,10 +92,6 @@ class User:
93
92
  def __refresh__(self):
94
93
  if not self.name:
95
94
  raise ValueError("Name is not set. Cannot refresh user.")
96
- if isinstance(__client__, mock_cli_tool) or any(
97
- "unit" in arg for arg in sys.argv
98
- ):
99
- return
100
95
  user = __client__.get(self.name)
101
96
  if user:
102
97
  self.__name = user["name"]
@@ -149,14 +144,6 @@ class User:
149
144
 
150
145
  @staticmethod
151
146
  def signed_user() -> User:
152
- if isinstance(__client__, mock_cli_tool) or any(
153
- "unit" in arg for arg in sys.argv
154
- ):
155
- user = User(name="test_user")
156
- user.__expiry__ = "mock_expiry"
157
- user.__signature__ = "mock_signature"
158
- return user
159
-
160
147
  user_signature_and_expiry = __client__.signed_user()
161
148
  if user_signature_and_expiry:
162
149
  user = User(name=user_signature_and_expiry["name"])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.8
3
+ Version: 0.1.11
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
104
104
  Running this code will create a graph of jobs and execute it.
105
105
  Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
106
106
 
107
+ Since this is a local run, the execution of each node will happen sequentially in the same process.
108
+
109
+ To take advantage of the datatailr platform and execute the graph at scale, you can run it using the job scheduler as presented in the next section.
110
+
111
+ ### Execution at Scale
112
+ To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
113
+
114
+ You will first need to separate your function definitions from the DAG definition. This means you should define your functions in a separate module, which can be imported into the DAG definition.
115
+
116
+
117
+ ```python
118
+ # my_module.py
119
+
120
+ from datatailr.scheduler import batch, Batch
121
+
122
+ @batch()
123
+ def func_no_args() -> str:
124
+ return "no_args"
125
+
126
+
127
+ @batch()
128
+ def func_with_args(a: int, b: float) -> str:
129
+ return f"args: {a}, {b}"
130
+ ```
131
+
132
+ To use these functions in a batch job, you just need to import them and run in a DAG context:
133
+
134
+ ```python
135
+ from my_module import func_no_args, func_with_args
136
+ from datatailr.scheduler import Batch, Schedule
137
+
138
+ schedule = Schedule(at_hour=0)
139
+
140
+ with Batch(name="MY test DAG", schedule=schedule) as dag:
141
+ for n in range(2):
142
+ res1 = func_no_args().alias(f"func_{n}")
143
+ res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
144
+ ```
145
+
146
+ This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
147
+ The DAG in the example above will be scheduled to run daily at 00:00.
148
+
107
149
  ___
108
150
  Visit [our website](https://www.datatailr.com/) for more!
@@ -31,4 +31,6 @@ src/datatailr/scheduler/schedule.py
31
31
  src/datatailr/scheduler/utils.py
32
32
  src/sbin/datatailr_run.py
33
33
  src/sbin/datatailr_run_app.py
34
- src/sbin/datatailr_run_batch.py
34
+ src/sbin/datatailr_run_batch.py
35
+ src/sbin/datatailr_run_excel.py
36
+ src/sbin/datatailr_run_service.py
@@ -2,3 +2,5 @@
2
2
  datatailr_run = datatailr.sbin.datatailr_run:main
3
3
  datatailr_run_app = datatailr.sbin.datatailr_run_app:run
4
4
  datatailr_run_batch = datatailr.sbin.datatailr_run_batch:run
5
+ datatailr_run_excel = datatailr.sbin.datatailr_run_excel:run
6
+ datatailr_run_service = datatailr.sbin.datatailr_run_service:run
@@ -111,10 +111,12 @@ def main():
111
111
  }
112
112
  run_command_as_user("datatailr_run_batch", user, env)
113
113
  elif job_type == "service":
114
+ port = get_env_var("DATATAILR_SERVICE_PORT")
114
115
  env = {
115
116
  "DATATAILR_JOB_NAME": job_name,
116
117
  "DATATAILR_JOB_ID": job_id,
117
118
  "DATATAILR_ENTRYPOINT": entrypoint,
119
+ "DATATAILR_SERVICE_PORT": port,
118
120
  }
119
121
  run_command_as_user("datatailr_run_service", user, env)
120
122
  elif job_type == "app":
@@ -125,10 +127,12 @@ def main():
125
127
  }
126
128
  run_command_as_user("datatailr_run_app", user, env)
127
129
  elif job_type == "excel":
130
+ host = get_env_var("DATATAILR_HOST")
128
131
  env = {
129
132
  "DATATAILR_JOB_NAME": job_name,
130
133
  "DATATAILR_JOB_ID": job_id,
131
134
  "DATATAILR_ENTRYPOINT": entrypoint,
135
+ "DATATAILR_HOST": host,
132
136
  }
133
137
  run_command_as_user("datatailr_run_excel", user, env)
134
138
  elif job_type == "IDE":
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # *************************************************************************
4
+ #
5
+ # Copyright (c) 2025 - Datatailr Inc.
6
+ # All Rights Reserved.
7
+ #
8
+ # This file is part of Datatailr and subject to the terms and conditions
9
+ # defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
10
+ # of this file, in parts or full, via any medium is strictly prohibited.
11
+ # *************************************************************************
12
+
13
+ import os
14
+ import sys
15
+ import runpy
16
+ from importlib.resources import files
17
+
18
+ from datatailr.logging import DatatailrLogger
19
+
20
+
21
+ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
22
+
23
+
24
+ def run():
25
+ logger.info("Starting Datatailr app...")
26
+ entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
27
+ if entrypoint is None or ":" not in entrypoint:
28
+ raise ValueError(
29
+ "Environment variable 'DATATAILR_ENTRYPOINT' is not in the format 'module_name:file_name'."
30
+ )
31
+
32
+ module_name, file_name = entrypoint.split(":")
33
+
34
+ script = files(module_name).joinpath(file_name)
35
+ sys.argv = ["streamlit", "run", str(script), *sys.argv[1:]]
36
+ logger.info(f"Running entrypoint: {entrypoint}")
37
+ runpy.run_module("streamlit", run_name="__main__")
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python3
2
+
3
+ # *************************************************************************
4
+ #
5
+ # Copyright (c) 2025 - Datatailr Inc.
6
+ # All Rights Reserved.
7
+ #
8
+ # This file is part of Datatailr and subject to the terms and conditions
9
+ # defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
10
+ # of this file, in parts or full, via any medium is strictly prohibited.
11
+ # *************************************************************************
12
+
13
+ import os
14
+ import subprocess
15
+
16
+ from datatailr.logging import DatatailrLogger
17
+
18
+ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
19
+
20
+
21
+ def run():
22
+ logger.info("Starting Datatailr excel add-in...")
23
+ entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
24
+ hostname = os.environ.get("DATATAILR_HOST")
25
+
26
+ if entrypoint is None:
27
+ raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
28
+
29
+ if hostname is None:
30
+ raise ValueError("Environment variable 'DATATAILR_HOST' is not set.")
31
+
32
+ entrypoint = f'./dt-excel.sh -n -H "{hostname}" -p 8080 "{entrypoint}"'
33
+ logger.info(f"Running entrypoint: {entrypoint}")
34
+ subprocess.run(entrypoint, shell=True)
@@ -11,6 +11,7 @@
11
11
  # *************************************************************************
12
12
 
13
13
  import os
14
+ import importlib
14
15
 
15
16
  from datatailr.logging import DatatailrLogger
16
17
 
@@ -18,11 +19,16 @@ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
18
19
 
19
20
 
20
21
  def run():
21
- logger.info("Starting Datatailr app...")
22
+ logger.info("Starting Datatailr service...")
22
23
  entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
24
+ port = os.environ.get("DATATAILR_SERVICE_PORT")
23
25
 
24
26
  if entrypoint is None:
25
27
  raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
26
28
 
27
- os.system(entrypoint)
29
+ if port is None:
30
+ raise ValueError("Environment variable 'DATATAILR_SERVICE_PORT' is not set.")
31
+
32
+ entrypoint_module = importlib.import_module(entrypoint)
28
33
  logger.info(f"Running entrypoint: {entrypoint}")
34
+ entrypoint_module.__service_main__(int(port))
File without changes
File without changes