datatailr 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- datatailr/__init__.py +1 -35
- datatailr/acl.py +35 -3
- datatailr/blob.py +13 -13
- datatailr/build/image.py +38 -2
- datatailr/dt_json.py +32 -0
- datatailr/errors.py +17 -0
- datatailr/group.py +20 -12
- datatailr/logging.py +27 -10
- datatailr/sbin/datatailr_run.py +147 -0
- datatailr/sbin/datatailr_run_app.py +28 -0
- datatailr/sbin/{run_job.py → datatailr_run_batch.py} +5 -20
- datatailr/scheduler/__init__.py +24 -8
- datatailr/scheduler/arguments_cache.py +88 -45
- datatailr/scheduler/base.py +195 -69
- datatailr/scheduler/batch.py +155 -19
- datatailr/scheduler/batch_decorator.py +56 -26
- datatailr/scheduler/constants.py +1 -1
- datatailr/scheduler/schedule.py +117 -0
- datatailr/scheduler/utils.py +3 -1
- datatailr/user.py +34 -14
- datatailr/utils.py +20 -0
- datatailr/wrapper.py +10 -10
- {datatailr-0.1.5.dist-info → datatailr-0.1.8.dist-info}/METADATA +38 -5
- datatailr-0.1.8.dist-info/RECORD +30 -0
- datatailr-0.1.8.dist-info/entry_points.txt +4 -0
- datatailr-0.1.8.dist-info/top_level.txt +1 -0
- datatailr-0.1.5.dist-info/RECORD +0 -29
- datatailr-0.1.5.dist-info/entry_points.txt +0 -2
- datatailr-0.1.5.dist-info/top_level.txt +0 -2
- test_module/__init__.py +0 -17
- test_module/test_submodule.py +0 -38
- {datatailr-0.1.5.dist-info → datatailr-0.1.8.dist-info}/WHEEL +0 -0
- {datatailr-0.1.5.dist-info → datatailr-0.1.8.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,21 +11,25 @@
|
|
|
11
11
|
import functools
|
|
12
12
|
import inspect
|
|
13
13
|
import os
|
|
14
|
+
from typing import Callable
|
|
14
15
|
|
|
15
16
|
from datatailr.logging import DatatailrLogger
|
|
16
|
-
from datatailr.scheduler.arguments_cache import ArgumentsCache
|
|
17
|
+
from datatailr.scheduler.arguments_cache import ArgumentsCache, CacheNotFoundError
|
|
17
18
|
from datatailr.scheduler.base import EntryPoint, JobType, Resources
|
|
18
|
-
from datatailr.scheduler.batch import
|
|
19
|
+
from datatailr.scheduler.batch import (
|
|
20
|
+
BatchJob,
|
|
21
|
+
get_current_manager,
|
|
22
|
+
)
|
|
19
23
|
from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
|
|
20
24
|
from datatailr.scheduler.utils import get_available_env_args
|
|
21
25
|
|
|
22
26
|
__ARGUMENTS_CACHE__ = ArgumentsCache()
|
|
23
|
-
__FUNCTIONS_CREATED_IN_DAG__: dict[
|
|
27
|
+
__FUNCTIONS_CREATED_IN_DAG__: dict[Callable, str] = {}
|
|
24
28
|
logger = DatatailrLogger(__name__).get_logger()
|
|
25
29
|
|
|
26
30
|
|
|
27
31
|
def batch_run_id() -> str:
|
|
28
|
-
return os.
|
|
32
|
+
return os.getenv("DATATAILR_BATCH_RUN_ID", "unknown")
|
|
29
33
|
|
|
30
34
|
|
|
31
35
|
def dag_id(job: BatchJob) -> str:
|
|
@@ -34,13 +38,13 @@ def dag_id(job: BatchJob) -> str:
|
|
|
34
38
|
)
|
|
35
39
|
|
|
36
40
|
|
|
37
|
-
def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
41
|
+
def batch_decorator(memory: str = DEFAULT_TASK_MEMORY, cpu: float = DEFAULT_TASK_CPU):
|
|
38
42
|
"""
|
|
39
43
|
Decorator to mark a function as a batch job.
|
|
40
44
|
This decorator can be used to wrap functions that should be executed as part of batch jobs.
|
|
41
45
|
"""
|
|
42
46
|
|
|
43
|
-
def decorator(func):
|
|
47
|
+
def decorator(func) -> BatchJob:
|
|
44
48
|
spec = inspect.getfullargspec(func)
|
|
45
49
|
signature = inspect.signature(func)
|
|
46
50
|
varargs = spec.varargs
|
|
@@ -55,39 +59,62 @@ def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
|
55
59
|
# There are two possible scenarios:
|
|
56
60
|
# 1. The function is called directly, not as part of a batch job. In this case, the args and kwargs should be used.
|
|
57
61
|
# 2. The function is called as part of a batch job - it was constructed as part of a DAG and is now being executed.
|
|
58
|
-
if func not in __FUNCTIONS_CREATED_IN_DAG__:
|
|
59
|
-
return func(*args, **kwargs)
|
|
60
|
-
function_arguments = [v.name for v in parameters.values()]
|
|
61
62
|
env_args = get_available_env_args()
|
|
63
|
+
all_function_args = [
|
|
64
|
+
v.name
|
|
65
|
+
for v in parameters.values()
|
|
66
|
+
if v.kind
|
|
67
|
+
not in (
|
|
68
|
+
inspect.Parameter.VAR_POSITIONAL,
|
|
69
|
+
inspect.Parameter.VAR_KEYWORD,
|
|
70
|
+
)
|
|
71
|
+
]
|
|
62
72
|
final_args = list(args)
|
|
63
|
-
final_kwargs = kwargs.copy()
|
|
64
73
|
|
|
65
74
|
for name, value in env_args.items():
|
|
66
|
-
if name in
|
|
67
|
-
if len(final_args) < len(
|
|
75
|
+
if name in all_function_args:
|
|
76
|
+
if len(final_args) < len(all_function_args):
|
|
68
77
|
final_args.extend(
|
|
69
|
-
[None] * (len(
|
|
78
|
+
[None] * (len(all_function_args) - len(final_args))
|
|
70
79
|
)
|
|
71
|
-
final_args[
|
|
72
|
-
|
|
73
|
-
|
|
80
|
+
final_args[all_function_args.index(name)] = value
|
|
81
|
+
try:
|
|
82
|
+
final_kwargs = __ARGUMENTS_CACHE__.get_arguments(
|
|
83
|
+
dag_id(func),
|
|
84
|
+
os.getenv("DATATAILR_JOB_NAME", func.__name__),
|
|
85
|
+
os.getenv("DATATAILR_BATCH_RUN_ID"),
|
|
86
|
+
)
|
|
87
|
+
except CacheNotFoundError:
|
|
88
|
+
final_kwargs = kwargs
|
|
89
|
+
|
|
90
|
+
if varargs is not None and varkw is None:
|
|
91
|
+
for key in list(final_kwargs.keys()):
|
|
92
|
+
if key not in parameters:
|
|
93
|
+
final_args.append(final_kwargs.pop(key))
|
|
94
|
+
|
|
95
|
+
# Some of the loaded arguments are actually args and not kwargs.
|
|
96
|
+
if len(final_args) == len(parameters.keys()):
|
|
97
|
+
for i, arg_name in enumerate(parameters.keys()):
|
|
98
|
+
final_args[i] = final_kwargs.pop(arg_name, final_args[i])
|
|
99
|
+
result = func(*final_args, **final_kwargs)
|
|
100
|
+
__ARGUMENTS_CACHE__.add_result(
|
|
101
|
+
batch_run_id(),
|
|
102
|
+
os.getenv("DATATAILR_JOB_NAME", func.__name__),
|
|
103
|
+
result,
|
|
74
104
|
)
|
|
75
|
-
result = func(**function_arguments)
|
|
76
|
-
__ARGUMENTS_CACHE__.add_result(batch_run_id(), func.__name__, result)
|
|
77
105
|
return result
|
|
78
106
|
else:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
107
|
+
if varargs is not None:
|
|
108
|
+
all_args = {job.name: job for job in args}
|
|
109
|
+
else:
|
|
110
|
+
all_args = dict(zip(spec.args, args)) | kwargs
|
|
84
111
|
dag.set_autorun(True)
|
|
112
|
+
|
|
85
113
|
job = BatchJob(
|
|
86
114
|
name=func.__name__,
|
|
87
115
|
entrypoint=EntryPoint(
|
|
88
116
|
JobType.BATCH,
|
|
89
|
-
|
|
90
|
-
function_name=func.__name__,
|
|
117
|
+
func=func,
|
|
91
118
|
),
|
|
92
119
|
resources=Resources(memory=memory, cpu=cpu),
|
|
93
120
|
dependencies=[
|
|
@@ -97,6 +124,8 @@ def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
|
97
124
|
],
|
|
98
125
|
dag=dag,
|
|
99
126
|
)
|
|
127
|
+
job.args = all_args
|
|
128
|
+
__FUNCTIONS_CREATED_IN_DAG__[job.entrypoint.func] = dag.id
|
|
100
129
|
return job
|
|
101
130
|
|
|
102
131
|
module = inspect.getmodule(func)
|
|
@@ -107,6 +136,7 @@ def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
|
107
136
|
else:
|
|
108
137
|
setattr(module, "__batch_main__", {func.__name__: batch_main})
|
|
109
138
|
|
|
110
|
-
return
|
|
139
|
+
# The return type is a BatchJob, but we use type: ignore to avoid type checking issues
|
|
140
|
+
return batch_main # type: ignore
|
|
111
141
|
|
|
112
142
|
return decorator
|
datatailr/scheduler/constants.py
CHANGED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
##########################################################################
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2025 - Datatailr Inc.
|
|
4
|
+
# All Rights Reserved.
|
|
5
|
+
#
|
|
6
|
+
# This file is part of Datatailr and subject to the terms and conditions
|
|
7
|
+
# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
|
|
8
|
+
# of this file, in parts or full, via any medium is strictly prohibited.
|
|
9
|
+
##########################################################################
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
from datatailr.wrapper import dt__Job
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__CLIENT__ = dt__Job()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Schedule:
|
|
20
|
+
"""
|
|
21
|
+
Represents a schedule for batch jobs.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
cron_expression: str = "",
|
|
27
|
+
at_minutes: list[int] | None = None,
|
|
28
|
+
every_minute: int | None = None,
|
|
29
|
+
at_hours: list[int] | None = None,
|
|
30
|
+
every_hour: int | None = None,
|
|
31
|
+
weekdays: list[str] | None = None,
|
|
32
|
+
day_of_month: int | None = None,
|
|
33
|
+
in_month: list[str] | None = None,
|
|
34
|
+
every_month: int | None = None,
|
|
35
|
+
timezone: str | None = None,
|
|
36
|
+
run_after_job_uuid: str | None = None,
|
|
37
|
+
run_after_job_name: str | None = None,
|
|
38
|
+
run_after_job_condition: str | None = None,
|
|
39
|
+
):
|
|
40
|
+
self.at_minutes = at_minutes
|
|
41
|
+
self.every_minute = every_minute
|
|
42
|
+
self.at_hours = at_hours
|
|
43
|
+
self.every_hour = every_hour
|
|
44
|
+
self.weekdays = weekdays
|
|
45
|
+
self.day_of_month = day_of_month
|
|
46
|
+
self.in_month = in_month
|
|
47
|
+
self.every_month = every_month
|
|
48
|
+
self.timezone = timezone
|
|
49
|
+
self.run_after_job_uuid = run_after_job_uuid
|
|
50
|
+
self.run_after_job_name = run_after_job_name
|
|
51
|
+
self.run_after_job_condition = run_after_job_condition
|
|
52
|
+
self.schedule_expression = None
|
|
53
|
+
self.cron_expression = cron_expression
|
|
54
|
+
|
|
55
|
+
self.__is_set__ = False
|
|
56
|
+
|
|
57
|
+
def __str__(self) -> str:
|
|
58
|
+
self.__compile__()
|
|
59
|
+
return self.cron_expression
|
|
60
|
+
|
|
61
|
+
def __repr__(self) -> str:
|
|
62
|
+
self.__compile__()
|
|
63
|
+
return f"Schedule(cron_expression={self.cron_expression}, timezone={self.timezone}) - {self.schedule_expression}"
|
|
64
|
+
|
|
65
|
+
def __setattr__(self, name: str, value: Any) -> None:
|
|
66
|
+
super().__setattr__(name, value)
|
|
67
|
+
if name in [
|
|
68
|
+
"at_minutes",
|
|
69
|
+
"at_hours",
|
|
70
|
+
"every_minute",
|
|
71
|
+
"every_hour",
|
|
72
|
+
"weekdays",
|
|
73
|
+
"day_of_month",
|
|
74
|
+
"in_month",
|
|
75
|
+
"every_month",
|
|
76
|
+
]:
|
|
77
|
+
self.__is_set__ = False
|
|
78
|
+
|
|
79
|
+
def __compile__(self):
|
|
80
|
+
if self.__is_set__:
|
|
81
|
+
return
|
|
82
|
+
argument_name = [
|
|
83
|
+
"at_minutes",
|
|
84
|
+
"at_hours",
|
|
85
|
+
"every_minute",
|
|
86
|
+
"every_hour",
|
|
87
|
+
"weekdays",
|
|
88
|
+
"day_of_month",
|
|
89
|
+
"in_month",
|
|
90
|
+
"every_month",
|
|
91
|
+
"run_after_job_uuid",
|
|
92
|
+
"run_after_job_name",
|
|
93
|
+
"run_after_job_condition",
|
|
94
|
+
]
|
|
95
|
+
arguments = {}
|
|
96
|
+
|
|
97
|
+
for key in argument_name:
|
|
98
|
+
if hasattr(self, key) and getattr(self, key) is not None:
|
|
99
|
+
value = getattr(self, key)
|
|
100
|
+
if isinstance(value, list):
|
|
101
|
+
value = ",".join(map(str, value))
|
|
102
|
+
arguments[key] = value
|
|
103
|
+
|
|
104
|
+
result = __CLIENT__.run("", cron_string=True, **arguments)
|
|
105
|
+
match = re.match(r"^(.*?)\s*\((.*?)\)$", result)
|
|
106
|
+
if match:
|
|
107
|
+
cron_expression, schedule_expression = match.groups()
|
|
108
|
+
self.cron_expression = cron_expression.strip()
|
|
109
|
+
self.schedule_expression = schedule_expression.strip()
|
|
110
|
+
self.__is_set__ = True
|
|
111
|
+
|
|
112
|
+
def get_cron_string(self) -> str:
|
|
113
|
+
"""
|
|
114
|
+
Returns the compiled cron string.
|
|
115
|
+
"""
|
|
116
|
+
self.__compile__()
|
|
117
|
+
return self.cron_expression
|
datatailr/scheduler/utils.py
CHANGED
|
@@ -16,7 +16,9 @@ from datatailr.scheduler.constants import BATCH_JOB_ARGUMENTS
|
|
|
16
16
|
def get_available_env_args():
|
|
17
17
|
"""
|
|
18
18
|
Get the available environment variables for batch job arguments.
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
This function retrieves the environment variables that match the keys defined in DATATAILR_BATCH_JOB_ARGUMENTS.
|
|
21
|
+
|
|
20
22
|
Returns:
|
|
21
23
|
dict: A dictionary of available environment variables for batch jobs.
|
|
22
24
|
"""
|
datatailr/user.py
CHANGED
|
@@ -8,15 +8,23 @@
|
|
|
8
8
|
# of this file, in parts or full, via any medium is strictly prohibited.
|
|
9
9
|
# *************************************************************************
|
|
10
10
|
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
import sys
|
|
11
13
|
from typing import Optional
|
|
12
14
|
|
|
13
|
-
from datatailr import dt__User
|
|
15
|
+
from datatailr.wrapper import dt__User, mock_cli_tool
|
|
16
|
+
|
|
17
|
+
# Datatailr User API Client
|
|
18
|
+
__client__ = dt__User()
|
|
14
19
|
|
|
15
20
|
|
|
16
21
|
class User:
|
|
17
|
-
"""
|
|
22
|
+
"""
|
|
23
|
+
Representing a Datatailr User.
|
|
24
|
+
|
|
18
25
|
This class provides methods to interact with the Datatailr User API.
|
|
19
26
|
It allows you to create, update, delete, and manage users within the Datatailr platform.
|
|
27
|
+
|
|
20
28
|
Attributes:
|
|
21
29
|
first_name (str): The first name of the user.
|
|
22
30
|
last_name (str): The last name of the user.
|
|
@@ -25,6 +33,7 @@ class User:
|
|
|
25
33
|
user_id (int): The unique identifier for the user.
|
|
26
34
|
primary_group_id (int): The primary group of the user.
|
|
27
35
|
is_system_user (bool): Indicates if the user is a system user.
|
|
36
|
+
|
|
28
37
|
Static Methods:
|
|
29
38
|
signed_user() -> Optional['User']:
|
|
30
39
|
Retrieve the currently signed-in user, if available.
|
|
@@ -38,14 +47,12 @@ class User:
|
|
|
38
47
|
List all users available in the Datatailr platform.
|
|
39
48
|
remove(name: str) -> None:
|
|
40
49
|
Remove a user by their username.
|
|
50
|
+
|
|
41
51
|
Instance Methods:
|
|
42
52
|
verify() -> None:
|
|
43
53
|
Refresh the user information from the Datatailr API.
|
|
44
54
|
"""
|
|
45
55
|
|
|
46
|
-
# Datatailr User API Client
|
|
47
|
-
__client__ = dt__User()
|
|
48
|
-
|
|
49
56
|
def __init__(self, name):
|
|
50
57
|
self.__name = name
|
|
51
58
|
self.__first_name = None
|
|
@@ -86,7 +93,11 @@ class User:
|
|
|
86
93
|
def __refresh__(self):
|
|
87
94
|
if not self.name:
|
|
88
95
|
raise ValueError("Name is not set. Cannot refresh user.")
|
|
89
|
-
|
|
96
|
+
if isinstance(__client__, mock_cli_tool) or any(
|
|
97
|
+
"unit" in arg for arg in sys.argv
|
|
98
|
+
):
|
|
99
|
+
return
|
|
100
|
+
user = __client__.get(self.name)
|
|
90
101
|
if user:
|
|
91
102
|
self.__name = user["name"]
|
|
92
103
|
self.__first_name = user["first_name"]
|
|
@@ -133,17 +144,26 @@ class User:
|
|
|
133
144
|
return self.__is_system_user
|
|
134
145
|
|
|
135
146
|
@staticmethod
|
|
136
|
-
def get(name: str) ->
|
|
147
|
+
def get(name: str) -> User:
|
|
137
148
|
return User(name)
|
|
138
149
|
|
|
139
150
|
@staticmethod
|
|
140
|
-
def signed_user() ->
|
|
141
|
-
|
|
151
|
+
def signed_user() -> User:
|
|
152
|
+
if isinstance(__client__, mock_cli_tool) or any(
|
|
153
|
+
"unit" in arg for arg in sys.argv
|
|
154
|
+
):
|
|
155
|
+
user = User(name="test_user")
|
|
156
|
+
user.__expiry__ = "mock_expiry"
|
|
157
|
+
user.__signature__ = "mock_signature"
|
|
158
|
+
return user
|
|
159
|
+
|
|
160
|
+
user_signature_and_expiry = __client__.signed_user()
|
|
142
161
|
if user_signature_and_expiry:
|
|
143
162
|
user = User(name=user_signature_and_expiry["name"])
|
|
144
163
|
user.__expiry__ = user_signature_and_expiry["expiry"]
|
|
145
164
|
user.__signature__ = user_signature_and_expiry["signature"]
|
|
146
165
|
return user
|
|
166
|
+
|
|
147
167
|
raise PermissionError(
|
|
148
168
|
"No signed user found. Please ensure you are signed in to Datatailr."
|
|
149
169
|
)
|
|
@@ -172,7 +192,7 @@ class User:
|
|
|
172
192
|
system=is_system_user,
|
|
173
193
|
)
|
|
174
194
|
else:
|
|
175
|
-
|
|
195
|
+
__client__.add(
|
|
176
196
|
name,
|
|
177
197
|
first_name=first_name,
|
|
178
198
|
last_name=last_name,
|
|
@@ -185,17 +205,17 @@ class User:
|
|
|
185
205
|
|
|
186
206
|
@staticmethod
|
|
187
207
|
def exists(name: str) -> bool:
|
|
188
|
-
return
|
|
208
|
+
return __client__.exists(name)
|
|
189
209
|
|
|
190
210
|
@staticmethod
|
|
191
211
|
def ls() -> list:
|
|
192
|
-
users =
|
|
212
|
+
users = __client__.ls()
|
|
193
213
|
return [User.get(user["name"]) for user in users]
|
|
194
214
|
|
|
195
215
|
@staticmethod
|
|
196
216
|
def remove(name: str) -> None:
|
|
197
|
-
|
|
217
|
+
__client__.rm(name)
|
|
198
218
|
return None
|
|
199
219
|
|
|
200
220
|
def verify(self) -> None:
|
|
201
|
-
return
|
|
221
|
+
return __client__.verify(self.name, self.__expiry__, self.__signature__)
|
datatailr/utils.py
CHANGED
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
|
|
11
11
|
import shutil
|
|
12
12
|
from enum import Enum
|
|
13
|
+
import subprocess
|
|
14
|
+
from typing import Tuple
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class Environment(Enum):
|
|
@@ -33,3 +35,21 @@ def is_dt_installed():
|
|
|
33
35
|
Check if DataTailr is installed by looking for the 'dt' command in the system PATH.
|
|
34
36
|
"""
|
|
35
37
|
return shutil.which("dt") is not None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def run_shell_command(command: str) -> Tuple[str, int]:
|
|
41
|
+
"""
|
|
42
|
+
Run a shell command.
|
|
43
|
+
|
|
44
|
+
This function executes a shell command and returns the output.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
command (str): The shell command to execute.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
str: The output of the executed command.
|
|
51
|
+
"""
|
|
52
|
+
result = subprocess.run(command, shell=True, capture_output=True, text=True)
|
|
53
|
+
if result.returncode != 0:
|
|
54
|
+
raise RuntimeError(f"Command '{command}' failed with error: {result.stderr}")
|
|
55
|
+
return result.stdout.strip(), result.returncode
|
datatailr/wrapper.py
CHANGED
|
@@ -11,17 +11,23 @@
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
13
|
import subprocess
|
|
14
|
+
from typing import Union
|
|
14
15
|
|
|
15
16
|
from datatailr.utils import is_dt_installed
|
|
16
17
|
|
|
18
|
+
API_JSON_PATH: Union[str, None] = None
|
|
19
|
+
|
|
17
20
|
if is_dt_installed() or os.path.exists("/opt/datatailr/etc/api.json"):
|
|
18
21
|
API_JSON_PATH = os.path.join("/opt", "datatailr", "etc", "api.json")
|
|
19
22
|
CLI_TOOL = "dt"
|
|
20
23
|
else:
|
|
21
24
|
# For running local tests, use api.json from the repo and a mock CLI tool
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
)
|
|
25
|
+
import sys
|
|
26
|
+
|
|
27
|
+
if any("unittest" in arg for arg in sys.argv):
|
|
28
|
+
API_JSON_PATH = os.path.join(
|
|
29
|
+
os.path.dirname(__file__), "..", "..", "..", "..", "dt", "api.json"
|
|
30
|
+
)
|
|
25
31
|
CLI_TOOL = "echo"
|
|
26
32
|
|
|
27
33
|
|
|
@@ -155,7 +161,7 @@ def create_class(cmd_name, command):
|
|
|
155
161
|
|
|
156
162
|
|
|
157
163
|
# Load API JSON and create classes at import time
|
|
158
|
-
if os.path.exists(API_JSON_PATH):
|
|
164
|
+
if API_JSON_PATH and os.path.exists(API_JSON_PATH):
|
|
159
165
|
with open(API_JSON_PATH, "r") as f:
|
|
160
166
|
api = json.load(f)
|
|
161
167
|
|
|
@@ -188,12 +194,6 @@ dt__Blob = globals().get("dt__Blob", mock_cli_tool)
|
|
|
188
194
|
dt__Dns = globals().get("dt__Dns", mock_cli_tool)
|
|
189
195
|
dt__System = globals().get("dt__System", mock_cli_tool)
|
|
190
196
|
dt__Sms = globals().get("dt__Sms", mock_cli_tool)
|
|
191
|
-
dt__Group = globals().get("dt__Group", mock_cli_tool)
|
|
192
|
-
dt__Job = globals().get("dt__Job", mock_cli_tool)
|
|
193
|
-
dt__Blob = globals().get("dt__Blob", mock_cli_tool)
|
|
194
|
-
dt__Dns = globals().get("dt__Dns", mock_cli_tool)
|
|
195
|
-
dt__System = globals().get("dt__System", mock_cli_tool)
|
|
196
|
-
dt__Sms = globals().get("dt__Sms", mock_cli_tool)
|
|
197
197
|
dt__Email = globals().get("dt__Email", mock_cli_tool)
|
|
198
198
|
dt__Kv = globals().get("dt__Kv", mock_cli_tool)
|
|
199
199
|
dt__Log = globals().get("dt__Log", mock_cli_tool)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datatailr
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Ready-to-Use Platform That Drives Business Insights
|
|
5
5
|
Author-email: Datatailr <info@datatailr.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -26,15 +26,27 @@ Requires-Dist: mypy; extra == "dev"
|
|
|
26
26
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
27
27
|
Requires-Dist: toml; extra == "dev"
|
|
28
28
|
Requires-Dist: coverage; extra == "dev"
|
|
29
|
+
Requires-Dist: sphinx-rtd-theme; extra == "dev"
|
|
30
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
31
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "dev"
|
|
32
|
+
Requires-Dist: sphinx-autosummary; extra == "dev"
|
|
33
|
+
Requires-Dist: sphinx-design; extra == "dev"
|
|
34
|
+
Requires-Dist: sphinx-copybutton; extra == "dev"
|
|
35
|
+
Requires-Dist: myst-parser; extra == "dev"
|
|
29
36
|
Dynamic: license-file
|
|
30
37
|
|
|
31
|
-
|
|
38
|
+
<div style="text-align: center;">
|
|
39
|
+
<a href="https://www.datatailr.com/" target="_blank">
|
|
40
|
+
<img src="https://s3.eu-west-1.amazonaws.com/datatailr.com/assets/datatailr-logo.svg" alt="Datatailr Logo" />
|
|
41
|
+
</a>
|
|
42
|
+
</div>
|
|
32
43
|
|
|
33
|
-
|
|
34
|
-
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
**Datatailr empowers your team to streamline analytics and data workflows
|
|
35
47
|
from idea to production without infrastructure hurdles.**
|
|
36
48
|
|
|
37
|
-
|
|
49
|
+
# What is Datatailr?
|
|
38
50
|
|
|
39
51
|
Datatailr is a platform that simplifies the process of building and deploying data applications.
|
|
40
52
|
|
|
@@ -69,7 +81,28 @@ print(datatailr.__provider__)
|
|
|
69
81
|
|
|
70
82
|
|
|
71
83
|
## Quickstart
|
|
84
|
+
The following example shows how to create a simple data pipeline using the Datatailr Python package.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from datatailr.scheduler import batch, Batch
|
|
88
|
+
|
|
89
|
+
@batch()
|
|
90
|
+
def func_no_args() -> str:
|
|
91
|
+
return "no_args"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@batch()
|
|
95
|
+
def func_with_args(a: int, b: float) -> str:
|
|
96
|
+
return f"args: {a}, {b}"
|
|
97
|
+
|
|
98
|
+
with Batch(name="MY test DAG", local_run=True) as dag:
|
|
99
|
+
for n in range(2):
|
|
100
|
+
res1 = func_no_args().alias(f"func_{n}")
|
|
101
|
+
res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
|
|
102
|
+
```
|
|
72
103
|
|
|
104
|
+
Running this code will create a graph of jobs and execute it.
|
|
105
|
+
Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
|
|
73
106
|
|
|
74
107
|
___
|
|
75
108
|
Visit [our website](https://www.datatailr.com/) for more!
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
datatailr/__init__.py,sha256=QTTG8X76BnlQwVx5N4ZQtSbLkgFipZ9NJGAbvtfuk_g,1051
|
|
2
|
+
datatailr/acl.py,sha256=tlDy6VlHinSy5W1FbVxcNQNi7FliWUXy3ssIbzaPp28,4157
|
|
3
|
+
datatailr/blob.py,sha256=xkXT6RZcMww4YfLVjOyqvvPxWc-Ku6fTJ_PeCXyBys4,3159
|
|
4
|
+
datatailr/dt_json.py,sha256=3xmTqDBk68oPl2UW8UVOYPaBw4lAsVg6nDLwcen5nuo,2252
|
|
5
|
+
datatailr/errors.py,sha256=p_e4ao3sFEfz1g4LvEDqw6bVzHJPJSINLjJ8H6_PqOo,751
|
|
6
|
+
datatailr/group.py,sha256=34unhas6jQH_KgQSz7N42AIT-4wAHtS9T8x-_W7AkA8,4444
|
|
7
|
+
datatailr/logging.py,sha256=IkucGsHfkxaiy8Ul4Qy6U3fHbqCq3mChTDoaBNovYeA,3403
|
|
8
|
+
datatailr/user.py,sha256=uIRMDdR_vey2r0d7aVEoq5WHb6tKLP8jBtEPc2n-xBY,7046
|
|
9
|
+
datatailr/utils.py,sha256=mqnnERMyHNAuAgFY4Ry4O4yW0ZjCRtJbjfI5fXVqt2s,1524
|
|
10
|
+
datatailr/version.py,sha256=N9K8ZxlwFFSz8XSgbgaTWZY4k2J0JKfj698nZ_O2pIU,536
|
|
11
|
+
datatailr/wrapper.py,sha256=K9ZD76cWey_ikA6C5sKejwRaYBDln4QMg-RcoRGiuFc,7991
|
|
12
|
+
datatailr/build/__init__.py,sha256=_dA7b4L6wsaAFaSxUoYSJ1oaRqDHDMR20kqoCocSOss,487
|
|
13
|
+
datatailr/build/image.py,sha256=xeYKPR6usg0pqJNpbhsYI8t_BB2iWxhY0KBNTWqpb_Q,4939
|
|
14
|
+
datatailr/sbin/datatailr_run.py,sha256=3WuvzdRrRFA0nWX9v-nShwoqjOQSnvzgZRvRvWdKJPI,5756
|
|
15
|
+
datatailr/sbin/datatailr_run_app.py,sha256=67Y3RQPCZ2N9LIErEdXOsnYQ_4UzTmQygjyIfMhEkyk,891
|
|
16
|
+
datatailr/sbin/datatailr_run_batch.py,sha256=UWnp96j_G66R_Cape7Bb-rbK6UBLF7Y5_mTlWyGJAVQ,1818
|
|
17
|
+
datatailr/scheduler/__init__.py,sha256=qydHYVtEP6SUWd2CQ6FRdTdRWNz3SbYPJy4FK_wOvMk,1772
|
|
18
|
+
datatailr/scheduler/arguments_cache.py,sha256=CydYR9o2pqfa4KsPTA1mJSBN-0YF47Q6AmODm4zAJQ4,6254
|
|
19
|
+
datatailr/scheduler/base.py,sha256=OkoMxTjSzpFIFikWRctN9kuZM85oJDbNET8xKe8YUQY,12558
|
|
20
|
+
datatailr/scheduler/batch.py,sha256=CK57jS6MvFrSj14q9ZTrD15akoBdAeTJ130MFb_aIN4,16449
|
|
21
|
+
datatailr/scheduler/batch_decorator.py,sha256=LqL1bsupWLn-YEQUvFJYae7R3ogrL5-VodyiiScrkRw,5806
|
|
22
|
+
datatailr/scheduler/constants.py,sha256=5WWTsfwZ_BA8gVDOTa2AQX9DJ0NzfaWgtY3vrODS2-8,606
|
|
23
|
+
datatailr/scheduler/schedule.py,sha256=vzXaBBKMVJeCGD0VxsRPeW80sYReJ83XxWzDHVgLibY,3734
|
|
24
|
+
datatailr/scheduler/utils.py,sha256=up6oR2iwe6G52LkvgfO394xchXgCYNjOMGRQW3e8PQk,1082
|
|
25
|
+
datatailr-0.1.8.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
|
|
26
|
+
datatailr-0.1.8.dist-info/METADATA,sha256=rviYJtuu-teMbR_toxDPyC3ANi5sGzWeFFxH1PhijSM,3608
|
|
27
|
+
datatailr-0.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
28
|
+
datatailr-0.1.8.dist-info/entry_points.txt,sha256=VVBtNTDhrPMkcqrU3XjTHrfEdPQXwbJW9kgH1C-rT7U,186
|
|
29
|
+
datatailr-0.1.8.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
|
|
30
|
+
datatailr-0.1.8.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
datatailr
|
datatailr-0.1.5.dist-info/RECORD
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
datatailr/__init__.py,sha256=god5oEAyJEGbEsxGZOwD6llaBkDNzKjCl8j2tOkiGgQ,1567
|
|
2
|
-
datatailr/acl.py,sha256=gLuzw6FLFYLVoyXOS_oHMfd5uGKKzlDqR1WBdN_AGwQ,2862
|
|
3
|
-
datatailr/blob.py,sha256=VImsbKBaYP6oG2Wiy4oPr6zxtUibEHpfI8p8nE1wj14,3195
|
|
4
|
-
datatailr/dt_json.py,sha256=oos6hwC4UxT4TEbbXNSozQGhwnc_bfpo-Y2rj24ks9A,1422
|
|
5
|
-
datatailr/errors.py,sha256=0CHvBOlzvDWoePk_preMy2qKlsztKCuQNdaQDbhNyaU,204
|
|
6
|
-
datatailr/group.py,sha256=-zDktOVhbGfmNn-3eMqDSyL6Sr2uz0gkHmCHtMrHMnA,4391
|
|
7
|
-
datatailr/logging.py,sha256=b3Uaumdo1ZZOaTnD-7iH1rlieWsllQSijNn9rX4svuo,3053
|
|
8
|
-
datatailr/user.py,sha256=2N9HzeHixwZ_Hsu0VM7YYbyJCoemGW9eASzT1rriycQ,6615
|
|
9
|
-
datatailr/utils.py,sha256=eHXOc7VwIUR8ryn5jBmw5QI00ARERJwXV5oICdVRnmQ,937
|
|
10
|
-
datatailr/version.py,sha256=N9K8ZxlwFFSz8XSgbgaTWZY4k2J0JKfj698nZ_O2pIU,536
|
|
11
|
-
datatailr/wrapper.py,sha256=Sm6OruuwOKx2tA01ftFEvp4Hfl9yWY_BVXwHbWCTRzE,8141
|
|
12
|
-
datatailr/build/__init__.py,sha256=_dA7b4L6wsaAFaSxUoYSJ1oaRqDHDMR20kqoCocSOss,487
|
|
13
|
-
datatailr/build/image.py,sha256=P2MiGxpzuZ6hOm9JubkLoOq-RdzKYnXbKJHiryIbJeA,3103
|
|
14
|
-
datatailr/sbin/run_job.py,sha256=B3H3UI3zpll7zVm7lZLsfPtnRlA6jjk8s_D_w89pvC4,2175
|
|
15
|
-
datatailr/scheduler/__init__.py,sha256=YtCnv9vuX-EPGr3WBXvXJIlLsZ94mW1dBzI6h7Yjcu0,1009
|
|
16
|
-
datatailr/scheduler/arguments_cache.py,sha256=-JnAWXfHMSSkYJx8iYt9JgvdPgR-1Z8DcN5Kvcx9Amo,4574
|
|
17
|
-
datatailr/scheduler/base.py,sha256=IRduJjhng4Uuv3Y9vDqUNTbs636AgH2ZISWRbIP6LqQ,7826
|
|
18
|
-
datatailr/scheduler/batch.py,sha256=BlZq609thXM0skZiPZjGHLO9WozOq8T3SiErpFPU4Zg,10779
|
|
19
|
-
datatailr/scheduler/batch_decorator.py,sha256=o6DAEODwsfYx9uYwjnaJZ9iUmqbFH2wMh-jz_L5bgNo,4598
|
|
20
|
-
datatailr/scheduler/constants.py,sha256=ISG5uMnVPbGbjaaulU0xdmSggnd-DMr9ed0WTAZSUmU,604
|
|
21
|
-
datatailr/scheduler/utils.py,sha256=YHtAc5sCwfgiClr5G6R3hfAjdlrFdnNW2l-3XwPZLXM,1070
|
|
22
|
-
datatailr-0.1.5.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
|
|
23
|
-
test_module/__init__.py,sha256=OF9XaL3RKdbWD5Ug4L-ufLqbykSt_rTK6gqZr4uBJ8g,576
|
|
24
|
-
test_module/test_submodule.py,sha256=O07fFbzJEy5rf1vdlYzPvs8v0IxHFg-CaYMqaHkskbc,1294
|
|
25
|
-
datatailr-0.1.5.dist-info/METADATA,sha256=uaE8S33ZpLXxhAZHjpjzQLDjteygHhNHXYMmheg4VY8,2511
|
|
26
|
-
datatailr-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
-
datatailr-0.1.5.dist-info/entry_points.txt,sha256=4lNE9VXvztJdIQsODI308v9FRzkVgmMu-VKGEceNxJs,59
|
|
28
|
-
datatailr-0.1.5.dist-info/top_level.txt,sha256=UZOKaWS1kZGGwV7hP476-EcSUJBNspxVSSp9WqtORzk,22
|
|
29
|
-
datatailr-0.1.5.dist-info/RECORD,,
|
test_module/__init__.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
# *************************************************************************
|
|
2
|
-
#
|
|
3
|
-
# Copyright (c) 2025 - Datatailr Inc.
|
|
4
|
-
# All Rights Reserved.
|
|
5
|
-
#
|
|
6
|
-
# This file is part of Datatailr and subject to the terms and conditions
|
|
7
|
-
# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
|
|
8
|
-
# of this file, in parts or full, via any medium is strictly prohibited.
|
|
9
|
-
# *************************************************************************
|
|
10
|
-
|
|
11
|
-
from .test_submodule import foo
|
|
12
|
-
from .test_submodule import test_function as test_function
|
|
13
|
-
|
|
14
|
-
__all__ = [
|
|
15
|
-
"test_function",
|
|
16
|
-
"foo",
|
|
17
|
-
]
|