datatailr 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- {datatailr-0.1.5/src/datatailr.egg-info → datatailr-0.1.6}/PKG-INFO +2 -2
- {datatailr-0.1.5 → datatailr-0.1.6}/README.md +1 -1
- {datatailr-0.1.5 → datatailr-0.1.6}/pyproject.toml +1 -1
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/group.py +3 -1
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/arguments_cache.py +40 -25
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/batch.py +14 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/batch_decorator.py +3 -2
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/user.py +8 -1
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/wrapper.py +10 -4
- {datatailr-0.1.5 → datatailr-0.1.6/src/datatailr.egg-info}/PKG-INFO +2 -2
- {datatailr-0.1.5 → datatailr-0.1.6}/LICENSE +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/setup.cfg +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/setup.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/__init__.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/acl.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/blob.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/build/__init__.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/build/image.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/dt_json.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/errors.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/logging.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/__init__.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/base.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/constants.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/scheduler/utils.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/utils.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr/version.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr.egg-info/SOURCES.txt +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr.egg-info/dependency_links.txt +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr.egg-info/entry_points.txt +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr.egg-info/requires.txt +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/datatailr.egg-info/top_level.txt +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/sbin/run_job.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/test_module/__init__.py +0 -0
- {datatailr-0.1.5 → datatailr-0.1.6}/src/test_module/test_submodule.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datatailr
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Ready-to-Use Platform That Drives Business Insights
|
|
5
5
|
Author-email: Datatailr <info@datatailr.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
[](https://www.datatailr.com/)
|
|
32
32
|
|
|
33
33
|
___
|
|
34
|
-
**Datatailr
|
|
34
|
+
**Datatailr empowers your team to streamline analytics and data workflows
|
|
35
35
|
from idea to production without infrastructure hurdles.**
|
|
36
36
|
|
|
37
37
|
## What is Datatailr?
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[](https://www.datatailr.com/)
|
|
2
2
|
|
|
3
3
|
___
|
|
4
|
-
**Datatailr
|
|
4
|
+
**Datatailr empowers your team to streamline analytics and data workflows
|
|
5
5
|
from idea to production without infrastructure hurdles.**
|
|
6
6
|
|
|
7
7
|
## What is Datatailr?
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
from typing import Optional, Union
|
|
12
12
|
|
|
13
|
-
from datatailr import dt__Group
|
|
13
|
+
from datatailr import dt__Group, mock_cli_tool
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class Group:
|
|
@@ -69,6 +69,8 @@ class Group:
|
|
|
69
69
|
def __refresh__(self):
|
|
70
70
|
if not self.name:
|
|
71
71
|
raise ValueError("Name is not set. Cannot refresh group.")
|
|
72
|
+
if isinstance(Group.__client__, mock_cli_tool):
|
|
73
|
+
return
|
|
72
74
|
group = Group.__client__.get(self.name)
|
|
73
75
|
if group:
|
|
74
76
|
self.__name = group["name"]
|
|
@@ -22,12 +22,16 @@ This module is for internal use of the datatailr package.
|
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
24
|
from collections import defaultdict
|
|
25
|
+
import pickle
|
|
25
26
|
from typing import Any, Dict
|
|
26
27
|
|
|
27
|
-
from datatailr import is_dt_installed
|
|
28
|
+
from datatailr import is_dt_installed, Blob
|
|
28
29
|
from datatailr.scheduler import BatchJob
|
|
29
30
|
|
|
30
31
|
|
|
32
|
+
__BLOB_STORAGE__ = Blob()
|
|
33
|
+
|
|
34
|
+
|
|
31
35
|
class ArgumentsCache:
|
|
32
36
|
def __init__(self, use_persistent_cache: bool = is_dt_installed()):
|
|
33
37
|
"""
|
|
@@ -49,7 +53,8 @@ class ArgumentsCache:
|
|
|
49
53
|
:param arguments: Dictionary of arguments to store.
|
|
50
54
|
"""
|
|
51
55
|
if self.use_persistent_cache and isinstance(job, str):
|
|
52
|
-
|
|
56
|
+
path = f"{batch_run_id}/{job}/args"
|
|
57
|
+
self._add_to_persistent_cache(path, arguments)
|
|
53
58
|
else:
|
|
54
59
|
self.in_memory_cache[batch_run_id][job]["args"] = arguments
|
|
55
60
|
|
|
@@ -62,14 +67,21 @@ class ArgumentsCache:
|
|
|
62
67
|
:return: Dictionary of arguments.
|
|
63
68
|
"""
|
|
64
69
|
if self.use_persistent_cache and isinstance(job, str):
|
|
65
|
-
|
|
70
|
+
path = f"{batch_run_id}/{job}/args"
|
|
71
|
+
arg_keys = self._get_from_persistent_cache(path)
|
|
72
|
+
if not isinstance(arg_keys, dict):
|
|
73
|
+
raise TypeError(
|
|
74
|
+
f"Expected a dictionary for arguments, got {type(arg_keys)}"
|
|
75
|
+
)
|
|
76
|
+
else:
|
|
77
|
+
arg_keys = (
|
|
78
|
+
self.in_memory_cache.get(batch_run_id, {})
|
|
79
|
+
.get(job, {})
|
|
80
|
+
.get("args", {})
|
|
81
|
+
.items()
|
|
82
|
+
)
|
|
66
83
|
arguments = {}
|
|
67
|
-
for key, value in (
|
|
68
|
-
self.in_memory_cache.get(batch_run_id, {})
|
|
69
|
-
.get(job, {})
|
|
70
|
-
.get("args", {})
|
|
71
|
-
.items()
|
|
72
|
-
):
|
|
84
|
+
for key, value in arg_keys:
|
|
73
85
|
if isinstance(value, BatchJob):
|
|
74
86
|
arguments[key] = value.name
|
|
75
87
|
else:
|
|
@@ -85,7 +97,8 @@ class ArgumentsCache:
|
|
|
85
97
|
:param result: Result of the batch job.
|
|
86
98
|
"""
|
|
87
99
|
if self.use_persistent_cache and isinstance(job, str):
|
|
88
|
-
|
|
100
|
+
path = f"{batch_run_id}/{job}/result"
|
|
101
|
+
self._add_to_persistent_cache(path, result)
|
|
89
102
|
else:
|
|
90
103
|
self.in_memory_cache[batch_run_id][job]["result"] = result
|
|
91
104
|
|
|
@@ -98,29 +111,31 @@ class ArgumentsCache:
|
|
|
98
111
|
:return: Result of the batch job.
|
|
99
112
|
"""
|
|
100
113
|
if self.use_persistent_cache and isinstance(job, str):
|
|
101
|
-
|
|
114
|
+
path = f"{batch_run_id}/{job}/result"
|
|
115
|
+
return self._get_from_persistent_cache(path)
|
|
102
116
|
return self.in_memory_cache[batch_run_id][job].get("result")
|
|
103
117
|
|
|
104
|
-
def _add_to_persistent_cache(
|
|
105
|
-
self, batch_run_id: str, job_name: str, arguments: Dict[str, Any]
|
|
106
|
-
):
|
|
118
|
+
def _add_to_persistent_cache(self, path: str, blob: Any):
|
|
107
119
|
"""
|
|
108
120
|
Add arguments to the persistent cache.
|
|
121
|
+
This method serializes the blob using pickle and stores it in the Blob storage.
|
|
122
|
+
:param path: Path in the Blob storage where the blob will be stored.
|
|
123
|
+
:param blob: The blob to store, typically a dictionary of arguments.
|
|
124
|
+
:raises TypeError: If the blob cannot be pickled.
|
|
109
125
|
|
|
110
|
-
:param batch_run_id: Identifier for the batch run.
|
|
111
|
-
:param job_name: Name of the job.
|
|
112
|
-
:param arguments: Dictionary of arguments to store.
|
|
113
126
|
"""
|
|
114
|
-
|
|
127
|
+
__BLOB_STORAGE__.put_blob(
|
|
128
|
+
path, pickle.dumps(blob, protocol=pickle.HIGHEST_PROTOCOL)
|
|
129
|
+
)
|
|
115
130
|
|
|
116
|
-
def _get_from_persistent_cache(
|
|
117
|
-
self, batch_run_id: str, job_name: str
|
|
118
|
-
) -> Dict[str, Any]:
|
|
131
|
+
def _get_from_persistent_cache(self, path: str) -> Any:
|
|
119
132
|
"""
|
|
120
133
|
Retrieve arguments from the persistent cache.
|
|
121
134
|
|
|
122
|
-
:param batch_run_id: Identifier for the batch run.
|
|
123
|
-
:param job_name: Name of the job.
|
|
124
|
-
:return: Dictionary of arguments.
|
|
135
|
+
:param path: Path in the Blob storage where the blob is stored.
|
|
125
136
|
"""
|
|
126
|
-
|
|
137
|
+
try:
|
|
138
|
+
data = __BLOB_STORAGE__.get_blob(path)
|
|
139
|
+
return pickle.loads(data)
|
|
140
|
+
except (TypeError, EOFError):
|
|
141
|
+
return {}
|
|
@@ -109,6 +109,20 @@ class BatchJob:
|
|
|
109
109
|
), "All dependencies must be integers representing job IDs."
|
|
110
110
|
self.dag.add_job(self)
|
|
111
111
|
|
|
112
|
+
def __call__(self, *args, **kwds) -> BatchJob:
|
|
113
|
+
"""
|
|
114
|
+
Allows the BatchJob instance to be called like a function, returning itself.
|
|
115
|
+
This is useful for chaining or functional-style programming.
|
|
116
|
+
"""
|
|
117
|
+
return self
|
|
118
|
+
|
|
119
|
+
@property
|
|
120
|
+
def id(self) -> int:
|
|
121
|
+
"""
|
|
122
|
+
Returns the unique identifier of the BatchJob instance.
|
|
123
|
+
"""
|
|
124
|
+
return self.__id
|
|
125
|
+
|
|
112
126
|
def alias(self, name: str):
|
|
113
127
|
"""
|
|
114
128
|
Set an alias for the BatchJob instance.
|
|
@@ -40,7 +40,7 @@ def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
|
40
40
|
This decorator can be used to wrap functions that should be executed as part of batch jobs.
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
def decorator(func):
|
|
43
|
+
def decorator(func) -> BatchJob:
|
|
44
44
|
spec = inspect.getfullargspec(func)
|
|
45
45
|
signature = inspect.signature(func)
|
|
46
46
|
varargs = spec.varargs
|
|
@@ -107,6 +107,7 @@ def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
|
|
|
107
107
|
else:
|
|
108
108
|
setattr(module, "__batch_main__", {func.__name__: batch_main})
|
|
109
109
|
|
|
110
|
-
return
|
|
110
|
+
# The return type is a BatchJob, but we use type: ignore to avoid type checking issues
|
|
111
|
+
return batch_main # type: ignore
|
|
111
112
|
|
|
112
113
|
return decorator
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
from typing import Optional
|
|
12
12
|
|
|
13
|
-
from datatailr import dt__User
|
|
13
|
+
from datatailr import dt__User, mock_cli_tool
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class User:
|
|
@@ -86,6 +86,8 @@ class User:
|
|
|
86
86
|
def __refresh__(self):
|
|
87
87
|
if not self.name:
|
|
88
88
|
raise ValueError("Name is not set. Cannot refresh user.")
|
|
89
|
+
if isinstance(User.__client__, mock_cli_tool):
|
|
90
|
+
return
|
|
89
91
|
user = self.__client__.get(self.name)
|
|
90
92
|
if user:
|
|
91
93
|
self.__name = user["name"]
|
|
@@ -138,6 +140,11 @@ class User:
|
|
|
138
140
|
|
|
139
141
|
@staticmethod
|
|
140
142
|
def signed_user() -> "User":
|
|
143
|
+
if isinstance(User.__client__, mock_cli_tool):
|
|
144
|
+
user = User(name="mock_user")
|
|
145
|
+
user.__expiry__ = "mock_expiry"
|
|
146
|
+
user.__signature__ = "mock_signature"
|
|
147
|
+
return user
|
|
141
148
|
user_signature_and_expiry = User.__client__.signed_user()
|
|
142
149
|
if user_signature_and_expiry:
|
|
143
150
|
user = User(name=user_signature_and_expiry["name"])
|
|
@@ -11,17 +11,23 @@
|
|
|
11
11
|
import json
|
|
12
12
|
import os
|
|
13
13
|
import subprocess
|
|
14
|
+
from typing import Union
|
|
14
15
|
|
|
15
16
|
from datatailr.utils import is_dt_installed
|
|
16
17
|
|
|
18
|
+
API_JSON_PATH: Union[str, None] = None
|
|
19
|
+
|
|
17
20
|
if is_dt_installed() or os.path.exists("/opt/datatailr/etc/api.json"):
|
|
18
21
|
API_JSON_PATH = os.path.join("/opt", "datatailr", "etc", "api.json")
|
|
19
22
|
CLI_TOOL = "dt"
|
|
20
23
|
else:
|
|
21
24
|
# For running local tests, use api.json from the repo and a mock CLI tool
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
)
|
|
25
|
+
import sys
|
|
26
|
+
|
|
27
|
+
if any("unittest" in arg for arg in sys.argv):
|
|
28
|
+
API_JSON_PATH = os.path.join(
|
|
29
|
+
os.path.dirname(__file__), "..", "..", "..", "..", "dt", "api.json"
|
|
30
|
+
)
|
|
25
31
|
CLI_TOOL = "echo"
|
|
26
32
|
|
|
27
33
|
|
|
@@ -155,7 +161,7 @@ def create_class(cmd_name, command):
|
|
|
155
161
|
|
|
156
162
|
|
|
157
163
|
# Load API JSON and create classes at import time
|
|
158
|
-
if os.path.exists(API_JSON_PATH):
|
|
164
|
+
if API_JSON_PATH and os.path.exists(API_JSON_PATH):
|
|
159
165
|
with open(API_JSON_PATH, "r") as f:
|
|
160
166
|
api = json.load(f)
|
|
161
167
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datatailr
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Ready-to-Use Platform That Drives Business Insights
|
|
5
5
|
Author-email: Datatailr <info@datatailr.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
[](https://www.datatailr.com/)
|
|
32
32
|
|
|
33
33
|
___
|
|
34
|
-
**Datatailr
|
|
34
|
+
**Datatailr empowers your team to streamline analytics and data workflows
|
|
35
35
|
from idea to production without infrastructure hurdles.**
|
|
36
36
|
|
|
37
37
|
## What is Datatailr?
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|