datatailr 0.1.21__tar.gz → 0.1.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- {datatailr-0.1.21/src/datatailr.egg-info → datatailr-0.1.23}/PKG-INFO +1 -1
- {datatailr-0.1.21 → datatailr-0.1.23}/pyproject.toml +1 -1
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/excel.py +1 -1
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/arguments_cache.py +2 -6
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/base.py +21 -12
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/batch.py +18 -5
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/user.py +4 -2
- {datatailr-0.1.21 → datatailr-0.1.23/src/datatailr.egg-info}/PKG-INFO +1 -1
- {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run.py +6 -8
- {datatailr-0.1.21 → datatailr-0.1.23}/LICENSE +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/README.md +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/setup.cfg +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/setup.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/__init__.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/acl.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/blob.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/build/__init__.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/build/image.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/dt_json.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/errors.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/group.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/logging.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/__init__.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/batch_decorator.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/constants.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/schedule.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/utils.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/utils.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/version.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/wrapper.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/SOURCES.txt +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/dependency_links.txt +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/entry_points.txt +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/requires.txt +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/top_level.txt +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_app.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_batch.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_excel.py +0 -0
- {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_service.py +0 -0
|
@@ -21,7 +21,7 @@ and the inner dictionaries contain the arguments.
|
|
|
21
21
|
This module is for internal use of the datatailr package.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
from datatailr.dt_json import json
|
|
24
|
+
from datatailr.dt_json import json
|
|
25
25
|
import os
|
|
26
26
|
import pickle
|
|
27
27
|
from typing import Any, Dict, Optional
|
|
@@ -101,12 +101,8 @@ class ArgumentsCache:
|
|
|
101
101
|
)
|
|
102
102
|
if batch_run_id is None:
|
|
103
103
|
return arg_keys[job]
|
|
104
|
-
arguments_mapping = decode_json(
|
|
105
|
-
os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
|
|
106
|
-
)
|
|
107
|
-
arguments_mapping = {value: key for key, value in arguments_mapping.items()}
|
|
108
104
|
args = {
|
|
109
|
-
|
|
105
|
+
name: self.get_result(batch_run_id, value)
|
|
110
106
|
for name, value in arg_keys[job].items()
|
|
111
107
|
}
|
|
112
108
|
return args
|
|
@@ -11,8 +11,7 @@
|
|
|
11
11
|
from __future__ import annotations
|
|
12
12
|
|
|
13
13
|
from datetime import datetime
|
|
14
|
-
import importlib
|
|
15
|
-
import inspect
|
|
14
|
+
import importlib.util
|
|
16
15
|
import json
|
|
17
16
|
import os
|
|
18
17
|
import tempfile
|
|
@@ -96,12 +95,17 @@ class EntryPoint:
|
|
|
96
95
|
|
|
97
96
|
# Find the absolute path to the repository and then the relative path to the module.
|
|
98
97
|
# This will be used in the creation of the code 'bundle' when building the image.
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
98
|
+
module_spec = importlib.util.find_spec(func.__module__)
|
|
99
|
+
if module_spec is not None and module_spec.origin is not None:
|
|
100
|
+
package_root = module_spec.origin
|
|
101
|
+
else:
|
|
102
|
+
package_root = "."
|
|
102
103
|
module_parts = self.module_name.split(".")
|
|
103
|
-
for _ in module_parts:
|
|
104
|
+
for _ in range(len(module_parts) - 1):
|
|
104
105
|
package_root = os.path.dirname(package_root)
|
|
106
|
+
path_to_repo = run_shell_command(
|
|
107
|
+
f"cd {package_root} && git rev-parse --show-toplevel"
|
|
108
|
+
)[0]
|
|
105
109
|
path_to_module = os.path.relpath(package_root, path_to_repo)
|
|
106
110
|
self.path_to_repo = path_to_repo
|
|
107
111
|
self.path_to_module = path_to_module
|
|
@@ -123,7 +127,7 @@ class EntryPoint:
|
|
|
123
127
|
return f"EntryPoint({self.function_name} from {self.module_name}, type={self.type})"
|
|
124
128
|
|
|
125
129
|
def __str__(self):
|
|
126
|
-
return f"{self.module_name}
|
|
130
|
+
return f"{self.module_name}:{self.function_name}"
|
|
127
131
|
|
|
128
132
|
|
|
129
133
|
class Job:
|
|
@@ -277,20 +281,25 @@ class Job:
|
|
|
277
281
|
2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
|
|
278
282
|
Returns a tuple of (branch: str, commit_hash: str).
|
|
279
283
|
"""
|
|
280
|
-
|
|
281
|
-
|
|
284
|
+
path_to_repo = self.image.path_to_repo or "."
|
|
285
|
+
local_commit = run_shell_command(f"cd {path_to_repo} && git rev-parse HEAD")[0]
|
|
286
|
+
branch_name = run_shell_command(
|
|
287
|
+
f"cd {path_to_repo} && git rev-parse --abbrev-ref HEAD"
|
|
288
|
+
)[0]
|
|
282
289
|
|
|
283
290
|
if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
|
|
284
291
|
return branch_name, local_commit
|
|
285
|
-
return_code = run_shell_command("git diff --exit-code")
|
|
286
|
-
is_committed = return_code == 0
|
|
292
|
+
return_code = run_shell_command(f"cd {path_to_repo} && git diff --exit-code")
|
|
293
|
+
is_committed = return_code is not None and return_code[1] == 0
|
|
287
294
|
|
|
288
295
|
if not is_committed:
|
|
289
296
|
raise RepoValidationError(
|
|
290
297
|
"Please commit your changes before running the job."
|
|
291
298
|
)
|
|
292
299
|
|
|
293
|
-
remote_commit = run_shell_command(
|
|
300
|
+
remote_commit = run_shell_command(
|
|
301
|
+
f"cd {path_to_repo} && git ls-remote origin HEAD"
|
|
302
|
+
)[0].split("\t")[0]
|
|
294
303
|
|
|
295
304
|
if local_commit != remote_commit:
|
|
296
305
|
raise RepoValidationError(
|
|
@@ -18,7 +18,6 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
|
|
|
18
18
|
import uuid
|
|
19
19
|
|
|
20
20
|
from datatailr import Image
|
|
21
|
-
from datatailr.dt_json import encode_json
|
|
22
21
|
from datatailr.errors import BatchJobError
|
|
23
22
|
from datatailr.logging import DatatailrLogger
|
|
24
23
|
from datatailr.scheduler.base import (
|
|
@@ -114,7 +113,7 @@ class BatchJob:
|
|
|
114
113
|
isinstance(dep, int) for dep in self.dependencies
|
|
115
114
|
), "All dependencies must be integers representing job IDs."
|
|
116
115
|
self.dag.add_job(self)
|
|
117
|
-
self.__argument_mapping = argument_mapping or {}
|
|
116
|
+
self.argument_mapping = argument_mapping or {}
|
|
118
117
|
|
|
119
118
|
def __call__(self, *args, **kwds) -> BatchJob:
|
|
120
119
|
"""
|
|
@@ -200,6 +199,7 @@ class BatchJob:
|
|
|
200
199
|
"""
|
|
201
200
|
return {
|
|
202
201
|
"display_name": self.name,
|
|
202
|
+
"name": self.name,
|
|
203
203
|
"child_number": self.__id,
|
|
204
204
|
"entrypoint": str(self.entrypoint),
|
|
205
205
|
"memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
|
|
@@ -235,7 +235,7 @@ class BatchJob:
|
|
|
235
235
|
|
|
236
236
|
def __add_dependency__(self, other):
|
|
237
237
|
self.dependencies.add(other.__id)
|
|
238
|
-
arg_name = self.
|
|
238
|
+
arg_name = self.argument_mapping.get(other.name, other.name)
|
|
239
239
|
if arg_name is not None:
|
|
240
240
|
self.__args[arg_name] = other
|
|
241
241
|
|
|
@@ -282,7 +282,6 @@ class BatchJob:
|
|
|
282
282
|
"DATATAILR_BATCH_ID": str(self.dag.id),
|
|
283
283
|
"DATATAILR_JOB_ID": str(self.__id),
|
|
284
284
|
"DATATAILR_JOB_NAME": self.name,
|
|
285
|
-
"DATATAILR_JOB_ARGUMENT_MAPPING": encode_json(self.__argument_mapping),
|
|
286
285
|
}
|
|
287
286
|
self.entrypoint(env=env)
|
|
288
287
|
else:
|
|
@@ -464,9 +463,23 @@ class Batch(Job):
|
|
|
464
463
|
def arg_name(arg: Union[BatchJob, str]) -> str:
|
|
465
464
|
return arg.name if isinstance(arg, BatchJob) else arg
|
|
466
465
|
|
|
466
|
+
def merged(dst: dict[str, str], src: dict[str, str]) -> dict[str, str]:
|
|
467
|
+
# copy so we don't mutate the original mapping
|
|
468
|
+
out = dict(dst)
|
|
469
|
+
seen_vals = set(out.values())
|
|
470
|
+
for k, v in src.items():
|
|
471
|
+
if v not in seen_vals:
|
|
472
|
+
out[k] = v
|
|
473
|
+
seen_vals.add(v)
|
|
474
|
+
return out
|
|
475
|
+
|
|
467
476
|
args = {
|
|
468
|
-
j.name:
|
|
477
|
+
j.name: merged(
|
|
478
|
+
j.argument_mapping, {k: arg_name(v) for k, v in j.args.items()}
|
|
479
|
+
)
|
|
480
|
+
for j in self.__jobs
|
|
469
481
|
}
|
|
482
|
+
|
|
470
483
|
__ARGUMENTS_CACHE__.add_arguments(self.id, args)
|
|
471
484
|
if not self.__local_run and is_dt_installed():
|
|
472
485
|
return super().run()
|
|
@@ -162,7 +162,7 @@ class User:
|
|
|
162
162
|
last_name: str,
|
|
163
163
|
email: str,
|
|
164
164
|
password: str,
|
|
165
|
-
primary_group:
|
|
165
|
+
primary_group: str,
|
|
166
166
|
is_system_user: bool = False,
|
|
167
167
|
) -> Optional["User"]:
|
|
168
168
|
if is_system_user:
|
|
@@ -177,6 +177,7 @@ class User:
|
|
|
177
177
|
email=email,
|
|
178
178
|
primary_group=primary_group,
|
|
179
179
|
system=is_system_user,
|
|
180
|
+
json_enrichened=True,
|
|
180
181
|
)
|
|
181
182
|
else:
|
|
182
183
|
new_user = __client__.add(
|
|
@@ -187,9 +188,10 @@ class User:
|
|
|
187
188
|
password=password,
|
|
188
189
|
primary_group=primary_group,
|
|
189
190
|
system=is_system_user,
|
|
191
|
+
json_enrichened=True,
|
|
190
192
|
)
|
|
191
193
|
|
|
192
|
-
return new_user
|
|
194
|
+
return User(new_user["name"]) if new_user else None
|
|
193
195
|
|
|
194
196
|
@staticmethod
|
|
195
197
|
def exists(name: str) -> bool:
|
|
@@ -27,7 +27,6 @@
|
|
|
27
27
|
# DATATAILR_GID - the group ID of the group as it is defined in the system.
|
|
28
28
|
# DATATAILR_JOB_TYPE - the type of job to run. (batch\service\app\excel\IDE)
|
|
29
29
|
# Job environment variables (not all are always relevant, depending on the job type):
|
|
30
|
-
# DATATAILR_JOB_ARGUMENT_MAPPING - a JSON string mapping job argument names to their
|
|
31
30
|
# DATATAILR_BATCH_RUN_ID - the unique identifier for the batch run.
|
|
32
31
|
# DATATAILR_BATCH_ID - the unique identifier for the batch.
|
|
33
32
|
# DATATAILR_JOB_ID - the unique identifier for the job.
|
|
@@ -37,11 +36,14 @@ import os
|
|
|
37
36
|
import sys
|
|
38
37
|
from typing import Tuple
|
|
39
38
|
from datatailr.logging import DatatailrLogger
|
|
40
|
-
from datatailr.dt_json import encode_json
|
|
41
|
-
|
|
39
|
+
from datatailr.utils import is_dt_installed
|
|
42
40
|
|
|
43
41
|
logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
|
|
44
42
|
|
|
43
|
+
if not is_dt_installed():
|
|
44
|
+
logger.error("Datatailr is not installed.")
|
|
45
|
+
sys.exit(1)
|
|
46
|
+
|
|
45
47
|
|
|
46
48
|
def get_env_var(name: str, default: str | None = None) -> str:
|
|
47
49
|
"""
|
|
@@ -83,7 +85,7 @@ def run_command_as_user(command: str, user: str, env_vars: dict):
|
|
|
83
85
|
Run a command as a specific user with the given environment variables.
|
|
84
86
|
"""
|
|
85
87
|
env_vars.update({"PATH": get_env_var("PATH")})
|
|
86
|
-
env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH")})
|
|
88
|
+
env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH", "")})
|
|
87
89
|
env_vars_str = " ".join(f"{key}='{value}'" for key, value in env_vars.items())
|
|
88
90
|
full_command = f"sudo -u {user} {env_vars_str} {command}"
|
|
89
91
|
logger.debug(f"Running command: {full_command}")
|
|
@@ -103,16 +105,12 @@ def main():
|
|
|
103
105
|
if job_type == "batch":
|
|
104
106
|
run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
|
|
105
107
|
batch_id = get_env_var("DATATAILR_BATCH_ID")
|
|
106
|
-
job_argument_mapping = get_env_var(
|
|
107
|
-
"DATATAILR_JOB_ARGUMENT_MAPPING", encode_json({})
|
|
108
|
-
)
|
|
109
108
|
entrypoint = get_env_var("DATATAILR_BATCH_ENTRYPOINT")
|
|
110
109
|
env = {
|
|
111
110
|
"DATATAILR_BATCH_RUN_ID": run_id,
|
|
112
111
|
"DATATAILR_BATCH_ID": batch_id,
|
|
113
112
|
"DATATAILR_JOB_ID": job_id,
|
|
114
113
|
"DATATAILR_BATCH_ENTRYPOINT": entrypoint,
|
|
115
|
-
"DATATAILR_JOB_ARGUMENT_MAPPING": job_argument_mapping,
|
|
116
114
|
}
|
|
117
115
|
run_command_as_user("datatailr_run_batch", user, env)
|
|
118
116
|
elif job_type == "service":
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|