datatailr 0.1.46__tar.gz → 0.1.48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- {datatailr-0.1.46/src/datatailr.egg-info → datatailr-0.1.48}/PKG-INFO +1 -1
- {datatailr-0.1.46 → datatailr-0.1.48}/pyproject.toml +1 -1
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/arguments_cache.py +3 -2
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/base.py +8 -2
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/batch.py +17 -1
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/utils.py +12 -0
- {datatailr-0.1.46 → datatailr-0.1.48/src/datatailr.egg-info}/PKG-INFO +1 -1
- {datatailr-0.1.46 → datatailr-0.1.48}/src/sbin/datatailr_run.py +5 -8
- {datatailr-0.1.46 → datatailr-0.1.48}/LICENSE +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/README.md +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/setup.cfg +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/setup.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/__init__.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/acl.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/blob.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/build/__init__.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/build/image.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/dt_json.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/errors.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/excel.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/group.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/logging.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/__init__.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/batch_decorator.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/constants.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/schedule.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/scheduler/utils.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/user.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/version.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr/wrapper.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr.egg-info/SOURCES.txt +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr.egg-info/dependency_links.txt +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr.egg-info/entry_points.txt +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr.egg-info/requires.txt +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/datatailr.egg-info/top_level.txt +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/sbin/datatailr_run_app.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/sbin/datatailr_run_batch.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/sbin/datatailr_run_excel.py +0 -0
- {datatailr-0.1.46 → datatailr-0.1.48}/src/sbin/datatailr_run_service.py +0 -0
|
@@ -115,10 +115,11 @@ class ArgumentsCache:
|
|
|
115
115
|
:param job: Name of the job.
|
|
116
116
|
:param result: Result of the batch job.
|
|
117
117
|
"""
|
|
118
|
-
path = f"/tmp/datatailr/batch/results/{batch_run_id}
|
|
118
|
+
path = f"/tmp/datatailr/batch/results/{batch_run_id}/{job}.pkl"
|
|
119
119
|
if self.use_persistent_cache and isinstance(job, str):
|
|
120
120
|
self._add_to_persistent_cache(path, result)
|
|
121
121
|
else:
|
|
122
|
+
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
122
123
|
with open(path, "wb") as f:
|
|
123
124
|
pickle.dump(result, f)
|
|
124
125
|
|
|
@@ -130,7 +131,7 @@ class ArgumentsCache:
|
|
|
130
131
|
:param job: Name of the job.
|
|
131
132
|
:return: Result of the batch job.
|
|
132
133
|
"""
|
|
133
|
-
path = f"/tmp/datatailr/batch/results/{batch_run_id}
|
|
134
|
+
path = f"/tmp/datatailr/batch/results/{batch_run_id}/{job}.pkl"
|
|
134
135
|
if self.use_persistent_cache and isinstance(job, str):
|
|
135
136
|
return self._get_from_persistent_cache(path)
|
|
136
137
|
else:
|
|
@@ -18,7 +18,7 @@ import tempfile
|
|
|
18
18
|
import uuid
|
|
19
19
|
from dataclasses import dataclass
|
|
20
20
|
from enum import Enum
|
|
21
|
-
from typing import Callable, Optional, Tuple, Union
|
|
21
|
+
from typing import Callable, Dict, Optional, Tuple, Union
|
|
22
22
|
|
|
23
23
|
from datatailr import ACL, Environment, User, is_dt_installed
|
|
24
24
|
from datatailr.wrapper import dt__Job
|
|
@@ -26,7 +26,7 @@ from datatailr.scheduler.constants import DEFAULT_TASK_MEMORY, DEFAULT_TASK_CPU
|
|
|
26
26
|
from datatailr.build.image import Image
|
|
27
27
|
from datatailr.errors import BatchJobError
|
|
28
28
|
from datatailr.logging import DatatailrLogger
|
|
29
|
-
from datatailr.utils import run_shell_command
|
|
29
|
+
from datatailr.utils import run_shell_command, dict_to_env_vars
|
|
30
30
|
|
|
31
31
|
logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
|
|
32
32
|
__client__ = dt__Job()
|
|
@@ -148,6 +148,7 @@ class Job:
|
|
|
148
148
|
python_requirements: str = "",
|
|
149
149
|
build_script_pre: str = "",
|
|
150
150
|
build_script_post: str = "",
|
|
151
|
+
env_vars: Dict[str, str | int | float | bool] = {},
|
|
151
152
|
type: Optional[JobType] = JobType.UNKNOWN,
|
|
152
153
|
entrypoint: Optional[EntryPoint] = None,
|
|
153
154
|
update_existing: bool = False,
|
|
@@ -186,6 +187,7 @@ class Job:
|
|
|
186
187
|
self.type = type if entrypoint is None else entrypoint.type
|
|
187
188
|
self.entrypoint = entrypoint
|
|
188
189
|
self.__id = str(uuid.uuid4())
|
|
190
|
+
self.__env_vars = env_vars
|
|
189
191
|
|
|
190
192
|
@property
|
|
191
193
|
def id(self) -> str:
|
|
@@ -237,8 +239,12 @@ class Job:
|
|
|
237
239
|
"memory": self.resources.memory,
|
|
238
240
|
"cpu": self.resources.cpu,
|
|
239
241
|
}
|
|
242
|
+
if self.type == JobType.EXCEL:
|
|
243
|
+
if "DATATAILR_LOCAL" not in self.__env_vars:
|
|
244
|
+
self.__env_vars.update({"DATATAILR_LOCAL": "false"})
|
|
240
245
|
if self.type != JobType.BATCH:
|
|
241
246
|
job_dict["entrypoint"] = str(self.entrypoint) if self.entrypoint else None
|
|
247
|
+
job_dict["env"] = dict_to_env_vars(self.__env_vars)
|
|
242
248
|
return job_dict
|
|
243
249
|
|
|
244
250
|
def from_dict(self, job_dict: dict):
|
|
@@ -32,7 +32,7 @@ from datatailr.scheduler.base import (
|
|
|
32
32
|
from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
|
|
33
33
|
from datatailr.scheduler.arguments_cache import ArgumentsCache
|
|
34
34
|
from datatailr.scheduler.schedule import Schedule
|
|
35
|
-
from datatailr.utils import is_dt_installed
|
|
35
|
+
from datatailr.utils import is_dt_installed, dict_to_env_vars
|
|
36
36
|
|
|
37
37
|
__DAG_CONTEXT__: contextvars.ContextVar = contextvars.ContextVar("dag_context")
|
|
38
38
|
__ARGUMENTS_CACHE__ = ArgumentsCache()
|
|
@@ -97,6 +97,7 @@ class BatchJob:
|
|
|
97
97
|
dependencies: Sequence[Union[str, BatchJob]] = [],
|
|
98
98
|
dag: Optional[Batch] = get_current_manager(),
|
|
99
99
|
argument_mapping: Dict[str, str] = {},
|
|
100
|
+
env_vars: Dict[str, str | int | float | bool] = {},
|
|
100
101
|
):
|
|
101
102
|
self.name = name
|
|
102
103
|
self.entrypoint = entrypoint
|
|
@@ -112,6 +113,7 @@ class BatchJob:
|
|
|
112
113
|
assert all(
|
|
113
114
|
isinstance(dep, int) for dep in self.dependencies
|
|
114
115
|
), "All dependencies must be integers representing job IDs."
|
|
116
|
+
self.__env_vars = env_vars
|
|
115
117
|
self.dag.add_job(self)
|
|
116
118
|
self.argument_mapping = argument_mapping or {}
|
|
117
119
|
|
|
@@ -193,6 +195,16 @@ class BatchJob:
|
|
|
193
195
|
def __setstate__(self, state: dict):
|
|
194
196
|
self.__dict__.update(state)
|
|
195
197
|
|
|
198
|
+
def update_env_vars(self, env_vars: Dict[str, str | int | float | bool]) -> None:
|
|
199
|
+
"""
|
|
200
|
+
Update the environment variables for the BatchJob instance.
|
|
201
|
+
|
|
202
|
+
:param env_vars: A dictionary of environment variables to update.
|
|
203
|
+
"""
|
|
204
|
+
env = env_vars.copy()
|
|
205
|
+
env.update(self.__env_vars)
|
|
206
|
+
self.__env_vars = env
|
|
207
|
+
|
|
196
208
|
def to_dict(self):
|
|
197
209
|
"""
|
|
198
210
|
Convert the BatchJob instance to a dictionary representation.
|
|
@@ -205,6 +217,7 @@ class BatchJob:
|
|
|
205
217
|
"memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
|
|
206
218
|
"cpu": self.resources.cpu if self.resources else DEFAULT_TASK_CPU,
|
|
207
219
|
"depends_on": list(self.dependencies),
|
|
220
|
+
"env": dict_to_env_vars(self.__env_vars),
|
|
208
221
|
}
|
|
209
222
|
|
|
210
223
|
def to_json(self):
|
|
@@ -308,6 +321,7 @@ class Batch(Job):
|
|
|
308
321
|
python_requirements: str = "",
|
|
309
322
|
build_script_pre: str = "",
|
|
310
323
|
build_script_post: str = "",
|
|
324
|
+
env_vars: Dict[str, str | int | float | bool] = {},
|
|
311
325
|
):
|
|
312
326
|
super().__init__(
|
|
313
327
|
environment=environment,
|
|
@@ -327,6 +341,7 @@ class Batch(Job):
|
|
|
327
341
|
self.__BATCH_JOB_NAMES__: Dict[str, int] = {}
|
|
328
342
|
self.__local_run = local_run
|
|
329
343
|
self.__schedule = schedule
|
|
344
|
+
self.__env_vars = env_vars
|
|
330
345
|
|
|
331
346
|
@property
|
|
332
347
|
def next_job_id(self):
|
|
@@ -369,6 +384,7 @@ class Batch(Job):
|
|
|
369
384
|
f"Function {job.entrypoint.function_name} is defined in a different module: "
|
|
370
385
|
f"{package_path_to_module} != {image_path_to_module}"
|
|
371
386
|
)
|
|
387
|
+
job.update_env_vars(self.__env_vars)
|
|
372
388
|
self.__jobs.append(job)
|
|
373
389
|
|
|
374
390
|
def is_job_in(self, job: BatchJob) -> bool:
|
|
@@ -53,3 +53,15 @@ def run_shell_command(command: str) -> Tuple[str, int]:
|
|
|
53
53
|
if result.returncode != 0:
|
|
54
54
|
raise RuntimeError(f"Command '{command}' failed with error: {result.stderr}")
|
|
55
55
|
return result.stdout.strip(), result.returncode
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def dict_to_env_vars(env_vars: dict) -> list:
|
|
59
|
+
"""Convert a dictionary of environment variables to a list format suitable for shell commands.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
env_vars (dict): A dictionary where keys are environment variable names and values are their corresponding values.
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
list: A list of lists, where each inner list contains a key-value pair representing an environment variable.
|
|
66
|
+
"""
|
|
67
|
+
return [[key, str(value)] for key, value in env_vars.items()]
|
|
@@ -96,14 +96,11 @@ def run_command_as_user(command: str | list, user: str, env_vars: dict):
|
|
|
96
96
|
argv = ["sudo", "-u", user, "env", *env_kv, "bash", "-lc", command]
|
|
97
97
|
|
|
98
98
|
try:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
)
|
|
105
|
-
logger.info(f"stdout: {result.stdout}")
|
|
106
|
-
logger.debug(f"stderr: {result.stderr}")
|
|
99
|
+
proc = subprocess.Popen(argv)
|
|
100
|
+
returncode = proc.wait()
|
|
101
|
+
if returncode != 0:
|
|
102
|
+
logger.error(f"Command failed with exit code {returncode}")
|
|
103
|
+
sys.exit(returncode)
|
|
107
104
|
except subprocess.CalledProcessError as e:
|
|
108
105
|
logger.error(f"Command failed with exit code {e.returncode}")
|
|
109
106
|
logger.error(f"stderr: {e.stderr}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|