datatailr 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in the supported public registries, and is provided for informational purposes only.
This version of datatailr has been flagged as potentially problematic.
- datatailr/__init__.py +63 -0
- datatailr/acl.py +80 -0
- datatailr/blob.py +103 -0
- datatailr/build/__init__.py +11 -0
- datatailr/build/image.py +87 -0
- datatailr/dt_json.py +42 -0
- datatailr/errors.py +10 -0
- datatailr/group.py +136 -0
- datatailr/logging.py +93 -0
- datatailr/sbin/run_job.py +63 -0
- datatailr/scheduler/__init__.py +38 -0
- datatailr/scheduler/arguments_cache.py +126 -0
- datatailr/scheduler/base.py +238 -0
- datatailr/scheduler/batch.py +347 -0
- datatailr/scheduler/batch_decorator.py +112 -0
- datatailr/scheduler/constants.py +20 -0
- datatailr/scheduler/utils.py +28 -0
- datatailr/user.py +201 -0
- datatailr/utils.py +35 -0
- datatailr/version.py +14 -0
- datatailr/wrapper.py +204 -0
- datatailr-0.1.2.dist-info/METADATA +24 -0
- datatailr-0.1.2.dist-info/RECORD +29 -0
- {datatailr-0.1.0.dist-info → datatailr-0.1.2.dist-info}/WHEEL +1 -1
- datatailr-0.1.2.dist-info/entry_points.txt +2 -0
- {datatailr-0.1.0.dist-info → datatailr-0.1.2.dist-info}/licenses/LICENSE +0 -1
- datatailr-0.1.2.dist-info/top_level.txt +2 -0
- test_module/__init__.py +17 -0
- test_module/test_submodule.py +38 -0
- datatailr-0.1.0.dist-info/METADATA +0 -17
- datatailr-0.1.0.dist-info/RECORD +0 -6
- datatailr-0.1.0.dist-info/top_level.txt +0 -1
datatailr/scheduler/batch.py
@@ -0,0 +1,347 @@
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+from __future__ import annotations
+
+import contextvars
+import json
+import os
+from functools import reduce
+from typing import List, Optional, Sequence, Set, Tuple, Union
+
+from datatailr import Image
+from datatailr.errors import BatchJobError
+from datatailr.logging import DatatailrLogger
+from datatailr.scheduler.base import (
+    ACL,
+    EntryPoint,
+    Environment,
+    Job,
+    JobType,
+    Resources,
+    User,
+)
+from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
+from datatailr.utils import is_dt_installed
+
+__BATCH_JOB_NAMES__ = {}
+__DAG_CONTEXT__: contextvars.ContextVar = contextvars.ContextVar("dag_context")
+logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+def get_current_manager():
+    return __DAG_CONTEXT__.get(None)
+
+
+def next_batch_job_id():
+    i = 0
+    while True:
+        yield i
+        i += 1
+
+
+class CyclicDependencyError(BatchJobError):
+    """
+    Exception raised when a cyclic dependency is detected in the batch job dependencies.
+    """
+
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message
+
+
+class DuplicateJobNameError(BatchJobError):
+    """
+    Exception raised when a job with a duplicate name is added to the batch.
+    """
+
+    def __init__(self, job_name: str):
+        super().__init__(
+            f"A job with the name '{job_name}' already exists in the batch."
+        )
+        self.job_name = job_name
+
+
+class MissingDagError(BatchJobError):
+    """
+    Exception raised when a BatchJob is created outside the context of a Batch.
+    """
+
+    def __init__(self):
+        super().__init__(
+            "A BatchJob must be either created within the context of a Batch or a Batch object has to be provided as the dag argument."
+        )
+
+
+class BatchJob:
+    """
+    Represents a job within a batch job.
+
+    This class can be extended to define specific configurations for each job in the batch.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        entrypoint: EntryPoint,
+        resources: Optional[Resources] = None,
+        dependencies: Sequence[Union[str, BatchJob]] = [],
+        dag: Optional[Batch] = get_current_manager(),
+    ):
+        self.name = name
+        self.entrypoint = entrypoint
+        self.resources = resources
+        self.dependencies: set = set(dependencies)
+        if dag is None:
+            raise MissingDagError()
+        self.__id = dag.next_job_id
+        self.dag = dag
+        __BATCH_JOB_NAMES__[self.name] = self.__id
+        self.dependencies = self.translate_dependencies()
+        assert all(
+            isinstance(dep, int) for dep in self.dependencies
+        ), "All dependencies must be integers representing job IDs."
+        self.dag.add_job(self)
+
+    def alias(self, name: str):
+        """
+        Set an alias for the BatchJob instance.
+
+        :param name: The alias name to set.
+        """
+        if name in __BATCH_JOB_NAMES__:
+            raise DuplicateJobNameError(name)
+        assert __BATCH_JOB_NAMES__.pop(self.name) == self.__id
+        __BATCH_JOB_NAMES__[name] = self.__id
+        self.name = name
+        return self
+
+    def __repr__(self):
+        return (
+            f"BatchJob(name={self.name}, entrypoint={self.entrypoint}, "
+            f"resources={self.resources}) (id={self.__id})"
+        )
+
+    def to_dict(self):
+        """
+        Convert the BatchJob instance to a dictionary representation.
+        """
+        return {
+            "display_name": self.name,
+            "name": self.__id,
+            "entrypoint": str(self.entrypoint),
+            "memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
+            "cpu": self.resources.cpu if self.resources else DEFAULT_TASK_CPU,
+            "depends_on": list(self.dependencies),
+        }
+
+    def to_json(self):
+        """
+        Convert the BatchJob instance to a JSON string representation.
+        """
+        return json.dumps(self.to_dict())
+
+    def translate_dependencies(self) -> Set[int]:
+        """
+        Translate the dependencies of the BatchJob instance into a format suitable for the batch job.
+        """
+
+        def get_dependency_name(dep):
+            if isinstance(dep, str):
+                return dep
+            elif isinstance(dep, BatchJob):
+                return dep.name
+            else:
+                raise TypeError(f"Unsupported dependency type: {type(dep)}")
+
+        return set(
+            [__BATCH_JOB_NAMES__[get_dependency_name(dep)] for dep in self.dependencies]
+        )
+
+    def __add_dependency__(self, other):
+        self.dependencies.add(other.__id)
+
+    def __lshift__(
+        self, other: Sequence[BatchJob] | BatchJob
+    ) -> Sequence[BatchJob] | BatchJob:
+        if isinstance(other, list):
+            for task in other:
+                self.__add_dependency__(task)
+        else:
+            self.__add_dependency__(other)
+        return other
+
+    def __rshift__(
+        self, other: Sequence[BatchJob] | BatchJob
+    ) -> Sequence[BatchJob] | BatchJob:
+        if isinstance(other, Sequence):
+            for task in other:
+                task.__add_dependency__(self)
+        else:
+            other.__add_dependency__(self)
+        return other
+
+    def __rrshift__(self, other: Sequence[BatchJob] | BatchJob) -> BatchJob:
+        self.__lshift__(other)
+        return self
+
+    def __rlshift__(self, other: Sequence[BatchJob] | BatchJob) -> BatchJob:
+        self.__rshift__(other)
+        return self
+
+    def __hash__(self):
+        return self.__id
+
+    def __lt__(self, other: BatchJob) -> bool:
+        return self.__id < other.__id
+
+    def run(self):
+        """
+        Execute the job's entrypoint.
+        """
+        if isinstance(self.entrypoint, EntryPoint):
+            self.entrypoint()
+        else:
+            raise TypeError(f"Invalid entrypoint type: {type(self.entrypoint)}")
+
+
+class Batch(Job):
+    """
+    Represents a batch job in the scheduler.
+
+    Inherits from Job and is used to define batch jobs with specific configurations.
+    """
+
+    def __init__(
+        self,
+        environment: Optional[Environment],
+        name: str,
+        image: Image,
+        run_as: Optional[Union[str, User]],
+        resources: Resources = Resources(memory="100m", cpu=1),
+        acl: Optional[ACL] = None,
+    ):
+        super().__init__(
+            environment=environment,
+            name=name,
+            image=image,
+            run_as=run_as,
+            resources=resources,
+            acl=acl,
+        )
+        self.type = JobType.BATCH
+        self.__jobs: List[BatchJob] = []
+        self._auto_run = False
+        self.__next_job_id = next_batch_job_id()
+
+    @property
+    def next_job_id(self):
+        """
+        Returns a generator for the next job ID in the batch.
+        """
+        return next(self.__next_job_id)
+
+    def add_job(self, job: BatchJob):
+        """
+        Adds a job to the batch job.
+
+        :param job: The BatchJob instance to add.
+        """
+        if not isinstance(job, BatchJob):
+            raise TypeError(
+                f"Only BatchJob instances can be added to a Batch. Got {type(job)} instead."
+            )
+        if self.get_job_by_name(job.name) is not None:
+            raise DuplicateJobNameError(job.name)
+        # Use the batch level resource values as defaults for jobs
+        job.resources = job.resources or self.resources
+        self.__jobs.append(job)
+
+    def is_job_in(self, job: BatchJob) -> bool:
+        return job in self.__jobs
+
+    def get_job_by_name(self, job_name: str) -> Optional[BatchJob]:
+        return next((job for job in self.__jobs if job.name == job_name), None)
+
+    def to_dict(self):
+        """
+        Convert the Batch instance to a dictionary representation.
+        """
+        batch_dict = super().to_dict()
+        batch_dict["jobs"] = [job.to_dict() for job in self.__jobs]
+        return batch_dict
+
+    def to_json(self):
+        """
+        Convert the Batch instance to a JSON string representation.
+        """
+        return json.dumps(self.to_dict(), indent=4)
+
+    def __repr__(self):
+        jobs_repr = ", ".join(repr(job) for job in self.__jobs)
+        return (
+            f"Batch(name={self.name}, environment={self.environment}, "
+            f"run_as={self.run_as}, resources={self.resources}, "
+            f"acl={self.acl}, jobs=[{jobs_repr}])"
+        )
+
+    def set_autorun(self, auto_run):
+        self._auto_run = auto_run
+
+    def __enter__(self):
+        self._token = __DAG_CONTEXT__.set(self)
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        __DAG_CONTEXT__.reset(self._token)
+        if self._auto_run:
+            self.run()
+
+    def __topological_sort__(self):
+        jobs = {
+            hash(job): set([hash(dep) for dep in job.dependencies])
+            for job in self.__jobs
+        }
+
+        for k, v in jobs.items():
+            v.discard(k)  # ignore self dependencies
+        extra_items_in_deps = reduce(set.union, jobs.values()) - set(jobs.keys())
+        jobs.update({item: set() for item in extra_items_in_deps})
+        while True:
+            ordered = set(item for item, dep in jobs.items() if not dep)
+            if not ordered:
+                break
+            yield sorted(ordered)
+            jobs = {
+                item: (dep - ordered)
+                for item, dep in jobs.items()
+                if item not in ordered
+            }
+        if jobs:
+            raise CyclicDependencyError(
+                "A cyclic dependency exists amongst {}".format(jobs)
+            )
+
+    def run(self) -> Tuple[bool, str]:
+        if is_dt_installed():
+            return super().run()
+        else:
+            os.environ["DATATAILR_BATCH_RUN_ID"] = "1"
+            for step in self.__topological_sort__():
+                for job_id in step:
+                    job = self.__jobs[job_id]
+                    logger.info(
+                        f"Batch {self.name}, running job '{job.name}' in environment '{self.environment}' as '{self.run_as}'"
+                    )
+                    job.run()
+            from datatailr.scheduler.batch_decorator import __FUNCTIONS_CREATED_IN_DAG__
+
+            __FUNCTIONS_CREATED_IN_DAG__.clear()
+            return True, ""
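Taken together, batch.py is a small DAG-building API: Batch is a context manager that installs itself as the active DAG context, BatchJob registers itself with its DAG on construction, and the shift operators declare dependencies. Below is a minimal, hypothetical usage sketch, not taken from the package: the image, module names, and run_as value are placeholders, and dag is passed explicitly because the dag parameter's default above is evaluated once at import time, when no context is active.

# Hypothetical usage sketch for the batch.py module above.
from datatailr import Image
from datatailr.scheduler.base import EntryPoint, JobType
from datatailr.scheduler.batch import Batch, BatchJob

image = Image(...)  # placeholder: Image construction depends on the deployment

with Batch(environment=None, name="example", image=image, run_as="some-user") as dag:
    # The `dag` default is bound at import time (always None), so pass it explicitly.
    extract = BatchJob(
        name="extract",
        entrypoint=EntryPoint(JobType.BATCH, module_name="my_etl", function_name="extract"),
        dag=dag,
    )
    load = BatchJob(
        name="load",
        entrypoint=EntryPoint(JobType.BATCH, module_name="my_etl", function_name="load"),
        dag=dag,
    )
    extract >> load  # 'load' now depends on 'extract'

ok, err = dag.run()  # topologically sorts the jobs, then runs each entrypoint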
datatailr/scheduler/batch_decorator.py
@@ -0,0 +1,112 @@
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import functools
+import inspect
+import os
+
+from datatailr.logging import DatatailrLogger
+from datatailr.scheduler.arguments_cache import ArgumentsCache
+from datatailr.scheduler.base import EntryPoint, JobType, Resources
+from datatailr.scheduler.batch import BatchJob, get_current_manager
+from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
+from datatailr.scheduler.utils import get_available_env_args
+
+__ARGUMENTS_CACHE__ = ArgumentsCache()
+__FUNCTIONS_CREATED_IN_DAG__: dict[BatchJob, str] = {}
+logger = DatatailrLogger(__name__).get_logger()
+
+
+def batch_run_id() -> str:
+    return os.environ["DATATAILR_BATCH_RUN_ID"]
+
+
+def dag_id(job: BatchJob) -> str:
+    return os.getenv(
+        "DATATAILR_BATCH_ID", __FUNCTIONS_CREATED_IN_DAG__.get(job, "unknown")
+    )
+
+
+def batch_decorator(memory=DEFAULT_TASK_MEMORY, cpu=DEFAULT_TASK_CPU):
+    """
+    Decorator to mark a function as a batch job.
+    This decorator can be used to wrap functions that should be executed as part of batch jobs.
+    """
+
+    def decorator(func):
+        spec = inspect.getfullargspec(func)
+        signature = inspect.signature(func)
+        varargs = spec.varargs
+        varkw = spec.varkw
+        parameters = signature.parameters
+
+        @functools.wraps(func)
+        def batch_main(*args, **kwargs):
+            dag = get_current_manager()
+            if dag is None:
+                logger.info(f'Function "{func.__name__}" is being executed.')
+                # There are two possible scenarios:
+                # 1. The function is called directly, not as part of a batch job. In this case, the args and kwargs should be used.
+                # 2. The function is called as part of a batch job - it was constructed as part of a DAG and is now being executed.
+                if func not in __FUNCTIONS_CREATED_IN_DAG__:
+                    return func(*args, **kwargs)
+                function_arguments = [v.name for v in parameters.values()]
+                env_args = get_available_env_args()
+                final_args = list(args)
+                final_kwargs = kwargs.copy()
+
+                for name, value in env_args.items():
+                    if name in function_arguments:
+                        if len(final_args) < len(function_arguments):
+                            final_args.extend(
+                                [None] * (len(function_arguments) - len(final_args))
+                            )
+                        final_args[function_arguments.index(name)] = value
+                function_arguments = __ARGUMENTS_CACHE__.get_arguments(
+                    dag_id(func), func.__name__
+                )
+                result = func(**function_arguments)
+                __ARGUMENTS_CACHE__.add_result(batch_run_id(), func.__name__, result)
+                return result
+            else:
+                __FUNCTIONS_CREATED_IN_DAG__[func] = dag.id
+                all_args = dict(zip(spec.args, args)) | kwargs
+
+                __ARGUMENTS_CACHE__.add_arguments(dag.id, func.__name__, all_args)
+
+                dag.set_autorun(True)
+                job = BatchJob(
+                    name=func.__name__,
+                    entrypoint=EntryPoint(
+                        JobType.BATCH,
+                        module_name=func.__module__,
+                        function_name=func.__name__,
+                    ),
+                    resources=Resources(memory=memory, cpu=cpu),
+                    dependencies=[
+                        value.name
+                        for _, value in all_args.items()
+                        if isinstance(value, BatchJob)
+                    ],
+                    dag=dag,
+                )
+                return job
+
+        module = inspect.getmodule(func)
+        if hasattr(module, "__batch_main__"):
+            if func.__name__ in getattr(module, "__batch_main__"):
+                raise ValueError(f"Duplicate batch main function {func.__name__}")
+            module.__batch_main__[func.__name__] = batch_main  # type: ignore
+        else:
+            setattr(module, "__batch_main__", {func.__name__: batch_main})
+
+        return batch_main
+
+    return decorator
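The decorated function behaves differently depending on context: inside an active Batch it caches the call's arguments and returns a BatchJob whose dependencies are any BatchJob-valued arguments; outside a Batch it either runs directly or, when it was built as part of a DAG, reconstructs its arguments from the cache and from scheduler-provided environment variables. A hypothetical sketch follows; the function bodies, module name, and resource values are illustrative only.

# Hypothetical sketch of batch_decorator usage.
from datatailr import Image
from datatailr.scheduler.batch import Batch
from datatailr.scheduler.batch_decorator import batch_decorator

@batch_decorator(memory="200m", cpu=1)
def extract():
    return {"rows": 42}

@batch_decorator()
def load(data):
    print(data)

image = Image(...)  # placeholder, as in the previous sketch

with Batch(environment=None, name="example", image=image, run_as="some-user") as dag:
    data = extract()  # inside a Batch: returns a BatchJob and caches the call arguments
    load(data)        # the BatchJob argument makes 'load' depend on 'extract'
# The decorator called set_autorun(True), so __exit__ triggers dag.run().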
datatailr/scheduler/constants.py
@@ -0,0 +1,20 @@
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+DEFAULT_TASK_MEMORY = "100m"
+DEFAULT_TASK_CPU = 1
+
+BATCH_JOB_ARGUMENTS = (
+    "rundate",
+    "scheduled_time",
+    "started_at",
+    "batch_name",
+    "job_name",
+)
datatailr/scheduler/utils.py
@@ -0,0 +1,28 @@
+# *************************************************************************
+#
+# Copyright (c) 2025 - Datatailr Inc.
+# All Rights Reserved.
+#
+# This file is part of Datatailr and subject to the terms and conditions
+# defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+# of this file, in parts or full, via any medium is strictly prohibited.
+# *************************************************************************
+
+import os
+
+from datatailr.scheduler.constants import BATCH_JOB_ARGUMENTS
+
+
+def get_available_env_args():
+    """
+    Get the available environment variables for batch job arguments.
+    This function retrieves the environment variables that match the keys defined in BATCH_JOB_ARGUMENTS.
+    Returns:
+        dict: A dictionary of available environment variables for batch jobs.
+    """
+    available_args = {}
+    for key, value in os.environ.items():
+        arg_key = key.replace("DATATAILR_BATCH_ARG_", "").lower()
+        if arg_key in BATCH_JOB_ARGUMENTS:
+            available_args[arg_key] = value
+    return available_args
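This helper pairs with BATCH_JOB_ARGUMENTS from constants.py: a variable is picked up when stripping the DATATAILR_BATCH_ARG_ prefix and lower-casing what remains yields one of those names. Note the check does not require the prefix, so a variable literally named JOB_NAME would also match. A small illustration with made-up values:

# Illustration of get_available_env_args() with made-up environment values.
import os

os.environ["DATATAILR_BATCH_ARG_RUNDATE"] = "2025-06-30"
os.environ["DATATAILR_BATCH_ARG_JOB_NAME"] = "extract"
os.environ["SOME_OTHER_VAR"] = "ignored"  # does not map to a BATCH_JOB_ARGUMENTS name

from datatailr.scheduler.utils import get_available_env_args

print(get_available_env_args())
# {'rundate': '2025-06-30', 'job_name': 'extract'}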