datatailr 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff compares the content of publicly available package versions released to one of the supported registries, exactly as they appear in their respective public registries, and is provided for information only.


@@ -8,22 +8,40 @@
 # of this file, in parts or full, via any medium is strictly prohibited.
 # *************************************************************************
 
+from __future__ import annotations
+
+from datetime import datetime
 import importlib
+import inspect
 import json
 import os
-import subprocess
 import tempfile
 import uuid
 from dataclasses import dataclass
 from enum import Enum
 from typing import Callable, Optional, Tuple, Union
 
-from datatailr import ACL, Environment, User, dt__Job, is_dt_installed
+from datatailr import ACL, Environment, User, is_dt_installed
+from datatailr.wrapper import dt__Job
+from datatailr.scheduler.constants import DEFAULT_TASK_MEMORY, DEFAULT_TASK_CPU
 from datatailr.build.image import Image
 from datatailr.errors import BatchJobError
 from datatailr.logging import DatatailrLogger
+from datatailr.utils import run_shell_command
 
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+__client__ = dt__Job()
+
+
+def set_allow_unsafe_scheduling(allow: bool):
+    """
+    Set whether to allow unsafe scheduling of jobs.
+    This is a global setting that affects how jobs are scheduled.
+    """
+    if allow:
+        os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] = "true"
+    else:
+        os.environ.pop("DATATAILR_ALLOW_UNSAFE_SCHEDULING", None)
 
 
 class RepoValidationError(BatchJobError):
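
The new `set_allow_unsafe_scheduling` switch is just an environment-variable toggle, so it can equally well be set from the shell. A minimal sketch of its behavior, assuming the function lives in `datatailr.scheduler.base` alongside the `Job` machinery below:

```python
import os

from datatailr.scheduler.base import set_allow_unsafe_scheduling  # import path assumed

# Enabling the flag makes verify_repo_is_ready() (later in this file) skip
# its git-state checks and return the branch/commit immediately.
set_allow_unsafe_scheduling(True)
assert os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] == "true"

# Disabling removes the variable entirely rather than setting it to "false".
set_allow_unsafe_scheduling(False)
assert "DATATAILR_ALLOW_UNSAFE_SCHEDULING" not in os.environ
```
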
@@ -40,6 +58,7 @@ class JobType(Enum):
     BATCH = "batch"
     SERVICE = "service"
     APP = "app"
+    EXCEL = "excel"
     UNKNOWN = "unknown"
 
     def __str__(self):
@@ -55,8 +74,14 @@ class Resources:
     Represents the resources required for a job.
     """
 
-    memory: str = "100m"
-    cpu: int = 1
+    memory: str = DEFAULT_TASK_MEMORY
+    cpu: float = DEFAULT_TASK_CPU
+
+
+# TODO: create a dt_run script that will:
+# 1. create user and group if not exists
+# 2. set the correct path
+# 3. run the job based on its type
 
 
 class EntryPoint:
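
`Resources` now draws its defaults from the shared constants module, and `cpu` widened from `int` to `float`, so fractional CPU reservations are representable. A short sketch (the concrete default values are defined in `datatailr.scheduler.constants`, outside this diff):

```python
from datatailr.scheduler.base import Resources  # import path assumed
from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY

r = Resources()  # picks up the package-wide defaults
assert r.memory == DEFAULT_TASK_MEMORY and r.cpu == DEFAULT_TASK_CPU

half = Resources(memory="512m", cpu=0.5)  # fractional CPUs now type-check
```
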
@@ -68,26 +93,30 @@ class EntryPoint:
     def __init__(
         self,
         type: JobType,
-        func: Optional[Callable] = None,
-        module_name: Optional[str] = None,
-        function_name: Optional[str] = None,
+        func: Callable,
     ):
-        if func is None and (module_name is None or function_name is None):
-            raise ValueError(
-                "Either a function or module and function names must be provided."
-            )
         self.func = func
-        self.module_name = func.__module__ if func else module_name
-        self.function_name = func.__name__ if func else function_name
+        self.module_name = func.__module__
+        self.function_name = func.__name__
         self.type = type
 
+        # Find the absolute path to the repository and then the relative path to the module.
+        # This will be used in the creation of the code 'bundle' when building the image.
+        path_to_repo = run_shell_command("git rev-parse --show-toplevel")[0]
+        path_to_code = inspect.getfile(func)
+        package_root = path_to_code
+        module_parts = self.module_name.split(".")
+        for _ in module_parts:
+            package_root = os.path.dirname(package_root)
+        path_to_module = os.path.relpath(package_root, path_to_repo)
+        self.path_to_repo = path_to_repo
+        self.path_to_module = path_to_module
+
     def __call__(self, *args, **kwargs):
+        os.environ.update(kwargs.pop("env", {}))
         if self.type == JobType.BATCH:
-            if self.module_name and self.function_name:
-                module = importlib.import_module(self.module_name)
-                func = getattr(module, self.function_name)
-            elif self.func is not None:
-                func = self.func
+            module = importlib.import_module(self.module_name)
+            func = getattr(module, self.function_name)
             return func(*args, **kwargs)
 
         elif self.type == JobType.SERVICE:
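
The rewritten constructor requires an actual callable and records where its package sits relative to the git repository root, climbing one directory level per dotted component of `__module__`. A worked trace with an invented layout:

```python
# Hypothetical layout, for illustration only:
#   git rev-parse --show-toplevel  ->  /repo
#   inspect.getfile(func)          ->  /repo/src/mypkg/tasks.py
#   func.__module__                ->  "mypkg.tasks"   (two dotted parts)
#
# os.path.dirname() is applied once per part:
#   /repo/src/mypkg/tasks.py  ->  /repo/src/mypkg  ->  /repo/src
#
# so the attributes later used to assemble the code bundle become:
#   entrypoint.path_to_repo   == "/repo"
#   entrypoint.path_to_module == "src"  # os.path.relpath("/repo/src", "/repo")
```
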
@@ -106,13 +135,28 @@ class EntryPoint:
 class Job:
     def __init__(
         self,
-        environment: Optional[Environment],
         name: str,
-        image: Image,
-        run_as: Optional[Union[str, User]],
+        environment: Optional[Environment] = Environment.DEV,
+        image: Optional[Image] = None,
+        run_as: Optional[Union[str, User]] = None,
         resources: Resources = Resources(memory="100m", cpu=1),
         acl: Optional[ACL] = None,
+        python_requirements: str = "",
+        build_script_pre: str = "",
+        build_script_post: str = "",
+        type: JobType = JobType.UNKNOWN,
+        entrypoint: Optional[EntryPoint] = None,
+        update_existing: bool = False,
     ):
+        if environment is None:
+            environment = Environment.DEV
+
+        if update_existing:
+            existing_job = self.__get_existing__(name, environment)
+            if existing_job:
+                self.from_dict(existing_job)
+                return
+
         if run_as is None:
             run_as = User.signed_user()
         if environment is None:
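
Only `name` remains mandatory: `environment` falls back to `Environment.DEV`, `run_as` to the signed-in user, and a missing `image` is assembled from the build arguments (next hunk). A sketch of the new constructor surface, with an invented job name and requirement string:

```python
from datatailr.scheduler.base import Job  # import path assumed

# Fresh definition; no Image is passed, so one is built from
# python_requirements and the pre/post build scripts.
job = Job(name="nightly-report", python_requirements="pandas")

# update_existing=True first looks the job up on the platform
# (__get_existing__ below); on a unique match the instance is rehydrated
# via from_dict() and the constructor returns early.
job = Job(name="nightly-report", update_existing=True)
```
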
@@ -126,11 +170,16 @@ class Job:
         self.name = name
         self.run_as = run_as
         self.resources = resources
+        if image is None:
+            image = Image(
+                acl=self.acl,
+                python_requirements=python_requirements,
+                build_script_pre=build_script_pre,
+                build_script_post=build_script_post,
+            )
         self.image = image
-
-        # Placeholders, to be set in derived classes
-        self.type: JobType = JobType.UNKNOWN
-        self.entrypoint = None
+        self.type = type
+        self.entrypoint = entrypoint
         self.__id = str(uuid.uuid4())
 
     @property
@@ -140,6 +189,25 @@ class Job:
         """
         return self.__id
 
+    @classmethod
+    def __get_existing__(
+        cls, job_name: str, environment: Environment
+    ) -> Optional[dict]:
+        """
+        Retrieve an existing job instance from the DataTailr platform.
+        Based on the job name and environment.
+        """
+        job_list = __client__.ls(filter=f"name={job_name},environment={environment}")
+        if not isinstance(job_list, list):
+            return None
+        if len(job_list) == 0:
+            return None
+        if len(job_list) > 1:
+            raise BatchJobError(
+                f"Multiple jobs found with name '{job_name}' in environment '{environment}'."
+            )
+        return job_list[0]
+
     def __repr__(self):
         return (
             f"Job(name={self.name}, environment={self.environment}, "
@@ -169,77 +237,125 @@ class Job:
         job_dict["cpu"] = self.resources.cpu
         return job_dict
 
+    def from_dict(self, job_dict: dict):
+        self.name = job_dict["name"]
+        self.image = job_dict["image"]
+
+        environment = job_dict.get("environment", "dev")
+        environment = Environment(environment.lower())
+        self.environment = environment
+
+        user = job_dict["run_as"]["name"]
+        user = User(user.lower())
+        self.run_as = user
+
+        self.resources = Resources(memory=job_dict["memory"], cpu=job_dict["num_cpus"])
+        acl = job_dict.get("acl", None)
+        if acl is None:
+            acl = ACL(user=self.run_as)
+        else:
+            acl = ACL.from_dict(acl)
+        self.acl = acl
+        self.python_requirements = (job_dict.get("python_requirements", ""),)
+        self.build_script_pre = (job_dict.get("build_script_pre", ""),)
+        self.build_script_post = (job_dict.get("build_script_post", ""),)
+        self.type = JobType(job_dict.get("type", "unknown").lower())
+        self.state = job_dict["state"]
+        self.create_time = datetime.fromtimestamp(job_dict["create_time"] * 1e-6)
+        self.version = job_dict["version"]
+        self.__id = job_dict["id"]
+
     def to_json(self):
         """
         Convert the Job instance to a JSON string representation.
         """
         return json.dumps(self.to_dict())
 
-    def verify_repo_is_ready(self) -> Tuple[bool, str]:
-        is_committed = (
-            subprocess.run(
-                ("git diff --exit-code"), shell=True, capture_output=True
-            ).returncode
-            == 0
-        )
+    def verify_repo_is_ready(self) -> Tuple[str, str]:
+        """
+        Verify if the repository is ready for job execution.
+        The check consists of two parts:
+        1. Check if there are uncommitted changes in the repository.
+        2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
+        Returns a tuple of (branch: str, commit_hash: str).
+        """
+        local_commit = run_shell_command("git rev-parse HEAD")[0]
+        branch_name = run_shell_command("git rev-parse --abbrev-ref HEAD")[0]
+
+        if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
+            return branch_name, local_commit
+        return_code = run_shell_command("git diff --exit-code")[1]
+        is_committed = return_code == 0
+
         if not is_committed:
-            return (
-                False,
-                "Uncommitted changes detected. Please commit your changes before running the job.",
+            raise RepoValidationError(
+                "Please commit your changes before running the job."
             )
 
-        local_commit = subprocess.run(
-            ("git rev-parse HEAD"), shell=True, capture_output=True, text=True
-        ).stdout.strip()
-        remote_commit = (
-            subprocess.run(
-                ("git ls-remote origin HEAD"),
-                shell=True,
-                capture_output=True,
-                text=True,
-            )
-            .stdout.strip()
-            .split("\t")[0]
-        )
+        remote_commit = run_shell_command("git ls-remote origin HEAD")[0].split("\t")[0]
 
         if local_commit != remote_commit:
-            return (
-                False,
-                "Local commit does not match remote HEAD. Please pull the latest changes before running the job.",
+            raise RepoValidationError(
+                "Please sync your local repository with the remote before running the job."
            )
 
-        branch = subprocess.run(
-            ("git rev-parse --abbrev-ref HEAD"),
-            shell=True,
-            capture_output=True,
-            text=True,
-        ).stdout.strip()
-        return True, ""
+        return branch_name, local_commit
 
-    def run(self) -> Tuple[bool, str]:
-        """
-        Run the job. This method should be implemented to execute the job logic.
-        It verifies the repository state and prepares the job for execution.
-        Returns a tuple of (success: bool, message: str).
-        If the repository is not ready, it returns False with an error message.
-        If the job runs successfully, it returns True with an empty message.
-        """
-        if is_dt_installed():
-            check_result = self.verify_repo_is_ready()
-            if not check_result[0]:
-                raise RepoValidationError(check_result[1])
-            logger.info(
-                f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
-            )
+    def __prepare__(self) -> str:
+        branch_name, local_commit = self.verify_repo_is_ready()
+        self.image.update(
+            branch_name=branch_name,
+            commit_hash=local_commit,
+        )
+        logger.info(
+            f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
+        )
 
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
-            temp_file.write(self.to_json().encode())
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
+            temp_file.write(self.to_json().encode())
+        return temp_file.name
 
-            dt__Job().run(f"file://{temp_file.name}")
-            os.remove(temp_file.name)
+    def get_schedule_args(self) -> dict:
+        """
+        Returns additional arguments for scheduling the job.
+        Override or extend this method as needed.
+        """
+        return {}
 
-            return True, ""
-        else:
+    def __run_command__(self, command: str) -> Tuple[bool, str]:
+        """
+        Run a command in the context of the job.
+        This is used to execute the job's entry point.
+        """
+        if not is_dt_installed():
             raise NotImplementedError(
                 "DataTailr is not installed. Please install DataTailr to run this job."
             )
+        try:
+            temp_file_name = self.__prepare__()
+
+            if command == "run":
+                __client__.run(f"file://{temp_file_name}", **self.get_schedule_args())
+            elif command == "save":
+                __client__.save(f"file://{temp_file_name}", **self.get_schedule_args())
+            else:
+                raise ValueError(f"Unknown command: {command}")
+            os.remove(temp_file_name)
+        except Exception as e:
+            logger.error(f"Error running command '{command}': {e}")
+            return False, str(e)
+        return True, f"Job '{self.name}' {command}d successfully."
+
+    def save(self) -> Tuple[bool, str]:
+        """
+        Save the job to the DataTailr platform.
+        If the job already exists, it will be updated.
+        """
+        return self.__run_command__("save")
+
+    def run(self) -> Tuple[bool, str]:
+        """
+        Run the job. This method should be implemented to execute the job logic.
+        It verifies the repository state and prepares the job for execution.
+        """
+        return self.__run_command__("run")
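
`save()` and `run()` now share one pipeline: `__prepare__()` validates the repository (raising `RepoValidationError` on failure), stamps the image with the branch and commit, and writes the job JSON to a temporary file that is handed to the platform client as a `file://` URL; `__run_command__` converts any exception into a `(False, message)` result. A usage sketch with an invented job name:

```python
job = Job(name="nightly-report", update_existing=True)

ok, message = job.save()     # upsert the definition on the platform
if ok:
    ok, message = job.run()  # schedule an actual run
print(message)               # e.g. "Job 'nightly-report' saved successfully."
```

The hunks below change the batch scheduler, which builds `Batch` and `BatchJob` on top of these `Job` primitives.
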
@@ -14,9 +14,11 @@ import contextvars
 import json
 import os
 from functools import reduce
-from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+import uuid
 
 from datatailr import Image
+from datatailr.dt_json import encode_json
 from datatailr.errors import BatchJobError
 from datatailr.logging import DatatailrLogger
 from datatailr.scheduler.base import (
@@ -29,9 +31,12 @@ from datatailr.scheduler.base import (
     User,
 )
 from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
+from datatailr.scheduler.arguments_cache import ArgumentsCache
+from datatailr.scheduler.schedule import Schedule
 from datatailr.utils import is_dt_installed
 
 __DAG_CONTEXT__: contextvars.ContextVar = contextvars.ContextVar("dag_context")
+__ARGUMENTS_CACHE__ = ArgumentsCache()
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
 
 
@@ -39,13 +44,6 @@ def get_current_manager():
     return __DAG_CONTEXT__.get(None)
 
 
-def next_batch_job_id():
-    i = 0
-    while True:
-        yield i
-        i += 1
-
-
 class CyclicDependencyError(BatchJobError):
     """
     Exception raised when a cyclic dependency is detected in the batch job dependencies.
@@ -79,6 +77,12 @@ class MissingDagError(BatchJobError):
         )
 
 
+class CodePackageMismatchError(BatchJobError):
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message
+
+
 class BatchJob:
     """
     Represents a job within a batch job.
@@ -93,6 +97,7 @@ class BatchJob:
         resources: Optional[Resources] = None,
         dependencies: Sequence[Union[str, BatchJob]] = [],
         dag: Optional[Batch] = get_current_manager(),
+        argument_mapping: Dict[str, str] = {},
     ):
         self.name = name
         self.entrypoint = entrypoint
@@ -102,12 +107,14 @@ class BatchJob:
             raise MissingDagError()
         self.__id = dag.next_job_id
         self.dag = dag
+        self.__args: Dict[str, Any] = {}
         self.dag.__BATCH_JOB_NAMES__[self.name] = self.__id
         self.dependencies = self.translate_dependencies()
         assert all(
             isinstance(dep, int) for dep in self.dependencies
         ), "All dependencies must be integers representing job IDs."
         self.dag.add_job(self)
+        self.__argument_mapping = argument_mapping or {}
 
     def __call__(self, *args, **kwds) -> BatchJob:
         """
@@ -116,6 +123,22 @@ class BatchJob:
         """
         return self
 
+    @property
+    def args(self) -> Dict[str, Any]:
+        """
+        Returns the arguments for the BatchJob instance.
+        """
+        return self.__args or {}
+
+    @args.setter
+    def args(self, args: Dict[str, Any]):
+        """
+        Sets the arguments for the BatchJob instance.
+        """
+        if not isinstance(args, dict):
+            raise TypeError(f"Expected a dictionary for args, got {type(args)}")
+        self.__args = args
+
     @property
     def id(self) -> int:
         """
@@ -123,7 +146,7 @@ class BatchJob:
         """
         return self.__id
 
-    def alias(self, name: str):
+    def alias(self, name: str) -> BatchJob:
         """
         Set an alias for the BatchJob instance.
 
@@ -136,19 +159,48 @@ class BatchJob:
         self.name = name
         return self
 
+    def set_resources(
+        self,
+        resources: Optional[Resources] = None,
+        memory: Optional[str] = None,
+        cpu: Optional[float] = None,
+    ) -> BatchJob:
+        """
+        Set the resources for the BatchJob instance.
+
+        :param resources: The Resources instance to set.
+        """
+        if resources is not None:
+            if not isinstance(resources, Resources):
+                raise TypeError(f"Expected Resources instance, got {type(resources)}")
+        else:
+            resources = Resources(
+                memory=memory or DEFAULT_TASK_MEMORY, cpu=cpu or DEFAULT_TASK_CPU
+            )
+        self.resources = resources
+        return self
+
     def __repr__(self):
         return (
             f"BatchJob(name={self.name}, entrypoint={self.entrypoint}, "
             f"resources={self.resources}) (id={self.__id})"
         )
 
+    def __getstate__(self) -> object:
+        state = self.__dict__.copy()
+        state.pop("dag", None)
+        return state
+
+    def __setstate__(self, state: dict):
+        self.__dict__.update(state)
+
     def to_dict(self):
         """
         Convert the BatchJob instance to a dictionary representation.
         """
         return {
             "display_name": self.name,
-            "name": self.__id,
+            "child_number": self.__id,
             "entrypoint": str(self.entrypoint),
             "memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
             "cpu": self.resources.cpu if self.resources else DEFAULT_TASK_CPU,
@@ -183,6 +235,9 @@ class BatchJob:
 
     def __add_dependency__(self, other):
         self.dependencies.add(other.__id)
+        arg_name = self.__argument_mapping.get(other.name, other.name)
+        if arg_name is not None:
+            self.__args[arg_name] = other
 
     def __lshift__(
         self, other: Sequence[BatchJob] | BatchJob
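
Adding a dependency edge now also wires dataflow: the upstream job is stored in the downstream job's `args`, keyed by its own name unless `argument_mapping` renames it. A sketch continuing the assumed names above:

```python
# transform's function expects a parameter called "data"; map the upstream
# job name "extract" onto it when declaring the downstream job.
extract = BatchJob("extract", extract_entry, dag=batch)
transform = BatchJob(
    "transform", transform_entry, dag=batch,
    argument_mapping={"extract": "data"},
)
transform << extract  # adds the edge and sets transform.args["data"] = extract
```
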
@@ -223,7 +278,13 @@ class BatchJob:
         Execute the job's entrypoint.
         """
         if isinstance(self.entrypoint, EntryPoint):
-            self.entrypoint()
+            env = {
+                "DATATAILR_BATCH_ID": str(self.dag.id),
+                "DATATAILR_JOB_ID": str(self.__id),
+                "DATATAILR_JOB_NAME": self.name,
+                "DATATAILR_JOB_ARGUMENT_MAPPING": encode_json(self.__argument_mapping),
+            }
+            self.entrypoint(env=env)
         else:
             raise TypeError(f"Invalid entrypoint type: {type(self.entrypoint)}")
 
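
At execution time the batch context travels through environment variables: `EntryPoint.__call__` (first module) pops `env` from its kwargs and merges it into `os.environ` before importing and invoking the target function. What a running task can therefore read, sketched with an invented function:

```python
import os

def transform(data):  # invoked through the EntryPoint machinery
    batch_id = os.environ["DATATAILR_BATCH_ID"]  # id of the owning Batch
    job_id = os.environ["DATATAILR_JOB_ID"]      # this job's child number
    name = os.environ["DATATAILR_JOB_NAME"]      # display name
    # argument_mapping, JSON-encoded via datatailr.dt_json.encode_json
    mapping = os.environ["DATATAILR_JOB_ARGUMENT_MAPPING"]
    return data
```
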
@@ -237,12 +298,17 @@ class Batch(Job):
 
     def __init__(
         self,
-        environment: Optional[Environment],
         name: str,
-        image: Image,
-        run_as: Optional[Union[str, User]],
+        environment: Optional[Environment] = Environment.DEV,
+        schedule: Optional[Schedule] = None,
+        image: Optional[Image] = None,
+        run_as: Optional[Union[str, User]] = None,
         resources: Resources = Resources(memory="100m", cpu=1),
         acl: Optional[ACL] = None,
+        local_run: bool = False,
+        python_requirements: str = "",
+        build_script_pre: str = "",
+        build_script_post: str = "",
     ):
         super().__init__(
             environment=environment,
@@ -251,19 +317,25 @@ class Batch(Job):
             run_as=run_as,
             resources=resources,
             acl=acl,
+            python_requirements=python_requirements,
+            build_script_pre=build_script_pre,
+            build_script_post=build_script_post,
+            type=JobType.BATCH,
         )
-        self.type = JobType.BATCH
         self.__jobs: List[BatchJob] = []
         self._auto_run = False
-        self.__next_job_id = next_batch_job_id()
+        self.__next_job_id = -1
         self.__BATCH_JOB_NAMES__: Dict[str, int] = {}
+        self.__local_run = local_run
+        self.__schedule = schedule
 
     @property
     def next_job_id(self):
         """
         Returns a generator for the next job ID in the batch.
         """
-        return next(self.__next_job_id)
+        self.__next_job_id += 1
+        return self.__next_job_id
 
     def add_job(self, job: BatchJob):
         """
@@ -279,6 +351,25 @@ class Batch(Job):
             raise DuplicateJobNameError(job.name)
         # Use the batch level resource values as defaults for jobs
         job.resources = job.resources or self.resources
+        image_path_to_repo = self.image.path_to_repo
+        image_path_to_module = self.image.path_to_module
+        package_path_to_repo = job.entrypoint.path_to_repo
+        package_path_to_module = job.entrypoint.path_to_module
+
+        if image_path_to_repo is None:
+            self.image.path_to_repo = package_path_to_repo
+        elif package_path_to_repo != image_path_to_repo:
+            raise CodePackageMismatchError(
+                f"Function {job.entrypoint.function_name} is defined in a different package root: "
+                f"{package_path_to_repo} != {image_path_to_repo}"
+            )
+        if image_path_to_module is None:
+            self.image.path_to_module = package_path_to_module
+        elif package_path_to_module != image_path_to_module:
+            raise CodePackageMismatchError(
+                f"Function {job.entrypoint.function_name} is defined in a different module: "
+                f"{package_path_to_module} != {image_path_to_module}"
+            )
         self.__jobs.append(job)
 
     def is_job_in(self, job: BatchJob) -> bool:
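
`add_job` now pins the image's code bundle to a single location: the first job's entrypoint fixes `image.path_to_repo` and `image.path_to_module`, and any later entrypoint that resolves elsewhere raises the new `CodePackageMismatchError`. Illustration reusing the invented paths from the `EntryPoint` trace earlier:

```python
# First job added: image.path_to_repo = "/repo", image.path_to_module = "src".
# A later job whose function g lives under "other_src" would fail with:
#   CodePackageMismatchError: Function g is defined in a different module:
#   other_src != src
```
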
@@ -293,6 +384,7 @@ class Batch(Job):
         """
         batch_dict = super().to_dict()
         batch_dict["jobs"] = [job.to_dict() for job in self.__jobs]
+        batch_dict["schedule"] = str(self.__schedule) if self.__schedule else None
         return batch_dict
 
     def to_json(self):
@@ -345,11 +437,41 @@ class Batch(Job):
                 "A cyclic dependency exists amongst {}".format(jobs)
             )
 
+    def get_schedule_args(self) -> Dict[str, Any]:
+        if isinstance(self.__schedule, Schedule):
+            args = {
+                "at_minutes": self.__schedule.at_minutes,
+                "every_minute": self.__schedule.every_minute,
+                "at_hours": self.__schedule.at_hours,
+                "every_hour": self.__schedule.every_hour,
+                "weekdays": self.__schedule.weekdays,
+                "day_of_month": self.__schedule.day_of_month,
+                "in_month": self.__schedule.in_month,
+                "every_month": self.__schedule.every_month,
+                "timezone": self.__schedule.timezone,
+                "run_after_job_uuid": self.__schedule.run_after_job_uuid,
+                "run_after_job_name": self.__schedule.run_after_job_name,
+                "run_after_job_condition": self.__schedule.run_after_job_condition,
+            }
+            args = {key: value for key, value in args.items() if value is not None}
+            for key, value in args.items():
+                if isinstance(value, list):
+                    args[key] = ",".join(map(str, value))
+            return args
+        return {}
+
     def run(self) -> Tuple[bool, str]:
-        if is_dt_installed():
+        def arg_name(arg: Union[BatchJob, str]) -> str:
+            return arg.name if isinstance(arg, BatchJob) else arg
+
+        args = {
+            j.name: {k: arg_name(v) for k, v in j.args.items()} for j in self.__jobs
+        }
+        __ARGUMENTS_CACHE__.add_arguments(self.id, args)
+        if not self.__local_run and is_dt_installed():
             return super().run()
         else:
-            os.environ["DATATAILR_BATCH_RUN_ID"] = "1"
+            os.environ["DATATAILR_BATCH_RUN_ID"] = uuid.uuid4().hex[:8]
             for step in self.__topological_sort__():
                 for job_id in step:
                     job = self.__jobs[job_id]
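
Taken together on the batch side: per-job arguments are published to the module-level `ArgumentsCache` before anything runs; `local_run=True` forces the in-process topological execution path even where DataTailr is installed, each such run getting a random 8-hex-digit `DATATAILR_BATCH_RUN_ID` instead of the hard-coded `"1"`; and a `Schedule` is flattened by `get_schedule_args` into the keyword arguments forwarded to the client. A closing sketch; `Schedule`'s constructor is assumed to mirror the attribute names read above, and the entrypoints are invented:

```python
from datatailr.scheduler.schedule import Schedule

batch = Batch(
    name="etl",
    schedule=Schedule(at_hours=[2], timezone="UTC"),  # assumed constructor
    local_run=True,  # run in-process, step by step, in dependency order
)
extract = BatchJob("extract", extract_entry, dag=batch)
transform = BatchJob(
    "transform", transform_entry, dag=batch, argument_mapping={"extract": "data"}
)
transform << extract

batch.run()  # caches job args, then walks __topological_sort__() locally
```
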