datatailr 0.1.5__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Potentially problematic release.

@@ -8,7 +8,11 @@
 # of this file, in parts or full, via any medium is strictly prohibited.
 # *************************************************************************
 
+from __future__ import annotations
+
+from datetime import datetime
 import importlib
+import inspect
 import json
 import os
 import subprocess
@@ -18,12 +22,27 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Callable, Optional, Tuple, Union
 
-from datatailr import ACL, Environment, User, dt__Job, is_dt_installed
+from datatailr import ACL, Environment, User, is_dt_installed
+from datatailr.wrapper import dt__Job
+from datatailr.scheduler.constants import DEFAULT_TASK_MEMORY, DEFAULT_TASK_CPU
 from datatailr.build.image import Image
 from datatailr.errors import BatchJobError
 from datatailr.logging import DatatailrLogger
+from datatailr.utils import run_shell_command
 
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+__client__ = dt__Job()
+
+
+def set_allow_unsafe_scheduling(allow: bool):
+    """
+    Set whether to allow unsafe scheduling of jobs.
+    This is a global setting that affects how jobs are scheduled.
+    """
+    if allow:
+        os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] = "true"
+    else:
+        os.environ.pop("DATATAILR_ALLOW_UNSAFE_SCHEDULING", None)
 
 
 class RepoValidationError(BatchJobError):
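
The new module-level client and `set_allow_unsafe_scheduling` helper simply toggle the `DATATAILR_ALLOW_UNSAFE_SCHEDULING` environment variable, which `verify_repo_is_ready` (later in this diff) consults before running its git validations. A minimal usage sketch; the import path is an assumption based on this diff:

from datatailr.scheduler.base import set_allow_unsafe_scheduling  # assumed path

set_allow_unsafe_scheduling(True)   # exports DATATAILR_ALLOW_UNSAFE_SCHEDULING=true
# ... jobs scheduled here skip the uncommitted-changes and remote-sync checks ...
set_allow_unsafe_scheduling(False)  # unsets the variable, restoring the checks
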
@@ -40,6 +59,7 @@ class JobType(Enum):
     BATCH = "batch"
     SERVICE = "service"
     APP = "app"
+    EXCEL = "excel"
     UNKNOWN = "unknown"
 
     def __str__(self):
@@ -55,8 +75,14 @@ class Resources:
     Represents the resources required for a job.
     """
 
-    memory: str = "100m"
-    cpu: int = 1
+    memory: str = DEFAULT_TASK_MEMORY
+    cpu: float = DEFAULT_TASK_CPU
+
+
+# TODO: create a dt_run script that will:
+# 1. create user and group if not exists
+# 2. set the correct path
+# 3. run the job based on its type
 
 
 class EntryPoint:
@@ -68,26 +94,30 @@ class EntryPoint:
     def __init__(
         self,
         type: JobType,
-        func: Optional[Callable] = None,
-        module_name: Optional[str] = None,
-        function_name: Optional[str] = None,
+        func: Callable,
     ):
-        if func is None and (module_name is None or function_name is None):
-            raise ValueError(
-                "Either a function or module and function names must be provided."
-            )
         self.func = func
-        self.module_name = func.__module__ if func else module_name
-        self.function_name = func.__name__ if func else function_name
+        self.module_name = func.__module__
+        self.function_name = func.__name__
         self.type = type
 
+        # Find the absolute path to the repository and then the relative path to the module.
+        # This will be used in the creation of the code 'bundle' when building the image.
+        path_to_repo = run_shell_command("git rev-parse --show-toplevel")[0]
+        path_to_code = inspect.getfile(func)
+        package_root = path_to_code
+        module_parts = self.module_name.split(".")
+        for _ in module_parts:
+            package_root = os.path.dirname(package_root)
+        path_to_module = os.path.relpath(package_root, path_to_repo)
+        self.path_to_repo = path_to_repo
+        self.path_to_module = path_to_module
+
     def __call__(self, *args, **kwargs):
+        os.environ.update(kwargs.pop("env", {}))
         if self.type == JobType.BATCH:
-            if self.module_name and self.function_name:
-                module = importlib.import_module(self.module_name)
-                func = getattr(module, self.function_name)
-            elif self.func is not None:
-                func = self.func
+            module = importlib.import_module(self.module_name)
+            func = getattr(module, self.function_name)
             return func(*args, **kwargs)
 
         elif self.type == JobType.SERVICE:
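
The rewritten `EntryPoint.__init__` derives the code-bundle layout from the function itself: it takes the file that defines `func`, strips one directory per dotted component of `func.__module__` to reach the package root, and records that root relative to the git toplevel. A standalone sketch of the same walk, with illustrative names:

import inspect
import os

def derive_bundle_paths(func, path_to_repo: str) -> str:
    """Mirror of the walk in EntryPoint.__init__ (names here are illustrative)."""
    path_to_code = inspect.getfile(func)    # e.g. /repo/src/pkg/tasks/etl.py
    package_root = path_to_code
    for _ in func.__module__.split("."):    # "pkg.tasks.etl" -> three dirname() hops
        package_root = os.path.dirname(package_root)
    return os.path.relpath(package_root, path_to_repo)  # e.g. "src"
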
@@ -106,13 +136,28 @@ class EntryPoint:
 class Job:
     def __init__(
         self,
-        environment: Optional[Environment],
         name: str,
-        image: Image,
-        run_as: Optional[Union[str, User]],
+        environment: Optional[Environment] = Environment.DEV,
+        image: Optional[Image] = None,
+        run_as: Optional[Union[str, User]] = User.signed_user(),
         resources: Resources = Resources(memory="100m", cpu=1),
         acl: Optional[ACL] = None,
+        python_requirements: str = "",
+        build_script_pre: str = "",
+        build_script_post: str = "",
+        type: JobType = JobType.UNKNOWN,
+        entrypoint: Optional[EntryPoint] = None,
+        update_existing: bool = False,
     ):
+        if environment is None:
+            environment = Environment.DEV
+
+        if update_existing:
+            existing_job = self.__get_existing__(name, environment)
+            if existing_job:
+                self.from_dict(existing_job)
+                return
+
         if run_as is None:
             run_as = User.signed_user()
         if environment is None:
@@ -126,11 +171,16 @@ class Job:
         self.name = name
         self.run_as = run_as
         self.resources = resources
+        if image is None:
+            image = Image(
+                acl=self.acl,
+                python_requirements=python_requirements,
+                build_script_pre=build_script_pre,
+                build_script_post=build_script_post,
+            )
         self.image = image
-
-        # Placeholders, to be set in derived classes
-        self.type: JobType = JobType.UNKNOWN
-        self.entrypoint = None
+        self.type = type
+        self.entrypoint = entrypoint
         self.__id = str(uuid.uuid4())
 
     @property
@@ -140,6 +190,25 @@ class Job:
         """
         return self.__id
 
+    @classmethod
+    def __get_existing__(
+        cls, job_name: str, environment: Environment
+    ) -> Optional[dict]:
+        """
+        Retrieve an existing job instance from the DataTailr platform.
+        Based on the job name and environment.
+        """
+        job_list = __client__.ls(filter=f"name={job_name},environment={environment}")
+        if not isinstance(job_list, list):
+            return None
+        if len(job_list) == 0:
+            return None
+        if len(job_list) > 1:
+            raise BatchJobError(
+                f"Multiple jobs found with name '{job_name}' in environment '{environment}'."
+            )
+        return job_list[0]
+
     def __repr__(self):
         return (
             f"Job(name={self.name}, environment={self.environment}, "
@@ -169,31 +238,64 @@ class Job:
         job_dict["cpu"] = self.resources.cpu
         return job_dict
 
+    def from_dict(self, job_dict: dict):
+        self.name = job_dict["name"]
+        self.image = job_dict["image"]
+
+        environment = job_dict.get("environment", "dev")
+        environment = Environment(environment.lower())
+        self.environment = environment
+
+        user = job_dict["run_as"]["name"]
+        user = User(user.lower())
+        self.run_as = user
+
+        self.resources = Resources(memory=job_dict["memory"], cpu=job_dict["num_cpus"])
+        acl = job_dict.get("acl", None)
+        if acl is None:
+            acl = ACL(user=self.run_as)
+        else:
+            acl = ACL.from_dict(acl)
+        self.acl = acl
+        self.python_requirements = (job_dict.get("python_requirements", ""),)
+        self.build_script_pre = (job_dict.get("build_script_pre", ""),)
+        self.build_script_post = (job_dict.get("build_script_post", ""),)
+        self.type = JobType(job_dict.get("type", "unknown").lower())
+        self.state = job_dict["state"]
+        self.create_time = datetime.fromtimestamp(job_dict["create_time"] * 1e-6)
+        self.version = job_dict["version"]
+        self.__id = job_dict["id"]
+
     def to_json(self):
         """
         Convert the Job instance to a JSON string representation.
         """
         return json.dumps(self.to_dict())
 
-    def verify_repo_is_ready(self) -> Tuple[bool, str]:
-        is_committed = (
-            subprocess.run(
-                ("git diff --exit-code"), shell=True, capture_output=True
-            ).returncode
-            == 0
-        )
+    def verify_repo_is_ready(self) -> Tuple[str, str]:
+        """
+        Verify if the repository is ready for job execution.
+        The check consists of two parts:
+        1. Check if there are uncommitted changes in the repository.
+        2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
+        Returns a tuple of (branch: str, commit_hash: str).
+        """
+        local_commit = run_shell_command("git rev-parse HEAD")[0]
+        branch_name = run_shell_command("git rev-parse --abbrev-ref HEAD")[0]
+
+        if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
+            return branch_name, local_commit
+        return_code = run_shell_command("git diff --exit-code")[1]
+        is_committed = return_code == 0
+
         if not is_committed:
-            return (
-                False,
-                "Uncommitted changes detected. Please commit your changes before running the job.",
+            raise RepoValidationError(
+                "Please commit your changes before running the job."
             )
 
-        local_commit = subprocess.run(
-            ("git rev-parse HEAD"), shell=True, capture_output=True, text=True
-        ).stdout.strip()
         remote_commit = (
            subprocess.run(
-                ("git ls-remote origin HEAD"),
+                ("remote_commit = $(git ls-remote origin HEAD)"),
                shell=True,
                capture_output=True,
                text=True,
@@ -203,43 +305,67 @@ class Job:
             )
 
         if local_commit != remote_commit:
-            return (
-                False,
-                "Local commit does not match remote HEAD. Please pull the latest changes before running the job.",
+            raise RepoValidationError(
+                "Please sync your local repository with the remote before running the job."
             )
 
-        branch = subprocess.run(
-            ("git rev-parse --abbrev-ref HEAD"),
-            shell=True,
-            capture_output=True,
-            text=True,
-        ).stdout.strip()
-        return True, ""
+        return branch_name, local_commit
 
-    def run(self) -> Tuple[bool, str]:
-        """
-        Run the job. This method should be implemented to execute the job logic.
-        It verifies the repository state and prepares the job for execution.
-        Returns a tuple of (success: bool, message: str).
-        If the repository is not ready, it returns False with an error message.
-        If the job runs successfully, it returns True with an empty message.
-        """
-        if is_dt_installed():
-            check_result = self.verify_repo_is_ready()
-            if not check_result[0]:
-                raise RepoValidationError(check_result[1])
-            logger.info(
-                f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
-            )
+    def __prepare__(self) -> str:
+        branch_name, local_commit = self.verify_repo_is_ready()
+        self.image.update(
+            branch_name=branch_name,
+            commit_hash=local_commit,
+        )
+        logger.info(
+            f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
+        )
 
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
-                temp_file.write(self.to_json().encode())
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
+            temp_file.write(self.to_json().encode())
+        return temp_file.name
 
-            dt__Job().run(f"file://{temp_file.name}")
-            os.remove(temp_file.name)
+    def get_schedule_args(self) -> dict:
+        """
+        Returns additional arguments for scheduling the job.
+        Override or extend this method as needed.
+        """
+        return {}
 
-            return True, ""
-        else:
+    def __run_command__(self, command: str) -> Tuple[bool, str]:
+        """
+        Run a command in the context of the job.
+        This is used to execute the job's entry point.
+        """
+        if not is_dt_installed():
             raise NotImplementedError(
                 "DataTailr is not installed. Please install DataTailr to run this job."
             )
+        try:
+            temp_file_name = self.__prepare__()
+
+            if command == "run":
+                __client__.run(f"file://{temp_file_name}", **self.get_schedule_args())
+            elif command == "save":
+                __client__.save(f"file://{temp_file_name}", **self.get_schedule_args())
+            else:
+                raise ValueError(f"Unknown command: {command}")
+            os.remove(temp_file_name)
+        except Exception as e:
+            logger.error(f"Error running command '{command}': {e}")
+            return False, str(e)
+        return True, f"Job '{self.name}' {command}d successfully."
+
+    def save(self) -> Tuple[bool, str]:
+        """
+        Save the job to the DataTailr platform.
+        If the job already exists, it will be updated.
+        """
+        return self.__run_command__("save")
+
+    def run(self) -> Tuple[bool, str]:
+        """
+        Run the job. This method should be implemented to execute the job logic.
+        It verifies the repository state and prepares the job for execution.
+        """
+        return self.__run_command__("run")
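
This ends the first file's hunks; the hunks below restart at line 14 and, judging by their imports, belong to the batch scheduler module. The rework above funnels both `run()` and `save()` through `__run_command__`, which validates the repo, stamps the image with the current branch and commit, serializes the job to a temporary JSON file, and hands a `file://` URL to the module-level `dt__Job` client. A sketch of the resulting call flow:

# Sketch: ok/msg mirror the Tuple[bool, str] contract of __run_command__.
job = Job(name="nightly-etl", python_requirements="pandas==2.2.*")
ok, msg = job.save()     # verify repo -> bundle -> __client__.save("file://...")
if ok:
    ok, msg = job.run()  # same pipeline via __client__.run("file://...")
# On success msg is f"Job '{name}' {command}d successfully.";
# on failure the exception text is returned rather than raised.
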
@@ -14,9 +14,11 @@ import contextvars
 import json
 import os
 from functools import reduce
-from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+import uuid
 
 from datatailr import Image
+from datatailr.dt_json import encode_json
 from datatailr.errors import BatchJobError
 from datatailr.logging import DatatailrLogger
 from datatailr.scheduler.base import (
@@ -29,9 +31,12 @@ from datatailr.scheduler.base import (
     User,
 )
 from datatailr.scheduler.constants import DEFAULT_TASK_CPU, DEFAULT_TASK_MEMORY
+from datatailr.scheduler.arguments_cache import ArgumentsCache
+from datatailr.scheduler.schedule import Schedule
 from datatailr.utils import is_dt_installed
 
 __DAG_CONTEXT__: contextvars.ContextVar = contextvars.ContextVar("dag_context")
+__ARGUMENTS_CACHE__ = ArgumentsCache()
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
 
 
@@ -39,13 +44,6 @@ def get_current_manager():
     return __DAG_CONTEXT__.get(None)
 
 
-def next_batch_job_id():
-    i = 0
-    while True:
-        yield i
-        i += 1
-
-
 class CyclicDependencyError(BatchJobError):
     """
     Exception raised when a cyclic dependency is detected in the batch job dependencies.
@@ -79,6 +77,12 @@ class MissingDagError(BatchJobError):
         )
 
 
+class CodePackageMismatchError(BatchJobError):
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message
+
+
 class BatchJob:
     """
     Represents a job within a batch job.
@@ -93,6 +97,7 @@ class BatchJob:
         resources: Optional[Resources] = None,
         dependencies: Sequence[Union[str, BatchJob]] = [],
         dag: Optional[Batch] = get_current_manager(),
+        argument_mapping: Dict[str, str] = {},
     ):
         self.name = name
         self.entrypoint = entrypoint
@@ -102,14 +107,46 @@ class BatchJob:
             raise MissingDagError()
         self.__id = dag.next_job_id
         self.dag = dag
+        self.__args: Dict[str, Any] = {}
         self.dag.__BATCH_JOB_NAMES__[self.name] = self.__id
         self.dependencies = self.translate_dependencies()
         assert all(
             isinstance(dep, int) for dep in self.dependencies
         ), "All dependencies must be integers representing job IDs."
         self.dag.add_job(self)
+        self.__argument_mapping = argument_mapping or {}
+
+    def __call__(self, *args, **kwds) -> BatchJob:
+        """
+        Allows the BatchJob instance to be called like a function, returning itself.
+        This is useful for chaining or functional-style programming.
+        """
+        return self
+
+    @property
+    def args(self) -> Dict[str, Any]:
+        """
+        Returns the arguments for the BatchJob instance.
+        """
+        return self.__args or {}
+
+    @args.setter
+    def args(self, args: Dict[str, Any]):
+        """
+        Sets the arguments for the BatchJob instance.
+        """
+        if not isinstance(args, dict):
+            raise TypeError(f"Expected a dictionary for args, got {type(args)}")
+        self.__args = args
+
+    @property
+    def id(self) -> int:
+        """
+        Returns the unique identifier of the BatchJob instance.
+        """
+        return self.__id
 
-    def alias(self, name: str):
+    def alias(self, name: str) -> BatchJob:
         """
         Set an alias for the BatchJob instance.
 
@@ -122,19 +159,48 @@ class BatchJob:
         self.name = name
         return self
 
+    def set_resources(
+        self,
+        resources: Optional[Resources] = None,
+        memory: Optional[str] = None,
+        cpu: Optional[float] = None,
+    ) -> BatchJob:
+        """
+        Set the resources for the BatchJob instance.
+
+        :param resources: The Resources instance to set.
+        """
+        if resources is not None:
+            if not isinstance(resources, Resources):
+                raise TypeError(f"Expected Resources instance, got {type(resources)}")
+        else:
+            resources = Resources(
+                memory=memory or DEFAULT_TASK_MEMORY, cpu=cpu or DEFAULT_TASK_CPU
+            )
+        self.resources = resources
+        return self
+
     def __repr__(self):
         return (
             f"BatchJob(name={self.name}, entrypoint={self.entrypoint}, "
             f"resources={self.resources}) (id={self.__id})"
         )
 
+    def __getstate__(self) -> object:
+        state = self.__dict__.copy()
+        state.pop("dag", None)
+        return state
+
+    def __setstate__(self, state: dict):
+        self.__dict__.update(state)
+
     def to_dict(self):
         """
         Convert the BatchJob instance to a dictionary representation.
         """
         return {
             "display_name": self.name,
-            "name": self.__id,
+            "child_number": self.__id,
             "entrypoint": str(self.entrypoint),
             "memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
             "cpu": self.resources.cpu if self.resources else DEFAULT_TASK_CPU,
@@ -169,6 +235,9 @@ class BatchJob:
 
     def __add_dependency__(self, other):
         self.dependencies.add(other.__id)
+        arg_name = self.__argument_mapping.get(other.name, other.name)
+        if arg_name is not None:
+            self.__args[arg_name] = other
 
     def __lshift__(
         self, other: Sequence[BatchJob] | BatchJob
@@ -209,7 +278,13 @@ class BatchJob:
         Execute the job's entrypoint.
         """
         if isinstance(self.entrypoint, EntryPoint):
-            self.entrypoint()
+            env = {
+                "DATATAILR_BATCH_ID": str(self.dag.id),
+                "DATATAILR_JOB_ID": str(self.__id),
+                "DATATAILR_JOB_NAME": self.name,
+                "DATATAILR_JOB_ARGUMENT_MAPPING": encode_json(self.__argument_mapping),
+            }
+            self.entrypoint(env=env)
         else:
             raise TypeError(f"Invalid entrypoint type: {type(self.entrypoint)}")
 
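`__add_dependency__` now also captures each upstream job as a named argument, renamed through `argument_mapping`, and `__execute__` passes the batch id, job id, job name, and the encoded mapping to the entrypoint as environment variables. A sketch of the wiring, with hypothetical entrypoints:

# `transform << load` records load as a dependency of transform (see
# __lshift__ above) and, via __add_dependency__, stores load under
# args["raw_frame"] because of the mapping below.
transform = BatchJob(
    name="transform",
    entrypoint=transform_entrypoint,         # hypothetical EntryPoint
    argument_mapping={"load": "raw_frame"},  # upstream name -> parameter name
)
transform << load                            # load is another BatchJob
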
@@ -223,12 +298,17 @@ class Batch(Job):
 
     def __init__(
         self,
-        environment: Optional[Environment],
         name: str,
-        image: Image,
-        run_as: Optional[Union[str, User]],
+        environment: Optional[Environment] = Environment.DEV,
+        schedule: Optional[Schedule] = None,
+        image: Optional[Image] = None,
+        run_as: Optional[Union[str, User]] = User.signed_user(),
         resources: Resources = Resources(memory="100m", cpu=1),
         acl: Optional[ACL] = None,
+        local_run: bool = False,
+        python_requirements: str = "",
+        build_script_pre: str = "",
+        build_script_post: str = "",
     ):
         super().__init__(
             environment=environment,
@@ -237,19 +317,25 @@ class Batch(Job):
             run_as=run_as,
             resources=resources,
             acl=acl,
+            python_requirements=python_requirements,
+            build_script_pre=build_script_pre,
+            build_script_post=build_script_post,
+            type=JobType.BATCH,
         )
-        self.type = JobType.BATCH
         self.__jobs: List[BatchJob] = []
         self._auto_run = False
-        self.__next_job_id = next_batch_job_id()
+        self.__next_job_id = -1
         self.__BATCH_JOB_NAMES__: Dict[str, int] = {}
+        self.__local_run = local_run
+        self.__schedule = schedule
 
     @property
     def next_job_id(self):
         """
         Returns a generator for the next job ID in the batch.
         """
-        return next(self.__next_job_id)
+        self.__next_job_id += 1
+        return self.__next_job_id
 
     def add_job(self, job: BatchJob):
         """
@@ -265,6 +351,25 @@ class Batch(Job):
             raise DuplicateJobNameError(job.name)
         # Use the batch level resource values as defaults for jobs
         job.resources = job.resources or self.resources
+        image_path_to_repo = self.image.path_to_repo
+        image_path_to_module = self.image.path_to_module
+        package_path_to_repo = job.entrypoint.path_to_repo
+        package_path_to_module = job.entrypoint.path_to_module
+
+        if image_path_to_repo is None:
+            self.image.path_to_repo = package_path_to_repo
+        elif package_path_to_repo != image_path_to_repo:
+            raise CodePackageMismatchError(
+                f"Function {job.entrypoint.function_name} is defined in a different package root: "
+                f"{package_path_to_repo} != {image_path_to_repo}"
+            )
+        if image_path_to_module is None:
+            self.image.path_to_module = package_path_to_module
+        elif package_path_to_module != image_path_to_module:
+            raise CodePackageMismatchError(
+                f"Function {job.entrypoint.function_name} is defined in a different module: "
+                f"{package_path_to_module} != {image_path_to_module}"
+            )
         self.__jobs.append(job)
 
     def is_job_in(self, job: BatchJob) -> bool:
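
`add_job` now pins the image's code bundle to a single repository and module root: the first job's entrypoint seeds `image.path_to_repo` and `image.path_to_module`, and any later entrypoint that resolves differently raises the new `CodePackageMismatchError`. A sketch of the failure mode, assuming the image paths start out unset and `func_a`/`func_b` are hypothetical task functions:

batch = Batch(name="etl", local_run=True)
a = BatchJob(name="a", entrypoint=EntryPoint(JobType.BATCH, func_a), dag=batch)
# If func_b lives under a different repo root or module root than func_a,
# constructing this job raises CodePackageMismatchError from add_job:
b = BatchJob(name="b", entrypoint=EntryPoint(JobType.BATCH, func_b), dag=batch)
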
@@ -279,6 +384,7 @@ class Batch(Job):
         """
         batch_dict = super().to_dict()
         batch_dict["jobs"] = [job.to_dict() for job in self.__jobs]
+        batch_dict["schedule"] = str(self.__schedule) if self.__schedule else None
         return batch_dict
 
     def to_json(self):
@@ -331,11 +437,41 @@ class Batch(Job):
             "A cyclic dependency exists amongst {}".format(jobs)
         )
 
+    def get_schedule_args(self) -> Dict[str, Any]:
+        if isinstance(self.__schedule, Schedule):
+            args = {
+                "at_minute": self.__schedule.at_minutes,
+                "every_minute": self.__schedule.every_minute,
+                "at_hour": self.__schedule.at_hours,
+                "every_hour": self.__schedule.every_hour,
+                "weekdays": self.__schedule.weekdays,
+                "day_of_month": self.__schedule.day_of_month,
+                "in_month": self.__schedule.in_month,
+                "every_month": self.__schedule.every_month,
+                "timezone": self.__schedule.timezone,
+                "run_after_job_uuid": self.__schedule.run_after_job_uuid,
+                "run_after_job_name": self.__schedule.run_after_job_name,
+                "run_after_job_condition": self.__schedule.run_after_job_condition,
+            }
+            args = {key: value for key, value in args.items() if value is not None}
+            for key, value in args.items():
+                if isinstance(value, list):
+                    args[key] = ",".join(map(str, value))
+            return args
+        return {}
+
     def run(self) -> Tuple[bool, str]:
+        def arg_name(arg: Union[BatchJob, str]) -> str:
+            return arg.name if isinstance(arg, BatchJob) else arg
+
+        args = {
+            j.name: {k: arg_name(v) for k, v in j.args.items()} for j in self.__jobs
+        }
+        __ARGUMENTS_CACHE__.add_arguments(self.id, args)
-        if is_dt_installed():
+        if not self.__local_run and is_dt_installed():
             return super().run()
         else:
-            os.environ["DATATAILR_BATCH_RUN_ID"] = "1"
+            os.environ["DATATAILR_BATCH_RUN_ID"] = uuid.uuid4().hex[:8]
             for step in self.__topological_sort__():
                 for job_id in step:
                     job = self.__jobs[job_id]
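
`get_schedule_args` flattens a `Schedule` into scheduler keywords, dropping `None` values and joining list values into comma-separated strings, and `run()` now caches every job's resolved arguments under the batch id before either delegating to the platform or, with `local_run=True` (or no installation), executing the topological sort in process under a random eight-hex-character `DATATAILR_BATCH_RUN_ID`. A closing sketch; the `Schedule` constructor arguments are assumptions based on the attributes read above:

from datatailr.scheduler.schedule import Schedule

nightly = Schedule(at_hours=[2], at_minutes=[30], timezone="UTC")  # assumed ctor
batch = Batch(name="etl", schedule=nightly)
# ... define BatchJobs against `batch` ...
ok, msg = batch.run()
# Remote path: super().run() -> __client__.run("file://...",
#     at_hour="2", at_minute="30", timezone="UTC")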