datatailr 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datatailr might be problematic.

@@ -0,0 +1,28 @@
+ #!/usr/bin/env python3
+
+ # *************************************************************************
+ #
+ # Copyright (c) 2025 - Datatailr Inc.
+ # All Rights Reserved.
+ #
+ # This file is part of Datatailr and subject to the terms and conditions
+ # defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
+ # of this file, in parts or full, via any medium is strictly prohibited.
+ # *************************************************************************
+
+ import os
+
+ from datatailr.logging import DatatailrLogger
+
+ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+
+
+ def run():
+     logger.info("Starting Datatailr app...")
+     entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
+
+     if entrypoint is None:
+         raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
+
+     os.system(entrypoint)
+     logger.info(f"Running entrypoint: {entrypoint}")
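
Note: the new runner above resolves its command from the DATATAILR_ENTRYPOINT environment variable and shells out to it with os.system. A minimal standalone sketch of that control flow, using a hypothetical command and plain execution instead of the packaged DatatailrLogger:

    import os

    os.environ["DATATAILR_ENTRYPOINT"] = "streamlit run app.py"  # hypothetical app command

    entrypoint = os.environ.get("DATATAILR_ENTRYPOINT")
    if entrypoint is None:
        raise ValueError("Environment variable 'DATATAILR_ENTRYPOINT' is not set.")
    os.system(entrypoint)  # blocks until the command exits
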
@@ -12,19 +12,19 @@

  import importlib
  import os
- import pickle

- from datatailr import dt__Blob
  from datatailr.logging import DatatailrLogger

  logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()


- def main():
+ def run():
+     logger.info("Running Datatailr batch job")
      entry_point = os.environ.get("DATATAILR_BATCH_ENTRYPOINT")
      batch_run_id = os.environ.get("DATATAILR_BATCH_RUN_ID")
      batch_id = os.environ.get("DATATAILR_BATCH_ID")
      job_id = os.environ.get("DATATAILR_JOB_ID")
+     logger.info(f"Batch run ID: {batch_run_id}, Batch ID: {batch_id}, Job ID: {job_id}")

      if entry_point is None:
          raise ValueError(
@@ -44,20 +44,5 @@ def main():
          raise ValueError(
              f"The function '{func_name}' in module '{module_name}' is not callable."
          )
-     result = function()
-     result_path = f"batch-results-{batch_run_id}-{job_id}.pkl"
-     with open(result_path, "wb") as f:
-         pickle.dump(result, f)
-     blob = dt__Blob()
-     blob.cp(result_path, "blob://")
-     logger.info(f"{result_path} copied to blob storage.")
-
-
- if __name__ == "__main__":
-     try:
-         logger.debug("Starting job execution...")
-         main()
-         logger.debug("Job executed successfully.")
-     except Exception as e:
-         logger.error(f"Error during job execution: {e}")
-         raise
+     function()
+     logger.info("Datatailr batch job completed successfully.")
@@ -8,31 +8,47 @@
  # of this file, in parts or full, via any medium is strictly prohibited.
  # *************************************************************************

- from datatailr.errors import BatchJobError, DatatailrError
+ r"""
+ Datatailr Scheduler Module
+ ==========================
+
+ The `datatailr.scheduler` module provides a framework for scheduling and managing batch jobs.
+
+ The main job types are:
+ _______________________
+
+ - **Batch**: Represents a batch job that can be scheduled and executed.
+   The job can include multiple tasks which can be run in parallel or sequentially.
+ - **Service**: Represents a service job that runs continuously.
+ - **App**: Represents a web app or a dashboard, which can be built using one of the supported frameworks,
+   such as `Streamlit <https://streamlit.io/>`_, `Dash <https://dash.plotly.com/>`_, or `Panel <https://panel.holoviz.org/>`_.
+ - **Excel**: Represents an Excel add-in.
+ """
+
+ from datatailr.errors import BatchJobError
  from datatailr.scheduler.base import (
-     ACL,
      EntryPoint,
      Environment,
      Job,
      JobType,
      Resources,
-     User,
+     set_allow_unsafe_scheduling,
  )
  from datatailr.scheduler.batch import Batch, BatchJob, DuplicateJobNameError
- from datatailr.scheduler.batch_decorator import batch_decorator as batch
+ from datatailr.scheduler.batch_decorator import batch_decorator as batch_job
+ from datatailr.scheduler.schedule import Schedule

  __all__ = [
      "Job",
      "JobType",
      "Environment",
-     "User",
      "Resources",
-     "ACL",
      "EntryPoint",
      "Batch",
      "BatchJob",
-     "batch",
-     "DatatailrError",
+     "batch_job",
      "BatchJobError",
      "DuplicateJobNameError",
+     "set_allow_unsafe_scheduling",
+     "Schedule",
  ]
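
Note: the package's public scheduler API changes here: the decorator formerly exported as `batch` is now exported as `batch_job`, `Schedule` and `set_allow_unsafe_scheduling` are newly exported, and `ACL`, `User`, and `DatatailrError` are no longer re-exported from `datatailr.scheduler`. A minimal import sketch against the new `__all__` (names taken from the diff above):

    # exported by datatailr.scheduler in 0.1.8
    from datatailr.scheduler import (
        Batch,
        BatchJob,
        Schedule,
        batch_job,
        set_allow_unsafe_scheduling,
    )

    # no longer re-exported from this package in 0.1.8; these imports are expected to fail
    # from datatailr.scheduler import batch, ACL, User, DatatailrError
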
@@ -21,17 +21,26 @@ and the inner dictionaries contain the arguments.
  This module is for internal use of the datatailr package.
  """

- from collections import defaultdict
+ from datatailr.dt_json import json, decode_json
+ import os
  import pickle
- from typing import Any, Dict
+ from typing import Any, Dict, Optional

  from datatailr import is_dt_installed, Blob
- from datatailr.scheduler import BatchJob
+ from datatailr.errors import DatatailrError


  __BLOB_STORAGE__ = Blob()


+ class CacheNotFoundError(DatatailrError):
+     """Custom error for cache operations."""
+
+     def __init__(self, message: str):
+         super().__init__(message)
+         self.message = message
+
+
  class ArgumentsCache:
      def __init__(self, use_persistent_cache: bool = is_dt_installed()):
          """
@@ -40,11 +49,12 @@ class ArgumentsCache:
          :param use_persistent_cache: If True, use the persistent cache backend. Otherwise, use in-memory cache.
          """
          self.use_persistent_cache = use_persistent_cache
-         self.in_memory_cache: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(
-             lambda: defaultdict(dict)
-         )
+         if not self.use_persistent_cache:
+             # Create a temp folder, for local caching
+             os.makedirs("/tmp/datatailr/batch/arguments", exist_ok=True)
+             os.makedirs("/tmp/datatailr/batch/results", exist_ok=True)

-     def add_arguments(self, batch_run_id: str, job: str, arguments: Dict[str, Any]):
+     def add_arguments(self, batch_id: str, arguments: Dict[str, Any]):
          """
          Add arguments to the cache for a specific job and batch run.

@@ -52,13 +62,16 @@
          :param job_name: Name of the job.
          :param arguments: Dictionary of arguments to store.
          """
-         if self.use_persistent_cache and isinstance(job, str):
-             path = f"{batch_run_id}/{job}/args"
+         path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
+         if self.use_persistent_cache:
              self._add_to_persistent_cache(path, arguments)
          else:
-             self.in_memory_cache[batch_run_id][job]["args"] = arguments
+             with open(path, "wb") as f:
+                 pickle.dump(arguments, f)

-     def get_arguments(self, batch_run_id: str, job: str) -> Dict[str, Any]:
+     def get_arguments(
+         self, batch_id: str, job: str, batch_run_id: Optional[str]
+     ) -> Dict[str, Any]:
          """
          Retrieve arguments from the cache for a specific job and batch run.

@@ -66,27 +79,37 @@
          :param job_name: Name of the job.
          :return: Dictionary of arguments.
          """
+         path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
          if self.use_persistent_cache and isinstance(job, str):
-             path = f"{batch_run_id}/{job}/args"
-             arg_keys = self._get_from_persistent_cache(path)
-             if not isinstance(arg_keys, dict):
-                 raise TypeError(
-                     f"Expected a dictionary for arguments, got {type(arg_keys)}"
-                 )
+             try:
+                 arg_keys = self._get_from_persistent_cache(path)
+             except RuntimeError:
+                 return {}
          else:
-             arg_keys = (
-                 self.in_memory_cache.get(batch_run_id, {})
-                 .get(job, {})
-                 .get("args", {})
-                 .items()
-             )
-         arguments = {}
-         for key, value in arg_keys:
-             if isinstance(value, BatchJob):
-                 arguments[key] = value.name
-             else:
-                 arguments[key] = value
-         return arguments
+             if not os.path.exists(path):
+                 raise CacheNotFoundError(
+                     f"Cache file not found: {path}. Ensure that the arguments have been cached."
+                 )
+             with open(path, "rb") as f:
+                 try:
+                     arg_keys = pickle.load(f)
+                 except EOFError:
+                     return {}
+         if not isinstance(arg_keys, dict):
+             raise TypeError(
+                 f"Expected a dictionary for arguments, got {type(arg_keys)}"
+             )
+         if batch_run_id is None:
+             return arg_keys[job]
+         arguments_mapping = decode_json(
+             os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
+         )
+         arguments_mapping = {value: key for key, value in arguments_mapping.items()}
+         args = {
+             arguments_mapping.get(name, name): self.get_result(batch_run_id, value)
+             for name, value in arg_keys[job].items()
+         }
+         return args

      def add_result(self, batch_run_id: str, job: str, result: Any):
          """
@@ -96,13 +119,14 @@
          :param job: Name of the job.
          :param result: Result of the batch job.
          """
+         path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
          if self.use_persistent_cache and isinstance(job, str):
-             path = f"{batch_run_id}/{job}/result"
              self._add_to_persistent_cache(path, result)
          else:
-             self.in_memory_cache[batch_run_id][job]["result"] = result
+             with open(path, "wb") as f:
+                 pickle.dump(result, f)

-     def get_result(self, batch_run_id: str, job: str) -> Any:
+     def get_result(self, batch_run_id: str, job: Any) -> Any:
          """
          Retrieve the result of a batch job from the cache.

@@ -110,10 +134,17 @@
          :param job: Name of the job.
          :return: Result of the batch job.
          """
+         path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
          if self.use_persistent_cache and isinstance(job, str):
-             path = f"{batch_run_id}/{job}/result"
              return self._get_from_persistent_cache(path)
-         return self.in_memory_cache[batch_run_id][job].get("result")
+         else:
+             if not os.path.exists(path):
+                 return job
+             with open(path, "rb") as f:
+                 try:
+                     return pickle.load(f)
+                 except EOFError:
+                     return None

      def _add_to_persistent_cache(self, path: str, blob: Any):
          """
@@ -124,9 +155,8 @@ class ArgumentsCache:
          :raises TypeError: If the blob cannot be pickled.

          """
-         __BLOB_STORAGE__.put_blob(
-             path, pickle.dumps(blob, protocol=pickle.HIGHEST_PROTOCOL)
-         )
+         path = path.replace("/tmp/", "")
+         __BLOB_STORAGE__.put_blob(path, json.dumps(blob))

      def _get_from_persistent_cache(self, path: str) -> Any:
          """
@@ -134,8 +164,6 @@ class ArgumentsCache:

          :param path: Path in the Blob storage where the blob is stored.
          """
-         try:
-             data = __BLOB_STORAGE__.get_blob(path)
-             return pickle.loads(data)
-         except (TypeError, EOFError):
-             return {}
+         path = path.replace("/tmp/", "")
+         data = __BLOB_STORAGE__.get_blob(path)
+         return json.loads(data)
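
Note: the ArgumentsCache rework above replaces the in-memory defaultdict with pickle files under /tmp/datatailr/batch/ for the non-persistent path, and switches the persistent blob payloads from pickle to JSON. A minimal local sketch of the new call shapes (ArgumentsCache is the class from this diff; its module path is not shown here, so the import is omitted):

    cache = ArgumentsCache(use_persistent_cache=False)

    # arguments for a whole batch are stored under the batch_id, keyed by job name
    cache.add_arguments("batch-123", {"job_a": {"x": 1}})

    # with batch_run_id=None the raw per-job mapping is returned
    assert cache.get_arguments("batch-123", "job_a", None) == {"x": 1}

    # results are stored per (batch_run_id, job) and read back by get_result
    cache.add_result("run-1", "job_a", 42)
    assert cache.get_result("run-1", "job_a") == 42
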
@@ -8,7 +8,11 @@
  # of this file, in parts or full, via any medium is strictly prohibited.
  # *************************************************************************

+ from __future__ import annotations
+
+ from datetime import datetime
  import importlib
+ import inspect
  import json
  import os
  import subprocess
@@ -18,12 +22,27 @@ from dataclasses import dataclass
  from enum import Enum
  from typing import Callable, Optional, Tuple, Union

- from datatailr import ACL, Environment, User, dt__Job, is_dt_installed
+ from datatailr import ACL, Environment, User, is_dt_installed
+ from datatailr.wrapper import dt__Job
+ from datatailr.scheduler.constants import DEFAULT_TASK_MEMORY, DEFAULT_TASK_CPU
  from datatailr.build.image import Image
  from datatailr.errors import BatchJobError
  from datatailr.logging import DatatailrLogger
+ from datatailr.utils import run_shell_command

  logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
+ __client__ = dt__Job()
+
+
+ def set_allow_unsafe_scheduling(allow: bool):
+     """
+     Set whether to allow unsafe scheduling of jobs.
+     This is a global setting that affects how jobs are scheduled.
+     """
+     if allow:
+         os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] = "true"
+     else:
+         os.environ.pop("DATATAILR_ALLOW_UNSAFE_SCHEDULING", None)


  class RepoValidationError(BatchJobError):
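
Note: set_allow_unsafe_scheduling toggles a process-wide environment variable; verify_repo_is_ready (further down in this diff) returns early and skips the git checks when it is set. A minimal sketch:

    import os
    from datatailr.scheduler import set_allow_unsafe_scheduling

    set_allow_unsafe_scheduling(True)
    assert os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] == "true"

    set_allow_unsafe_scheduling(False)  # removes the variable again
    assert "DATATAILR_ALLOW_UNSAFE_SCHEDULING" not in os.environ
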
@@ -40,6 +59,7 @@ class JobType(Enum):
      BATCH = "batch"
      SERVICE = "service"
      APP = "app"
+     EXCEL = "excel"
      UNKNOWN = "unknown"

      def __str__(self):
@@ -55,8 +75,14 @@
      Represents the resources required for a job.
      """

-     memory: str = "100m"
-     cpu: int = 1
+     memory: str = DEFAULT_TASK_MEMORY
+     cpu: float = DEFAULT_TASK_CPU
+
+
+ # TODO: create a dt_run script that will:
+ # 1. create user and group if not exists
+ # 2. set the correct path
+ # 3. run the job based on its type


  class EntryPoint:
@@ -68,26 +94,30 @@ class EntryPoint:
      def __init__(
          self,
          type: JobType,
-         func: Optional[Callable] = None,
-         module_name: Optional[str] = None,
-         function_name: Optional[str] = None,
+         func: Callable,
      ):
-         if func is None and (module_name is None or function_name is None):
-             raise ValueError(
-                 "Either a function or module and function names must be provided."
-             )
          self.func = func
-         self.module_name = func.__module__ if func else module_name
-         self.function_name = func.__name__ if func else function_name
+         self.module_name = func.__module__
+         self.function_name = func.__name__
          self.type = type

+         # Find the absolute path to the repository and then the relative path to the module.
+         # This will be used in the creation of the code 'bundle' when building the image.
+         path_to_repo = run_shell_command("git rev-parse --show-toplevel")[0]
+         path_to_code = inspect.getfile(func)
+         package_root = path_to_code
+         module_parts = self.module_name.split(".")
+         for _ in module_parts:
+             package_root = os.path.dirname(package_root)
+         path_to_module = os.path.relpath(package_root, path_to_repo)
+         self.path_to_repo = path_to_repo
+         self.path_to_module = path_to_module
+
      def __call__(self, *args, **kwargs):
+         os.environ.update(kwargs.pop("env", {}))
          if self.type == JobType.BATCH:
-             if self.module_name and self.function_name:
-                 module = importlib.import_module(self.module_name)
-                 func = getattr(module, self.function_name)
-             elif self.func is not None:
-                 func = self.func
+             module = importlib.import_module(self.module_name)
+             func = getattr(module, self.function_name)
              return func(*args, **kwargs)

          elif self.type == JobType.SERVICE:
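
Note: EntryPoint now derives the code bundle location by walking one directory up from the function's source file for every component of its module name, then taking that directory relative to the git toplevel. A standalone sketch of the same computation with hypothetical paths (no git or datatailr imports needed):

    import os

    module_name = "mypkg.jobs.tasks"                  # hypothetical module
    path_to_code = "/repo/src/mypkg/jobs/tasks.py"    # hypothetical source file
    path_to_repo = "/repo"                            # hypothetical `git rev-parse --show-toplevel`

    package_root = path_to_code
    for _ in module_name.split("."):                  # one dirname() per module part
        package_root = os.path.dirname(package_root)
    print(os.path.relpath(package_root, path_to_repo))  # -> "src"
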
@@ -106,13 +136,28 @@
  class Job:
      def __init__(
          self,
-         environment: Optional[Environment],
          name: str,
-         image: Image,
-         run_as: Optional[Union[str, User]],
+         environment: Optional[Environment] = Environment.DEV,
+         image: Optional[Image] = None,
+         run_as: Optional[Union[str, User]] = User.signed_user(),
          resources: Resources = Resources(memory="100m", cpu=1),
          acl: Optional[ACL] = None,
+         python_requirements: str = "",
+         build_script_pre: str = "",
+         build_script_post: str = "",
+         type: JobType = JobType.UNKNOWN,
+         entrypoint: Optional[EntryPoint] = None,
+         update_existing: bool = False,
      ):
+         if environment is None:
+             environment = Environment.DEV
+
+         if update_existing:
+             existing_job = self.__get_existing__(name, environment)
+             if existing_job:
+                 self.from_dict(existing_job)
+                 return
+
          if run_as is None:
              run_as = User.signed_user()
          if environment is None:
@@ -126,11 +171,16 @@ class Job:
          self.name = name
          self.run_as = run_as
          self.resources = resources
+         if image is None:
+             image = Image(
+                 acl=self.acl,
+                 python_requirements=python_requirements,
+                 build_script_pre=build_script_pre,
+                 build_script_post=build_script_post,
+             )
          self.image = image
-
-         # Placeholders, to be set in derived classes
-         self.type: JobType = JobType.UNKNOWN
-         self.entrypoint = None
+         self.type = type
+         self.entrypoint = entrypoint
          self.__id = str(uuid.uuid4())

      @property
@@ -140,6 +190,25 @@
          """
          return self.__id

+     @classmethod
+     def __get_existing__(
+         cls, job_name: str, environment: Environment
+     ) -> Optional[dict]:
+         """
+         Retrieve an existing job instance from the DataTailr platform.
+         Based on the job name and environment.
+         """
+         job_list = __client__.ls(filter=f"name={job_name},environment={environment}")
+         if not isinstance(job_list, list):
+             return None
+         if len(job_list) == 0:
+             return None
+         if len(job_list) > 1:
+             raise BatchJobError(
+                 f"Multiple jobs found with name '{job_name}' in environment '{environment}'."
+             )
+         return job_list[0]
+
      def __repr__(self):
          return (
              f"Job(name={self.name}, environment={self.environment}, "
@@ -169,31 +238,64 @@
          job_dict["cpu"] = self.resources.cpu
          return job_dict

+     def from_dict(self, job_dict: dict):
+         self.name = job_dict["name"]
+         self.image = job_dict["image"]
+
+         environment = job_dict.get("environment", "dev")
+         environment = Environment(environment.lower())
+         self.environment = environment
+
+         user = job_dict["run_as"]["name"]
+         user = User(user.lower())
+         self.run_as = user
+
+         self.resources = Resources(memory=job_dict["memory"], cpu=job_dict["num_cpus"])
+         acl = job_dict.get("acl", None)
+         if acl is None:
+             acl = ACL(user=self.run_as)
+         else:
+             acl = ACL.from_dict(acl)
+         self.acl = acl
+         self.python_requirements = (job_dict.get("python_requirements", ""),)
+         self.build_script_pre = (job_dict.get("build_script_pre", ""),)
+         self.build_script_post = (job_dict.get("build_script_post", ""),)
+         self.type = JobType(job_dict.get("type", "unknown").lower())
+         self.state = job_dict["state"]
+         self.create_time = datetime.fromtimestamp(job_dict["create_time"] * 1e-6)
+         self.version = job_dict["version"]
+         self.__id = job_dict["id"]
+
      def to_json(self):
          """
          Convert the Job instance to a JSON string representation.
          """
          return json.dumps(self.to_dict())

-     def verify_repo_is_ready(self) -> Tuple[bool, str]:
-         is_committed = (
-             subprocess.run(
-                 ("git diff --exit-code"), shell=True, capture_output=True
-             ).returncode
-             == 0
-         )
+     def verify_repo_is_ready(self) -> Tuple[str, str]:
+         """
+         Verify if the repository is ready for job execution.
+         The check consists of two parts:
+         1. Check if there are uncommitted changes in the repository.
+         2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
+         Returns a tuple of (branch: str, commit_hash: str).
+         """
+         local_commit = run_shell_command("git rev-parse HEAD")[0]
+         branch_name = run_shell_command("git rev-parse --abbrev-ref HEAD")[0]
+
+         if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
+             return branch_name, local_commit
+         return_code = run_shell_command("git diff --exit-code")[1]
+         is_committed = return_code == 0
+
          if not is_committed:
-             return (
-                 False,
-                 "Uncommitted changes detected. Please commit your changes before running the job.",
+             raise RepoValidationError(
+                 "Please commit your changes before running the job."
              )

-         local_commit = subprocess.run(
-             ("git rev-parse HEAD"), shell=True, capture_output=True, text=True
-         ).stdout.strip()
          remote_commit = (
              subprocess.run(
-                 ("git ls-remote origin HEAD"),
+                 ("remote_commit = $(git ls-remote origin HEAD)"),
                  shell=True,
                  capture_output=True,
                  text=True,
@@ -203,43 +305,67 @@ class Job:
              )
          )
          if local_commit != remote_commit:
-             return (
-                 False,
-                 "Local commit does not match remote HEAD. Please pull the latest changes before running the job.",
+             raise RepoValidationError(
+                 "Please sync your local repository with the remote before running the job."
              )

-         branch = subprocess.run(
-             ("git rev-parse --abbrev-ref HEAD"),
-             shell=True,
-             capture_output=True,
-             text=True,
-         ).stdout.strip()
-         return True, ""
+         return branch_name, local_commit

-     def run(self) -> Tuple[bool, str]:
-         """
-         Run the job. This method should be implemented to execute the job logic.
-         It verifies the repository state and prepares the job for execution.
-         Returns a tuple of (success: bool, message: str).
-         If the repository is not ready, it returns False with an error message.
-         If the job runs successfully, it returns True with an empty message.
-         """
-         if is_dt_installed():
-             check_result = self.verify_repo_is_ready()
-             if not check_result[0]:
-                 raise RepoValidationError(check_result[1])
-             logger.info(
-                 f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
-             )
+     def __prepare__(self) -> str:
+         branch_name, local_commit = self.verify_repo_is_ready()
+         self.image.update(
+             branch_name=branch_name,
+             commit_hash=local_commit,
+         )
+         logger.info(
+             f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
+         )

-             with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
-                 temp_file.write(self.to_json().encode())
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
+             temp_file.write(self.to_json().encode())
+         return temp_file.name

-             dt__Job().run(f"file://{temp_file.name}")
-             os.remove(temp_file.name)
+     def get_schedule_args(self) -> dict:
+         """
+         Returns additional arguments for scheduling the job.
+         Override or extend this method as needed.
+         """
+         return {}

-             return True, ""
-         else:
+     def __run_command__(self, command: str) -> Tuple[bool, str]:
+         """
+         Run a command in the context of the job.
+         This is used to execute the job's entry point.
+         """
+         if not is_dt_installed():
              raise NotImplementedError(
                  "DataTailr is not installed. Please install DataTailr to run this job."
              )
+         try:
+             temp_file_name = self.__prepare__()
+
+             if command == "run":
+                 __client__.run(f"file://{temp_file_name}", **self.get_schedule_args())
+             elif command == "save":
+                 __client__.save(f"file://{temp_file_name}", **self.get_schedule_args())
+             else:
+                 raise ValueError(f"Unknown command: {command}")
+             os.remove(temp_file_name)
+         except Exception as e:
+             logger.error(f"Error running command '{command}': {e}")
+             return False, str(e)
+         return True, f"Job '{self.name}' {command}d successfully."
+
+     def save(self) -> Tuple[bool, str]:
+         """
+         Save the job to the DataTailr platform.
+         If the job already exists, it will be updated.
+         """
+         return self.__run_command__("save")
+
+     def run(self) -> Tuple[bool, str]:
+         """
+         Run the job. This method should be implemented to execute the job logic.
+         It verifies the repository state and prepares the job for execution.
+         """
+         return self.__run_command__("run")
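
Note: run() and save() now both go through __run_command__, which validates the repository, serialises the job to a temporary JSON file, hands it to the dt__Job client, and returns a (success, message) tuple rather than raising out of the command. A minimal calling sketch, assuming a DataTailr installation and using the bare Job class only to show the shape (concrete job types such as Batch supply the entrypoint):

    from datatailr.scheduler import Environment, Job

    job = Job(name="nightly-report", environment=Environment.DEV)

    ok, message = job.save()   # create or update the job definition
    if ok:
        ok, message = job.run()
    print(message)
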