datatailr 0.1.15__tar.gz → 0.1.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datatailr might be problematic.
Files changed (38)
  1. {datatailr-0.1.15/src/datatailr.egg-info → datatailr-0.1.17}/PKG-INFO +3 -3
  2. {datatailr-0.1.15 → datatailr-0.1.17}/README.md +2 -2
  3. {datatailr-0.1.15 → datatailr-0.1.17}/pyproject.toml +1 -1
  4. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/build/image.py +80 -43
  5. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/base.py +51 -11
  6. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/schedule.py +1 -1
  7. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/user.py +4 -3
  8. {datatailr-0.1.15 → datatailr-0.1.17/src/datatailr.egg-info}/PKG-INFO +3 -3
  9. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run.py +5 -1
  10. {datatailr-0.1.15 → datatailr-0.1.17}/LICENSE +0 -0
  11. {datatailr-0.1.15 → datatailr-0.1.17}/setup.cfg +0 -0
  12. {datatailr-0.1.15 → datatailr-0.1.17}/setup.py +0 -0
  13. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/__init__.py +0 -0
  14. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/acl.py +0 -0
  15. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/blob.py +0 -0
  16. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/build/__init__.py +0 -0
  17. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/dt_json.py +0 -0
  18. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/errors.py +0 -0
  19. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/group.py +0 -0
  20. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/logging.py +0 -0
  21. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/__init__.py +0 -0
  22. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/arguments_cache.py +0 -0
  23. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/batch.py +0 -0
  24. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/batch_decorator.py +0 -0
  25. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/constants.py +0 -0
  26. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/scheduler/utils.py +0 -0
  27. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/utils.py +0 -0
  28. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/version.py +0 -0
  29. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr/wrapper.py +0 -0
  30. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/SOURCES.txt +0 -0
  31. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/dependency_links.txt +0 -0
  32. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/entry_points.txt +0 -0
  33. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/requires.txt +0 -0
  34. {datatailr-0.1.15 → datatailr-0.1.17}/src/datatailr.egg-info/top_level.txt +0 -0
  35. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_app.py +0 -0
  36. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_batch.py +0 -0
  37. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_excel.py +0 -0
  38. {datatailr-0.1.15 → datatailr-0.1.17}/src/sbin/datatailr_run_service.py +0 -0
{datatailr-0.1.15/src/datatailr.egg-info → datatailr-0.1.17}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.15
+Version: 0.1.17
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT
@@ -84,9 +84,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
README.md

@@ -47,9 +47,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "datatailr"
-version = "0.1.15"
+version = "0.1.17"
 description = "Ready-to-Use Platform That Drives Business Insights"
 readme = "README.md"
 requires-python = ">=3.9"
src/datatailr/build/image.py

@@ -10,6 +10,7 @@
 
 import json
 import os
+import re
 from typing import Optional
 
 from datatailr import ACL, User
@@ -25,6 +26,7 @@ class Image:
     def __init__(
         self,
         acl: Optional[ACL] = None,
+        python_version: str = "3.12",
         python_requirements: str | list[str] = "",
         build_script_pre: str = "",
         build_script_post: str = "",
@@ -33,69 +35,103 @@ class Image:
         path_to_repo: Optional[str] = None,
         path_to_module: Optional[str] = None,
     ):
-        if acl is None:
-            signed_user = User.signed_user()
-            if signed_user is None:
-                raise ValueError(
-                    "ACL cannot be None. Please provide a valid ACL or ensure a user is signed in."
-                )
-        elif not isinstance(acl, ACL):
-            raise TypeError("acl must be an instance of ACL.")
-        self.acl = acl or ACL(signed_user)
-
-        if isinstance(python_requirements, str) and os.path.isfile(python_requirements):
-            with open(python_requirements, "r") as f:
-                python_requirements = f.read()
-        elif isinstance(python_requirements, list):
-            python_requirements = "\n".join(python_requirements)
-        if not isinstance(python_requirements, str):
+        self.python_version = python_version
+        self.acl = acl
+        self.python_requirements = python_requirements
+        self.build_script_pre = build_script_pre
+        self.build_script_post = build_script_post
+        self.branch_name = branch_name
+        self.commit_hash = commit_hash
+        self.path_to_repo = path_to_repo
+        self.path_to_module = path_to_module
+
+    def __repr__(self):
+        return f"Image(acl={self.acl},)"
+
+    @property
+    def python_version(self):
+        return self._python_version
+
+    @python_version.setter
+    def python_version(self, value: str):
+        if not isinstance(value, str):
+            raise TypeError("python_version must be a string.")
+        if not re.match(r"^\d+\.\d+(\.\d+)?$", value):
+            raise ValueError("Invalid python_version format. Expected format: X.Y[.Z]")
+        self._python_version = value
+
+    @property
+    def python_requirements(self):
+        return self._python_requirements
+
+    @python_requirements.setter
+    def python_requirements(self, value: str | list[str]):
+        if isinstance(value, str) and os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        elif isinstance(value, list):
+            value = "\n".join(value)
+        if not isinstance(value, str):
             raise TypeError(
                 "python_requirements must be a string or a file path to a requirements file."
             )
-        self.python_requirements = python_requirements
+        self._python_requirements = value
+
+    @property
+    def build_script_pre(self):
+        return self._build_script_pre
 
-        if os.path.isfile(build_script_pre):
-            with open(build_script_pre, "r") as f:
-                build_script_pre = f.read()
-        if not isinstance(build_script_pre, str):
+    @build_script_pre.setter
+    def build_script_pre(self, value: str):
+        if not isinstance(value, str):
            raise TypeError(
                 "build_script_pre must be a string or a file path to a script file."
             )
-        self.build_script_pre = build_script_pre
+        if os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        self._build_script_pre = value
+
+    @property
+    def build_script_post(self):
+        return self._build_script_post
 
-        if os.path.isfile(build_script_post):
-            with open(build_script_post, "r") as f:
-                build_script_post = f.read()
-        if not isinstance(build_script_post, str):
+    @build_script_post.setter
+    def build_script_post(self, value: str):
+        if not isinstance(value, str):
            raise TypeError(
                 "build_script_post must be a string or a file path to a script file."
             )
-        self.build_script_post = build_script_post
-        self.branch_name = branch_name
-        self.commit_hash = commit_hash
-        self.path_to_repo = path_to_repo
-        self.path_to_module = path_to_module
+        if os.path.isfile(value):
+            with open(value, "r") as f:
+                value = f.read()
+        self._build_script_post = value
 
-    def __repr__(self):
-        return f"Image(acl={self.acl},)"
+    @property
+    def acl(self):
+        return self._acl
+
+    @acl.setter
+    def acl(self, value: Optional[ACL]):
+        if value is None:
+            signed_user = User.signed_user()
+            if signed_user is None:
+                raise ValueError(
+                    "ACL cannot be None. Please provide a valid ACL or ensure a user is signed in."
+                )
+        elif not isinstance(value, ACL):
+            raise TypeError("acl must be an instance of ACL.")
+        self._acl = value or ACL(signed_user)
 
     def update(self, **kwargs):
         for key, value in kwargs.items():
-            if key == "acl" and not isinstance(value, ACL):
-                raise TypeError("acl must be an instance of ACL.")
-            elif key == "python_requirements" and not isinstance(value, str):
-                raise TypeError("python_requirements must be a string.")
-            elif key == "build_script_pre" and not isinstance(value, str):
-                raise TypeError("build_script_pre must be a string.")
-            elif key == "build_script_post" and not isinstance(value, str):
-                raise TypeError("build_script_post must be a string.")
-            elif (
+            if (
                 key in ["branch_name", "commit_hash", "path_to_repo", "path_to_module"]
                 and value is not None
                 and not isinstance(value, str)
             ):
                 raise TypeError(f"{key} must be a string or None.")
-            if key not in self.__dict__:
+            if not hasattr(self, key):
                 raise AttributeError(
                     f"'{self.__class__.__name__}' object has no attribute '{key}'"
                 )
@@ -107,6 +143,7 @@ class Image:
         """
         return {
             "acl": self.acl.to_dict(),
+            "python_version": self.python_version,
             "python_requirements": self.python_requirements,
             "build_script_pre": self.build_script_pre,
             "build_script_post": self.build_script_post,
src/datatailr/scheduler/base.py

@@ -78,12 +78,6 @@ class Resources:
     cpu: float = DEFAULT_TASK_CPU
 
 
-# TODO: create a dt_run script that will:
-# 1. create user and group if not exists
-# 2. set the correct path
-# 3. run the job based on its type
-
-
 class EntryPoint:
     """
     Represents an entry point for a DataTailr job.
@@ -139,12 +133,13 @@ class Job:
         environment: Optional[Environment] = Environment.DEV,
         image: Optional[Image] = None,
         run_as: Optional[Union[str, User]] = None,
-        resources: Resources = Resources(memory="100m", cpu=1),
+        resources: Resources = Resources(memory="128m", cpu=0.25),
         acl: Optional[ACL] = None,
+        python_version: str = "3.12",
         python_requirements: str = "",
         build_script_pre: str = "",
         build_script_post: str = "",
-        type: JobType = JobType.UNKNOWN,
+        type: Optional[JobType] = JobType.UNKNOWN,
         entrypoint: Optional[EntryPoint] = None,
         update_existing: bool = False,
     ):
@@ -173,12 +168,13 @@ class Job:
         if image is None:
             image = Image(
                 acl=self.acl,
+                python_version=python_version,
                 python_requirements=python_requirements,
                 build_script_pre=build_script_pre,
                 build_script_post=build_script_post,
             )
         self.image = image
-        self.type = type
+        self.type = type if entrypoint is None else entrypoint.type
         self.entrypoint = entrypoint
         self.__id = str(uuid.uuid4())
 
@@ -229,6 +225,8 @@ class Job:
             if isinstance(self.run_as, User)
             else self.run_as,
             "acl": self.acl.to_dict(),
+            "memory": self.resources.memory,
+            "cpu": self.resources.cpu,
         }
         if self.type != JobType.BATCH:
             job_dict["entrypoint"] = str(self.entrypoint) if self.entrypoint else None
@@ -338,6 +336,8 @@ class Job:
             __client__.run(f"file://{temp_file_name}", **self.get_schedule_args())
         elif command == "save":
             __client__.save(f"file://{temp_file_name}", **self.get_schedule_args())
+        elif command == "start":
+            __client__.start(self.name, environment=self.environment)
         else:
             raise ValueError(f"Unknown command: {command}")
         os.remove(temp_file_name)
@@ -350,12 +350,52 @@ class Job:
         """
         Save the job to the DataTailr platform.
         If the job already exists, it will be updated.
+        The repository state is verified and an image is prepared for execution.
         """
         return self.__run_command__("save")
 
     def run(self) -> Tuple[bool, str]:
         """
-        Run the job. This method should be implemented to execute the job logic.
-        It verifies the repository state and prepares the job for execution.
+        Run the job.
+        This is equivalent to running job.save() and then job.start().
         """
         return self.__run_command__("run")
+
+    def start(self) -> Tuple[bool, str]:
+        """
+        Start the job. This will start the job execution on a schedule for batches if a schedule was specified.
+        For other types of jobs and for batches without a schedule the job will be run immediately.
+        """
+        return self.__run_command__("start")
+
+    def promote(
+        self,
+        from_environment: Optional[Environment] = None,
+        version: Optional[str | int] = None,
+    ) -> Tuple[bool, str]:
+        """
+        Promote the job to the next environment.
+        This method is used to promote a version of the job from one environment to the next one.
+        If none of the environments to promote from are specified, it defaults to promote from all environments.
+        """
+        promote_kwargs = {}
+        if version is not None:
+            promote_kwargs["version"] = str(version)
+        if from_environment is not None:
+            promote_kwargs["environment"] = str(from_environment)
+        try:
+            __client__.promote(self.name, **promote_kwargs)
+            return True, f"Job '{self.name}' promoted successfully."
+        except Exception as e:
+            logger.error(f"Error promoting job '{self.name}': {e}")
+            return False, str(e)
+
+    def versions(self, environment: Optional[Environment] = None) -> list[str] | None:
+        """
+        List all versions of the job in the specified environment.
+        If no environment is specified, it lists versions across all environments.
+        """
+        command_kwargs = {}
+        if environment is not None:
+            command_kwargs["environment"] = str(environment)
+        return __client__.versions(self.name, **command_kwargs)
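The new `start`, `promote`, and `versions` methods round out the job lifecycle, and `run()` is now documented as `save()` followed by `start()`. Below is a hedged sketch of how that lifecycle might be driven, assuming `Job` and `Environment` are re-exported from `datatailr.scheduler` (the import path is an assumption) and that `job` has already been constructed with a valid configuration; actually running it requires a configured DataTailr environment.

```python
from datatailr.scheduler import Environment, Job  # assumed import path


def release(job: Job) -> None:
    """Save a job, start it, then promote it and inspect its versions."""
    ok, message = job.save()   # verifies the repository state and prepares an image
    if not ok:
        raise RuntimeError(f"save failed: {message}")

    ok, message = job.start()  # batches with a schedule are scheduled; everything else runs now
    if not ok:
        raise RuntimeError(f"start failed: {message}")

    # Promote a specific version out of DEV; both arguments are optional.
    ok, message = job.promote(from_environment=Environment.DEV, version=1)
    print(message)

    # List the versions available in DEV (omit the argument for all environments).
    print(job.versions(environment=Environment.DEV))
```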
src/datatailr/scheduler/schedule.py

@@ -105,7 +105,7 @@ class Schedule:
         match = re.match(r"^(.*?)\s*\((.*?)\)$", result)
         if match:
             cron_expression, schedule_expression = match.groups()
-            self.cron_expression = cron_expression.strip()
+            self.cron_expression = "0 " + cron_expression.strip()
             self.schedule_expression = schedule_expression.strip()
             self.__is_set__ = True
 
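The only behavioural change here is the `"0 "` prefix on the parsed cron expression, which adds one leading field to whatever the backend returned (for example a seconds or minutes column, depending on the cron dialect in use). A self-contained sketch of the parsing step, using a hypothetical `result` string in the `<cron> (<description>)` shape the regex appears to expect:

```python
import re

# Hypothetical backend result in the "<cron expression> (<readable schedule>)" shape.
result = "30 9 * * MON-FRI (every weekday at 09:30)"

match = re.match(r"^(.*?)\s*\((.*?)\)$", result)
if match:
    cron_expression, schedule_expression = match.groups()
    cron_expression = "0 " + cron_expression.strip()  # 0.1.17 prepends a leading field
    schedule_expression = schedule_expression.strip()
    print(cron_expression)      # 0 30 9 * * MON-FRI
    print(schedule_expression)  # every weekday at 09:30
```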
src/datatailr/user.py

@@ -170,7 +170,7 @@ class User:
                 raise Warning(
                     "Password is not required for system users. It will be ignored."
                 )
-            User.__client__.add(
+            new_user = __client__.add(
                 name,
                 first_name=first_name,
                 last_name=last_name,
@@ -179,7 +179,7 @@ class User:
                 system=is_system_user,
             )
         else:
-            __client__.add(
+            new_user = __client__.add(
                 name,
                 first_name=first_name,
                 last_name=last_name,
@@ -188,7 +188,8 @@ class User:
                 primary_group=primary_group,
                 system=is_system_user,
             )
-        return User.get(name)
+
+        return new_user
 
     @staticmethod
     def exists(name: str) -> bool:
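Both branches now keep the object returned by `__client__.add(...)` and return it directly, instead of re-fetching with `User.get(name)`. A hedged sketch of the calling side follows; the method name `User.add` and its exact signature are not visible in this hunk and are assumptions based on the keyword names shown, and the argument values are hypothetical.

```python
from datatailr import User  # User is importable from the package root, as seen in image.py

# Hypothetical call; only the keyword names mirror the ones in the diff.
new_user = User.add(
    "jdoe",
    first_name="Jane",
    last_name="Doe",
)
# In 0.1.15 this returned User.get("jdoe"); in 0.1.17 it is whatever the
# underlying __client__.add(...) call returned.
print(new_user)
```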
{datatailr-0.1.15 → datatailr-0.1.17/src/datatailr.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.15
+Version: 0.1.17
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT
@@ -84,9 +84,9 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch, Batch
+from datatailr.scheduler import batch_job, Batch
 
-@batch_job_job()
+@batch_job()
 def func_no_args() -> str:
     return "no_args"
 
src/sbin/datatailr_run.py

@@ -82,6 +82,7 @@ def run_command_as_user(command: str, user: str, env_vars: dict):
     Run a command as a specific user with the given environment variables.
     """
     env_vars.update({"PATH": get_env_var("PATH")})
+    env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH")})
     env_vars_str = " ".join(f"{key}='{value}'" for key, value in env_vars.items())
     full_command = f"sudo -u {user} {env_vars_str} {command}"
     logger.debug(f"Running command: {full_command}")
@@ -94,7 +95,6 @@ def main():
 
     job_name = get_env_var("DATATAILR_JOB_NAME")
     job_id = get_env_var("DATATAILR_JOB_ID")
-    entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
 
     if job_type == "batch":
         run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
@@ -102,6 +102,7 @@ def main():
         job_argument_mapping = get_env_var(
             "DATATAILR_JOB_ARGUMENT_MAPPING", encode_json({})
         )
+        entrypoint = get_env_var("DATATAILR_BATCH_ENTRYPOINT")
         env = {
             "DATATAILR_BATCH_RUN_ID": run_id,
             "DATATAILR_BATCH_ID": batch_id,
@@ -112,6 +113,7 @@ def main():
         run_command_as_user("datatailr_run_batch", user, env)
     elif job_type == "service":
         port = get_env_var("DATATAILR_SERVICE_PORT")
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
@@ -120,6 +122,7 @@ def main():
         }
         run_command_as_user("datatailr_run_service", user, env)
     elif job_type == "app":
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
@@ -128,6 +131,7 @@ def main():
         run_command_as_user("datatailr_run_app", user, env)
     elif job_type == "excel":
         host = get_env_var("DATATAILR_HOST")
+        entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,