datatailr 0.1.21__tar.gz → 0.1.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datatailr has been flagged as potentially problematic; consult the package registry's advisory page for details.

Files changed (39)
  1. {datatailr-0.1.21/src/datatailr.egg-info → datatailr-0.1.23}/PKG-INFO +1 -1
  2. {datatailr-0.1.21 → datatailr-0.1.23}/pyproject.toml +1 -1
  3. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/excel.py +1 -1
  4. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/arguments_cache.py +2 -6
  5. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/base.py +21 -12
  6. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/batch.py +18 -5
  7. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/user.py +4 -2
  8. {datatailr-0.1.21 → datatailr-0.1.23/src/datatailr.egg-info}/PKG-INFO +1 -1
  9. {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run.py +6 -8
  10. {datatailr-0.1.21 → datatailr-0.1.23}/LICENSE +0 -0
  11. {datatailr-0.1.21 → datatailr-0.1.23}/README.md +0 -0
  12. {datatailr-0.1.21 → datatailr-0.1.23}/setup.cfg +0 -0
  13. {datatailr-0.1.21 → datatailr-0.1.23}/setup.py +0 -0
  14. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/__init__.py +0 -0
  15. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/acl.py +0 -0
  16. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/blob.py +0 -0
  17. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/build/__init__.py +0 -0
  18. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/build/image.py +0 -0
  19. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/dt_json.py +0 -0
  20. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/errors.py +0 -0
  21. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/group.py +0 -0
  22. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/logging.py +0 -0
  23. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/__init__.py +0 -0
  24. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/batch_decorator.py +0 -0
  25. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/constants.py +0 -0
  26. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/schedule.py +0 -0
  27. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/scheduler/utils.py +0 -0
  28. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/utils.py +0 -0
  29. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/version.py +0 -0
  30. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr/wrapper.py +0 -0
  31. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/SOURCES.txt +0 -0
  32. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/dependency_links.txt +0 -0
  33. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/entry_points.txt +0 -0
  34. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/requires.txt +0 -0
  35. {datatailr-0.1.21 → datatailr-0.1.23}/src/datatailr.egg-info/top_level.txt +0 -0
  36. {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_app.py +0 -0
  37. {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_batch.py +0 -0
  38. {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_excel.py +0 -0
  39. {datatailr-0.1.21 → datatailr-0.1.23}/src/sbin/datatailr_run_service.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.21
3
+ Version: 0.1.23
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "datatailr"
7
- version = "0.1.21"
7
+ version = "0.1.23"
8
8
  description = "Ready-to-Use Platform That Drives Business Insights"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,5 +1,5 @@
1
1
  try:
2
- from dt.excel import Addin
2
+ from dt.excel import Addin # type: ignore
3
3
  except ImportError:
4
4
 
5
5
  class DummyAddin:
@@ -21,7 +21,7 @@ and the inner dictionaries contain the arguments.
21
21
  This module is for internal use of the datatailr package.
22
22
  """
23
23
 
24
- from datatailr.dt_json import json, decode_json
24
+ from datatailr.dt_json import json
25
25
  import os
26
26
  import pickle
27
27
  from typing import Any, Dict, Optional
@@ -101,12 +101,8 @@ class ArgumentsCache:
101
101
  )
102
102
  if batch_run_id is None:
103
103
  return arg_keys[job]
104
- arguments_mapping = decode_json(
105
- os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
106
- )
107
- arguments_mapping = {value: key for key, value in arguments_mapping.items()}
108
104
  args = {
109
- arguments_mapping.get(name, name): self.get_result(batch_run_id, value)
105
+ name: self.get_result(batch_run_id, value)
110
106
  for name, value in arg_keys[job].items()
111
107
  }
112
108
  return args
@@ -11,8 +11,7 @@
11
11
  from __future__ import annotations
12
12
 
13
13
  from datetime import datetime
14
- import importlib
15
- import inspect
14
+ import importlib.util
16
15
  import json
17
16
  import os
18
17
  import tempfile
@@ -96,12 +95,17 @@ class EntryPoint:
96
95
 
97
96
  # Find the absolute path to the repository and then the relative path to the module.
98
97
  # This will be used in the creation of the code 'bundle' when building the image.
99
- path_to_repo = run_shell_command("git rev-parse --show-toplevel")[0]
100
- path_to_code = inspect.getfile(func)
101
- package_root = path_to_code
98
+ module_spec = importlib.util.find_spec(func.__module__)
99
+ if module_spec is not None and module_spec.origin is not None:
100
+ package_root = module_spec.origin
101
+ else:
102
+ package_root = "."
102
103
  module_parts = self.module_name.split(".")
103
- for _ in module_parts:
104
+ for _ in range(len(module_parts) - 1):
104
105
  package_root = os.path.dirname(package_root)
106
+ path_to_repo = run_shell_command(
107
+ f"cd {package_root} && git rev-parse --show-toplevel"
108
+ )[0]
105
109
  path_to_module = os.path.relpath(package_root, path_to_repo)
106
110
  self.path_to_repo = path_to_repo
107
111
  self.path_to_module = path_to_module
@@ -123,7 +127,7 @@ class EntryPoint:
123
127
  return f"EntryPoint({self.function_name} from {self.module_name}, type={self.type})"
124
128
 
125
129
  def __str__(self):
126
- return f"{self.module_name}.{self.function_name}"
130
+ return f"{self.module_name}:{self.function_name}"
127
131
 
128
132
 
129
133
  class Job:
@@ -277,20 +281,25 @@ class Job:
277
281
  2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
278
282
  Returns a tuple of (branch: str, commit_hash: str).
279
283
  """
280
- local_commit = run_shell_command("git rev-parse HEAD")[0]
281
- branch_name = run_shell_command("git rev-parse --abbrev-ref HEAD")[0]
284
+ path_to_repo = self.image.path_to_repo or "."
285
+ local_commit = run_shell_command(f"cd {path_to_repo} && git rev-parse HEAD")[0]
286
+ branch_name = run_shell_command(
287
+ f"cd {path_to_repo} && git rev-parse --abbrev-ref HEAD"
288
+ )[0]
282
289
 
283
290
  if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
284
291
  return branch_name, local_commit
285
- return_code = run_shell_command("git diff --exit-code")[1]
286
- is_committed = return_code == 0
292
+ return_code = run_shell_command(f"cd {path_to_repo} && git diff --exit-code")
293
+ is_committed = return_code is not None and return_code[1] == 0
287
294
 
288
295
  if not is_committed:
289
296
  raise RepoValidationError(
290
297
  "Please commit your changes before running the job."
291
298
  )
292
299
 
293
- remote_commit = run_shell_command("git ls-remote origin HEAD")[0].split("\t")[0]
300
+ remote_commit = run_shell_command(
301
+ f"cd {path_to_repo} && git ls-remote origin HEAD"
302
+ )[0].split("\t")[0]
294
303
 
295
304
  if local_commit != remote_commit:
296
305
  raise RepoValidationError(
@@ -18,7 +18,6 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
18
18
  import uuid
19
19
 
20
20
  from datatailr import Image
21
- from datatailr.dt_json import encode_json
22
21
  from datatailr.errors import BatchJobError
23
22
  from datatailr.logging import DatatailrLogger
24
23
  from datatailr.scheduler.base import (
@@ -114,7 +113,7 @@ class BatchJob:
114
113
  isinstance(dep, int) for dep in self.dependencies
115
114
  ), "All dependencies must be integers representing job IDs."
116
115
  self.dag.add_job(self)
117
- self.__argument_mapping = argument_mapping or {}
116
+ self.argument_mapping = argument_mapping or {}
118
117
 
119
118
  def __call__(self, *args, **kwds) -> BatchJob:
120
119
  """
@@ -200,6 +199,7 @@ class BatchJob:
200
199
  """
201
200
  return {
202
201
  "display_name": self.name,
202
+ "name": self.name,
203
203
  "child_number": self.__id,
204
204
  "entrypoint": str(self.entrypoint),
205
205
  "memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
@@ -235,7 +235,7 @@ class BatchJob:
235
235
 
236
236
  def __add_dependency__(self, other):
237
237
  self.dependencies.add(other.__id)
238
- arg_name = self.__argument_mapping.get(other.name, other.name)
238
+ arg_name = self.argument_mapping.get(other.name, other.name)
239
239
  if arg_name is not None:
240
240
  self.__args[arg_name] = other
241
241
 
@@ -282,7 +282,6 @@ class BatchJob:
282
282
  "DATATAILR_BATCH_ID": str(self.dag.id),
283
283
  "DATATAILR_JOB_ID": str(self.__id),
284
284
  "DATATAILR_JOB_NAME": self.name,
285
- "DATATAILR_JOB_ARGUMENT_MAPPING": encode_json(self.__argument_mapping),
286
285
  }
287
286
  self.entrypoint(env=env)
288
287
  else:
@@ -464,9 +463,23 @@ class Batch(Job):
464
463
  def arg_name(arg: Union[BatchJob, str]) -> str:
465
464
  return arg.name if isinstance(arg, BatchJob) else arg
466
465
 
466
+ def merged(dst: dict[str, str], src: dict[str, str]) -> dict[str, str]:
467
+ # copy so we don't mutate the original mapping
468
+ out = dict(dst)
469
+ seen_vals = set(out.values())
470
+ for k, v in src.items():
471
+ if v not in seen_vals:
472
+ out[k] = v
473
+ seen_vals.add(v)
474
+ return out
475
+
467
476
  args = {
468
- j.name: {k: arg_name(v) for k, v in j.args.items()} for j in self.__jobs
477
+ j.name: merged(
478
+ j.argument_mapping, {k: arg_name(v) for k, v in j.args.items()}
479
+ )
480
+ for j in self.__jobs
469
481
  }
482
+
470
483
  __ARGUMENTS_CACHE__.add_arguments(self.id, args)
471
484
  if not self.__local_run and is_dt_installed():
472
485
  return super().run()
@@ -162,7 +162,7 @@ class User:
162
162
  last_name: str,
163
163
  email: str,
164
164
  password: str,
165
- primary_group: int,
165
+ primary_group: str,
166
166
  is_system_user: bool = False,
167
167
  ) -> Optional["User"]:
168
168
  if is_system_user:
@@ -177,6 +177,7 @@ class User:
177
177
  email=email,
178
178
  primary_group=primary_group,
179
179
  system=is_system_user,
180
+ json_enrichened=True,
180
181
  )
181
182
  else:
182
183
  new_user = __client__.add(
@@ -187,9 +188,10 @@ class User:
187
188
  password=password,
188
189
  primary_group=primary_group,
189
190
  system=is_system_user,
191
+ json_enrichened=True,
190
192
  )
191
193
 
192
- return new_user
194
+ return User(new_user["name"]) if new_user else None
193
195
 
194
196
  @staticmethod
195
197
  def exists(name: str) -> bool:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.21
3
+ Version: 0.1.23
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -27,7 +27,6 @@
27
27
  # DATATAILR_GID - the group ID of the group as it is defined in the system.
28
28
  # DATATAILR_JOB_TYPE - the type of job to run. (batch\service\app\excel\IDE)
29
29
  # Job environment variables (not all are always relevant, depending on the job type):
30
- # DATATAILR_JOB_ARGUMENT_MAPPING - a JSON string mapping job argument names to their
31
30
  # DATATAILR_BATCH_RUN_ID - the unique identifier for the batch run.
32
31
  # DATATAILR_BATCH_ID - the unique identifier for the batch.
33
32
  # DATATAILR_JOB_ID - the unique identifier for the job.
@@ -37,11 +36,14 @@ import os
37
36
  import sys
38
37
  from typing import Tuple
39
38
  from datatailr.logging import DatatailrLogger
40
- from datatailr.dt_json import encode_json
41
-
39
+ from datatailr.utils import is_dt_installed
42
40
 
43
41
  logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
44
42
 
43
+ if not is_dt_installed():
44
+ logger.error("Datatailr is not installed.")
45
+ sys.exit(1)
46
+
45
47
 
46
48
  def get_env_var(name: str, default: str | None = None) -> str:
47
49
  """
@@ -83,7 +85,7 @@ def run_command_as_user(command: str, user: str, env_vars: dict):
83
85
  Run a command as a specific user with the given environment variables.
84
86
  """
85
87
  env_vars.update({"PATH": get_env_var("PATH")})
86
- env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH")})
88
+ env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH", "")})
87
89
  env_vars_str = " ".join(f"{key}='{value}'" for key, value in env_vars.items())
88
90
  full_command = f"sudo -u {user} {env_vars_str} {command}"
89
91
  logger.debug(f"Running command: {full_command}")
@@ -103,16 +105,12 @@ def main():
103
105
  if job_type == "batch":
104
106
  run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
105
107
  batch_id = get_env_var("DATATAILR_BATCH_ID")
106
- job_argument_mapping = get_env_var(
107
- "DATATAILR_JOB_ARGUMENT_MAPPING", encode_json({})
108
- )
109
108
  entrypoint = get_env_var("DATATAILR_BATCH_ENTRYPOINT")
110
109
  env = {
111
110
  "DATATAILR_BATCH_RUN_ID": run_id,
112
111
  "DATATAILR_BATCH_ID": batch_id,
113
112
  "DATATAILR_JOB_ID": job_id,
114
113
  "DATATAILR_BATCH_ENTRYPOINT": entrypoint,
115
- "DATATAILR_JOB_ARGUMENT_MAPPING": job_argument_mapping,
116
114
  }
117
115
  run_command_as_user("datatailr_run_batch", user, env)
118
116
  elif job_type == "service":
File without changes
File without changes
File without changes
File without changes