datatailr: datatailr-0.1.21-py3-none-any.whl → datatailr-0.1.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datatailr might be problematic. Click here for more details.

datatailr/excel.py CHANGED
@@ -1,5 +1,5 @@
1
1
  try:
2
- from dt.excel import Addin
2
+ from dt.excel import Addin # type: ignore
3
3
  except ImportError:
4
4
 
5
5
  class DummyAddin:
datatailr/sbin/datatailr_run.py CHANGED
@@ -27,7 +27,6 @@
27
27
  # DATATAILR_GID - the group ID of the group as it is defined in the system.
28
28
  # DATATAILR_JOB_TYPE - the type of job to run. (batch\service\app\excel\IDE)
29
29
  # Job environment variables (not all are always relevant, depending on the job type):
30
- # DATATAILR_JOB_ARGUMENT_MAPPING - a JSON string mapping job argument names to their
31
30
  # DATATAILR_BATCH_RUN_ID - the unique identifier for the batch run.
32
31
  # DATATAILR_BATCH_ID - the unique identifier for the batch.
33
32
  # DATATAILR_JOB_ID - the unique identifier for the job.
@@ -37,7 +36,6 @@ import os
37
36
  import sys
38
37
  from typing import Tuple
39
38
  from datatailr.logging import DatatailrLogger
40
- from datatailr.dt_json import encode_json
41
39
 
42
40
 
43
41
  logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
@@ -83,7 +81,7 @@ def run_command_as_user(command: str, user: str, env_vars: dict):
83
81
  Run a command as a specific user with the given environment variables.
84
82
  """
85
83
  env_vars.update({"PATH": get_env_var("PATH")})
86
- env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH")})
84
+ env_vars.update({"PYTHONPATH": get_env_var("PYTHONPATH", "")})
87
85
  env_vars_str = " ".join(f"{key}='{value}'" for key, value in env_vars.items())
88
86
  full_command = f"sudo -u {user} {env_vars_str} {command}"
89
87
  logger.debug(f"Running command: {full_command}")
@@ -103,16 +101,12 @@ def main():
103
101
  if job_type == "batch":
104
102
  run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
105
103
  batch_id = get_env_var("DATATAILR_BATCH_ID")
106
- job_argument_mapping = get_env_var(
107
- "DATATAILR_JOB_ARGUMENT_MAPPING", encode_json({})
108
- )
109
104
  entrypoint = get_env_var("DATATAILR_BATCH_ENTRYPOINT")
110
105
  env = {
111
106
  "DATATAILR_BATCH_RUN_ID": run_id,
112
107
  "DATATAILR_BATCH_ID": batch_id,
113
108
  "DATATAILR_JOB_ID": job_id,
114
109
  "DATATAILR_BATCH_ENTRYPOINT": entrypoint,
115
- "DATATAILR_JOB_ARGUMENT_MAPPING": job_argument_mapping,
116
110
  }
117
111
  run_command_as_user("datatailr_run_batch", user, env)
118
112
  elif job_type == "service":
datatailr/scheduler/arguments_cache.py CHANGED
@@ -21,16 +21,18 @@ and the inner dictionaries contain the arguments.
21
21
  This module is for internal use of the datatailr package.
22
22
  """
23
23
 
24
- from datatailr.dt_json import json, decode_json
24
+ from datatailr.dt_json import json
25
25
  import os
26
26
  import pickle
27
27
  from typing import Any, Dict, Optional
28
28
 
29
29
  from datatailr import is_dt_installed, Blob
30
30
  from datatailr.errors import DatatailrError
31
+ from datatailr.logging import DatatailrLogger
31
32
 
32
33
 
33
34
  __BLOB_STORAGE__ = Blob()
35
+ logger = DatatailrLogger(__name__).get_logger()
34
36
 
35
37
 
36
38
  class CacheNotFoundError(DatatailrError):
@@ -79,6 +81,9 @@ class ArgumentsCache:
79
81
  :param job_name: Name of the job.
80
82
  :return: Dictionary of arguments.
81
83
  """
84
+ logger.info(
85
+ f"Retrieving arguments for {batch_id=}, {job=}, {batch_run_id=}, {self.use_persistent_cache=}"
86
+ )
82
87
  path = f"/tmp/datatailr/batch/arguments/{batch_id}.pkl"
83
88
  if self.use_persistent_cache and isinstance(job, str):
84
89
  try:
@@ -101,12 +106,8 @@ class ArgumentsCache:
101
106
  )
102
107
  if batch_run_id is None:
103
108
  return arg_keys[job]
104
- arguments_mapping = decode_json(
105
- os.getenv("DATATAILR_JOB_ARGUMENT_MAPPING", "{}")
106
- )
107
- arguments_mapping = {value: key for key, value in arguments_mapping.items()}
108
109
  args = {
109
- arguments_mapping.get(name, name): self.get_result(batch_run_id, value)
110
+ name: self.get_result(batch_run_id, value)
110
111
  for name, value in arg_keys[job].items()
111
112
  }
112
113
  return args
@@ -120,6 +121,9 @@ class ArgumentsCache:
120
121
  :param result: Result of the batch job.
121
122
  """
122
123
  path = f"/tmp/datatailr/batch/results/{batch_run_id}_{job}.pkl"
124
+ logger.info(
125
+ f"Adding result for {batch_run_id=}, {job=}, {result=}, {self.use_persistent_cache=}"
126
+ )
123
127
  if self.use_persistent_cache and isinstance(job, str):
124
128
  self._add_to_persistent_cache(path, result)
125
129
  else:
@@ -156,6 +160,7 @@ class ArgumentsCache:
156
160
 
157
161
  """
158
162
  path = path.replace("/tmp/", "")
163
+ logger.info(f"Adding arguments to persistent cache for {path=}")
159
164
  __BLOB_STORAGE__.put_blob(path, json.dumps(blob))
160
165
 
161
166
  def _get_from_persistent_cache(self, path: str) -> Any:
@@ -165,5 +170,6 @@ class ArgumentsCache:
165
170
  :param path: Path in the Blob storage where the blob is stored.
166
171
  """
167
172
  path = path.replace("/tmp/", "")
173
+ logger.info(f"Retrieving arguments from persistent cache for {path=}")
168
174
  data = __BLOB_STORAGE__.get_blob(path)
169
175
  return json.loads(data)
datatailr/scheduler/base.py CHANGED
@@ -11,8 +11,7 @@
11
11
  from __future__ import annotations
12
12
 
13
13
  from datetime import datetime
14
- import importlib
15
- import inspect
14
+ import importlib.util
16
15
  import json
17
16
  import os
18
17
  import tempfile
@@ -96,12 +95,17 @@ class EntryPoint:
96
95
 
97
96
  # Find the absolute path to the repository and then the relative path to the module.
98
97
  # This will be used in the creation of the code 'bundle' when building the image.
99
- path_to_repo = run_shell_command("git rev-parse --show-toplevel")[0]
100
- path_to_code = inspect.getfile(func)
101
- package_root = path_to_code
98
+ module_spec = importlib.util.find_spec(func.__module__)
99
+ if module_spec is not None and module_spec.origin is not None:
100
+ package_root = module_spec.origin
101
+ else:
102
+ package_root = "."
102
103
  module_parts = self.module_name.split(".")
103
104
  for _ in module_parts:
104
105
  package_root = os.path.dirname(package_root)
106
+ path_to_repo = run_shell_command(
107
+ f"cd {package_root} && git rev-parse --show-toplevel"
108
+ )[0]
105
109
  path_to_module = os.path.relpath(package_root, path_to_repo)
106
110
  self.path_to_repo = path_to_repo
107
111
  self.path_to_module = path_to_module
@@ -123,7 +127,7 @@ class EntryPoint:
123
127
  return f"EntryPoint({self.function_name} from {self.module_name}, type={self.type})"
124
128
 
125
129
  def __str__(self):
126
- return f"{self.module_name}.{self.function_name}"
130
+ return f"{self.module_name}:{self.function_name}"
127
131
 
128
132
 
129
133
  class Job:
@@ -277,20 +281,25 @@ class Job:
277
281
  2. Check if the local commit matches the remote HEAD (the repo is synced with the remote).
278
282
  Returns a tuple of (branch: str, commit_hash: str).
279
283
  """
280
- local_commit = run_shell_command("git rev-parse HEAD")[0]
281
- branch_name = run_shell_command("git rev-parse --abbrev-ref HEAD")[0]
284
+ path_to_repo = self.image.path_to_repo or "."
285
+ local_commit = run_shell_command(f"cd {path_to_repo} && git rev-parse HEAD")[0]
286
+ branch_name = run_shell_command(
287
+ f"cd {path_to_repo} && git rev-parse --abbrev-ref HEAD"
288
+ )[0]
282
289
 
283
290
  if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
284
291
  return branch_name, local_commit
285
- return_code = run_shell_command("git diff --exit-code")[1]
286
- is_committed = return_code == 0
292
+ return_code = run_shell_command(f"cd {path_to_repo} && git diff --exit-code")
293
+ is_committed = return_code is not None and return_code[1] == 0
287
294
 
288
295
  if not is_committed:
289
296
  raise RepoValidationError(
290
297
  "Please commit your changes before running the job."
291
298
  )
292
299
 
293
- remote_commit = run_shell_command("git ls-remote origin HEAD")[0].split("\t")[0]
300
+ remote_commit = run_shell_command(
301
+ f"cd {path_to_repo} && git ls-remote origin HEAD"
302
+ )[0].split("\t")[0]
294
303
 
295
304
  if local_commit != remote_commit:
296
305
  raise RepoValidationError(
datatailr/scheduler/batch.py CHANGED
@@ -18,7 +18,6 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
18
18
  import uuid
19
19
 
20
20
  from datatailr import Image
21
- from datatailr.dt_json import encode_json
22
21
  from datatailr.errors import BatchJobError
23
22
  from datatailr.logging import DatatailrLogger
24
23
  from datatailr.scheduler.base import (
@@ -114,7 +113,7 @@ class BatchJob:
114
113
  isinstance(dep, int) for dep in self.dependencies
115
114
  ), "All dependencies must be integers representing job IDs."
116
115
  self.dag.add_job(self)
117
- self.__argument_mapping = argument_mapping or {}
116
+ self.argument_mapping = argument_mapping or {}
118
117
 
119
118
  def __call__(self, *args, **kwds) -> BatchJob:
120
119
  """
@@ -200,6 +199,7 @@ class BatchJob:
200
199
  """
201
200
  return {
202
201
  "display_name": self.name,
202
+ "name": self.name,
203
203
  "child_number": self.__id,
204
204
  "entrypoint": str(self.entrypoint),
205
205
  "memory": self.resources.memory if self.resources else DEFAULT_TASK_MEMORY,
@@ -235,7 +235,7 @@ class BatchJob:
235
235
 
236
236
  def __add_dependency__(self, other):
237
237
  self.dependencies.add(other.__id)
238
- arg_name = self.__argument_mapping.get(other.name, other.name)
238
+ arg_name = self.argument_mapping.get(other.name, other.name)
239
239
  if arg_name is not None:
240
240
  self.__args[arg_name] = other
241
241
 
@@ -282,7 +282,6 @@ class BatchJob:
282
282
  "DATATAILR_BATCH_ID": str(self.dag.id),
283
283
  "DATATAILR_JOB_ID": str(self.__id),
284
284
  "DATATAILR_JOB_NAME": self.name,
285
- "DATATAILR_JOB_ARGUMENT_MAPPING": encode_json(self.__argument_mapping),
286
285
  }
287
286
  self.entrypoint(env=env)
288
287
  else:
@@ -464,9 +463,23 @@ class Batch(Job):
464
463
  def arg_name(arg: Union[BatchJob, str]) -> str:
465
464
  return arg.name if isinstance(arg, BatchJob) else arg
466
465
 
466
+ def merged(dst: dict[str, str], src: dict[str, str]) -> dict[str, str]:
467
+ # copy so we don't mutate the original mapping
468
+ out = dict(dst)
469
+ seen_vals = set(out.values())
470
+ for k, v in src.items():
471
+ if v not in seen_vals:
472
+ out[k] = v
473
+ seen_vals.add(v)
474
+ return out
475
+
467
476
  args = {
468
- j.name: {k: arg_name(v) for k, v in j.args.items()} for j in self.__jobs
477
+ j.name: merged(
478
+ j.argument_mapping, {k: arg_name(v) for k, v in j.args.items()}
479
+ )
480
+ for j in self.__jobs
469
481
  }
482
+
470
483
  __ARGUMENTS_CACHE__.add_arguments(self.id, args)
471
484
  if not self.__local_run and is_dt_installed():
472
485
  return super().run()
datatailr/user.py CHANGED
@@ -162,7 +162,7 @@ class User:
162
162
  last_name: str,
163
163
  email: str,
164
164
  password: str,
165
- primary_group: int,
165
+ primary_group: str,
166
166
  is_system_user: bool = False,
167
167
  ) -> Optional["User"]:
168
168
  if is_system_user:
{datatailr-0.1.21.dist-info → datatailr-0.1.22.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.21
3
+ Version: 0.1.22
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
{datatailr-0.1.21.dist-info → datatailr-0.1.22.dist-info}/RECORD CHANGED
@@ -3,31 +3,31 @@ datatailr/acl.py,sha256=tlDy6VlHinSy5W1FbVxcNQNi7FliWUXy3ssIbzaPp28,4157
3
3
  datatailr/blob.py,sha256=xkXT6RZcMww4YfLVjOyqvvPxWc-Ku6fTJ_PeCXyBys4,3159
4
4
  datatailr/dt_json.py,sha256=3xmTqDBk68oPl2UW8UVOYPaBw4lAsVg6nDLwcen5nuo,2252
5
5
  datatailr/errors.py,sha256=p_e4ao3sFEfz1g4LvEDqw6bVzHJPJSINLjJ8H6_PqOo,751
6
- datatailr/excel.py,sha256=YqIkaM5ap6uKj92yH6YGoB6I1oN10S-1zl5Uf9_xXx4,409
6
+ datatailr/excel.py,sha256=7_13VFHrjHlIAEeTaGQIlq4ycJvzRo-TvkP_KRBELGs,425
7
7
  datatailr/group.py,sha256=ExsrkqUooAfFLWKvnkp1ZxisSJD1yCp9TKqoCXDCwhs,4360
8
8
  datatailr/logging.py,sha256=4Rsx3wf2tAr1334E2goBjhC877RwbUNaFgBlh902vU4,3270
9
- datatailr/user.py,sha256=tY6TcwOvq1SW6Uqot6eYR3SBAihPwWKs2qDfHDvF4xw,6624
9
+ datatailr/user.py,sha256=THAHiHgcr6MGoF2EuFEUfPh-zRT4E_WhG6eoVNBNeKo,6624
10
10
  datatailr/utils.py,sha256=mqnnERMyHNAuAgFY4Ry4O4yW0ZjCRtJbjfI5fXVqt2s,1524
11
11
  datatailr/version.py,sha256=N9K8ZxlwFFSz8XSgbgaTWZY4k2J0JKfj698nZ_O2pIU,536
12
12
  datatailr/wrapper.py,sha256=K9ZD76cWey_ikA6C5sKejwRaYBDln4QMg-RcoRGiuFc,7991
13
13
  datatailr/build/__init__.py,sha256=_dA7b4L6wsaAFaSxUoYSJ1oaRqDHDMR20kqoCocSOss,487
14
14
  datatailr/build/image.py,sha256=YC8ML-l-sj6TcIBY-DCx_vaeI_7SmL9fPFhHnuxzRh0,5509
15
- datatailr/sbin/datatailr_run.py,sha256=_P2q0-i3JS0sPt49eX_EhjPxCrJyfi9-FGoYcqND4Zs,6307
15
+ datatailr/sbin/datatailr_run.py,sha256=l3xoX_Ex-U7vXCiH343D3SQnqP6BY-xIx5qB6iES5ns,5996
16
16
  datatailr/sbin/datatailr_run_app.py,sha256=AOkutzv4DeKfWZs-ZBciAMKnK4A05SfkVf1ZJnSSFwA,1231
17
17
  datatailr/sbin/datatailr_run_batch.py,sha256=UWnp96j_G66R_Cape7Bb-rbK6UBLF7Y5_mTlWyGJAVQ,1818
18
18
  datatailr/sbin/datatailr_run_excel.py,sha256=Gr_QZgqJrwgRVD9_o4v-2tbvU-QMvNHL7xUvFGhftFc,1163
19
19
  datatailr/sbin/datatailr_run_service.py,sha256=R8eNLN2SGnMtyfLy3vq9isUHr3dRzeBqESTquNK9Iho,1156
20
20
  datatailr/scheduler/__init__.py,sha256=qydHYVtEP6SUWd2CQ6FRdTdRWNz3SbYPJy4FK_wOvMk,1772
21
- datatailr/scheduler/arguments_cache.py,sha256=CydYR9o2pqfa4KsPTA1mJSBN-0YF47Q6AmODm4zAJQ4,6254
22
- datatailr/scheduler/base.py,sha256=XZexeOGWHOSI0babRB8XCvAnL3fAQSXYWRfj7srMAkk,14240
23
- datatailr/scheduler/batch.py,sha256=77iUufBqRBau9Ku4IivLOKqh-lknclEak8jg2YhsX3c,16437
21
+ datatailr/scheduler/arguments_cache.py,sha256=ct9CiUF3IDB5S7dsQWFarc8yQXa6X8xvRrKjJ1zYKhM,6527
22
+ datatailr/scheduler/base.py,sha256=4K39tQPpYnrhf2PmFWlcp0xkH2qEwhkfflt8qpWhxnQ,14633
23
+ datatailr/scheduler/batch.py,sha256=JBSiFNwv2X2BkBocz7ddVmwND6MFds978f0Ok5f8aUc,16782
24
24
  datatailr/scheduler/batch_decorator.py,sha256=LqL1bsupWLn-YEQUvFJYae7R3ogrL5-VodyiiScrkRw,5806
25
25
  datatailr/scheduler/constants.py,sha256=5WWTsfwZ_BA8gVDOTa2AQX9DJ0NzfaWgtY3vrODS2-8,606
26
26
  datatailr/scheduler/schedule.py,sha256=0XJJen2nL1xplRs0Xbjwgq3T-0bFCOrJzkSALdio998,3741
27
27
  datatailr/scheduler/utils.py,sha256=up6oR2iwe6G52LkvgfO394xchXgCYNjOMGRQW3e8PQk,1082
28
- datatailr-0.1.21.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
29
- datatailr-0.1.21.dist-info/METADATA,sha256=b1iOwgH34LZmK3e-D1gBtAhLjCmuvlW_Ip-D4LYWN6I,5146
30
- datatailr-0.1.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- datatailr-0.1.21.dist-info/entry_points.txt,sha256=YqXfk2At-olW4PUSRkqvy_O3Mbv7uTKCCPuAAiz3Qbg,312
32
- datatailr-0.1.21.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
33
- datatailr-0.1.21.dist-info/RECORD,,
28
+ datatailr-0.1.22.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
29
+ datatailr-0.1.22.dist-info/METADATA,sha256=KplXEwrUeqWjlRjkyN3NenplmPDlWPNzUsmCxbkD83Q,5146
30
+ datatailr-0.1.22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
+ datatailr-0.1.22.dist-info/entry_points.txt,sha256=YqXfk2At-olW4PUSRkqvy_O3Mbv7uTKCCPuAAiz3Qbg,312
32
+ datatailr-0.1.22.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
33
+ datatailr-0.1.22.dist-info/RECORD,,