datatailr 0.1.70__py3-none-any.whl → 0.1.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datatailr might be problematic. Click here for more details.

datatailr/blob.py CHANGED
@@ -10,12 +10,14 @@
10
10
 
11
11
  from __future__ import annotations
12
12
 
13
+ import os
13
14
  import tempfile
14
15
 
15
16
  from datatailr.wrapper import dt__Blob
16
17
 
17
18
  # Datatailr Blob API Client
18
19
  __client__ = dt__Blob()
20
+ __user__ = os.getenv("USER", "root")
19
21
 
20
22
 
21
23
  class Blob:
@@ -81,8 +83,13 @@ class Blob:
81
83
  """
82
84
  # Since direct reading and writing of blobs is not implemented yet, we are using a temporary file.
83
85
  # This is a workaround to allow reading the blob content directly from the blob storage.
84
-
85
- with tempfile.NamedTemporaryFile(delete=True) as temp_file:
86
+ temp_dir = f"/home/{__user__}/tmp"
87
+ if not os.path.exists(temp_dir):
88
+ temp_dir = "/tmp"
89
+ else:
90
+ temp_dir += "/.dt"
91
+ os.makedirs(temp_dir, exist_ok=True)
92
+ with tempfile.NamedTemporaryFile(dir=temp_dir, delete=True) as temp_file:
86
93
  self.get_file(name, temp_file.name)
87
94
  with open(temp_file.name, "r") as f:
88
95
  return f.read()
@@ -2,8 +2,8 @@
2
2
 
3
3
  # noqa: F401
4
4
  try:
5
- from dt.excel import Addin
6
- from dt.excel import Queue
5
+ from datatailr.excel.addin import Addin
6
+ from datatailr.excel.addin import Queue
7
7
  except ImportError:
8
8
 
9
9
  class DummyAddin:
@@ -0,0 +1,169 @@
1
+ """
2
+ Copyright (c) 2025 - Datatailr Inc.
3
+ All Rights Reserved.
4
+
5
+ This file is part of Datatailr and subject to the terms and conditions
6
+ defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
7
+ of this file, in parts or full, via any medium is strictly prohibited.
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import importlib
13
+ import subprocess
14
+ import inspect
15
+ import numpy as np
16
+ from dt.excel_base import Addin as AddinBase, Queue # type: ignore
17
+
18
+
19
def __progress__(queue, stop):
    """Push an animated progress bar to ``queue`` until ``stop`` is set.

    Intended as the target of a background thread. Roughly every 0.25
    seconds the next frame of a growing block-character bar is pushed; after
    the longest frame the animation wraps around to the shortest one.

    :param queue: Object exposing ``push(value)``; receives one frame per tick.
    :param stop: ``threading.Event`` (or compatible object providing
        ``is_set()`` and ``wait(timeout)``); the loop ends once it is set.
    """
    bar = ["█", "██", "███", "████", "█████", "██████", "███████"]

    count = 0
    while not stop.is_set():
        queue.push(bar[count % len(bar)])
        count += 1
        # Wait on the event rather than sleeping unconditionally, so that
        # setting ``stop`` wakes this thread immediately and the caller's
        # ``join()`` does not block for the remainder of the interval.
        stop.wait(0.25)
31
+
32
+
33
def get_package_root(mod):
    """Return the ``sys.path`` entry from which ``mod`` can be imported.

    Example: for module ``dt.excel`` located at
    ``/opt/datatailr/python/dt/excel.py`` the result is
    ``/opt/datatailr/python``. The same directory is returned for the
    package ``dt`` itself, whose file is ``/opt/datatailr/python/dt/__init__.py``.

    :param mod: Module-like object with ``__name__`` and a non-``None``
        ``__file__``.
    :return: Absolute path of the directory containing the module's top-level
        package (or the module itself when it is not inside a package).
    """
    mod_path = os.path.abspath(mod.__file__)
    levels = len(mod.__name__.split("."))
    # A package's __file__ points at its __init__ file, which lives one
    # directory deeper than a plain module with the same dotted name, so one
    # additional level has to be stripped to reach the import root.
    if os.path.splitext(os.path.basename(mod_path))[0] == "__init__":
        levels += 1
    for _ in range(levels):
        mod_path = os.path.dirname(mod_path)
    return mod_path
42
+
43
+
44
class Addin(AddinBase):
    """Excel add-in that can launch itself and expose Python functions.

    Extends ``AddinBase`` with :meth:`run`, which starts the add-in server
    script for the module that defines the add-in instance, and
    :meth:`expose`, a decorator factory that registers a function through
    ``self.decorator_impl``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run(self, port):
        """Start ``dt-excel.sh`` for the module that holds this add-in.

        The Excel addin executable will try to import an object literally
        called ``addin`` from the module passed to ``dt-excel.sh``. To find
        which module to pass, the call stack is walked until a module whose
        ``addin`` attribute is this instance is found.

        :param port: Port passed to ``dt-excel.sh`` via ``-p``.
        :raises ValueError: If no module on the call stack has an ``addin``
            attribute that is this instance, or if the source file of the
            ``__main__`` module cannot be determined.
        """
        found_module = None
        for frame_info in inspect.stack():
            mod = inspect.getmodule(frame_info.frame)
            if not mod or not hasattr(mod, "__name__"):
                continue
            # A module without a backing file cannot be located on disk, so
            # it can neither be re-imported by path nor handed to the script.
            if getattr(mod, "__file__", None) is None:
                continue

            temp_path = get_package_root(mod)
            sys.path.insert(0, temp_path)
            try:
                imported_mod = importlib.import_module(mod.__name__)
            finally:
                sys.path.pop(0)

            if getattr(imported_mod, "addin", None) is self:
                found_module = mod
                break

        if not found_module:
            raise ValueError(
                "'addin' not found. Please, use 'addin' as variable name for your Addin instance."
            )

        if found_module.__name__ != "__main__":
            # 'addin' lives in an imported module: the initial python script
            # did not define it itself (e.g. it imported a function that
            # called addin.run()), so the module's own name is importable.
            module_name = found_module.__name__
            dir_name = os.path.dirname(os.path.abspath(found_module.__file__))
        else:
            # 'addin' lives in the initial python script (where
            # __name__ == "__main__"); derive an importable module name from
            # the script's file name.
            filename = inspect.getsourcefile(found_module)
            if filename is None:
                raise ValueError(f"Cannot determine filename for module {found_module}")
            module_name = os.path.splitext(os.path.basename(filename))[0]
            dir_name = os.path.dirname(os.path.abspath(filename))

        # The variable parts are passed as positional parameters ($1..$3)
        # instead of being formatted into the command string, so paths or
        # module names containing quotes, spaces or '$' cannot alter the
        # shell command.
        subprocess.run(
            [
                "bash",
                "-c",
                'PYTHONPATH="$1:$PYTHONPATH" /opt/datatailr/bin/dt-excel.sh'
                ' -n -H "localhost" -l -p "$2" -w 8000 "$3"',
                "dt-excel",  # becomes $0 of the inline script
                dir_name,
                str(port),
                module_name,
            ]
        )

    def expose(
        self, description, help, volatile=False, streaming=False, progressbar=False
    ):
        """Return a decorator that registers a function with the add-in.

        The decorated function is replaced by a wrapper that takes the call
        identifier as its first positional argument and the function's
        arguments as keyword arguments. Every argument (except ``queue`` for
        streaming functions) must be an instance of its annotation or a
        ``numpy.ndarray``.

        * Default mode: the function result is returned, converted with
          ``tolist()`` when the result provides it.
        * ``progressbar=True``: a background thread pushes progress frames
          to a ``Queue`` while the function runs; the result (or the error
          text) is then sent through the same queue and ``None`` is returned.
        * ``streaming=True``: the function is called with a ``Queue`` as its
          first argument; exceptions are reported through ``queue.error``.

        :param description: Forwarded to ``self.decorator_impl``.
        :param help: Forwarded to ``self.decorator_impl``.
        :param volatile: Forwarded to ``self.decorator_impl``.
        :param streaming: Run the function in streaming mode.
        :param progressbar: Show a progress bar while the function runs.
        :raises ValueError: If both ``streaming`` and ``progressbar`` are set.
        """
        if streaming and progressbar:
            raise ValueError(
                "you cannot specify progressbar and streaming at the same time"
            )

        def decorator(func):
            signature = inspect.signature(func)

            def wrapper(*args, **kwargs):
                call_id = args[0]

                for arg in signature.parameters.values():
                    if streaming and arg.name == "queue":
                        # The queue is supplied by the wrapper, not the caller.
                        continue

                    value = kwargs[arg.name]
                    if not (
                        isinstance(value, arg.annotation)
                        or isinstance(value, np.ndarray)
                    ):
                        raise ValueError(
                            "excel/python/dt/excel.py: Got argument of wrong type, expected %s or numpy.ndarray, got %s"
                            % (arg.annotation, type(value))
                        )

                if not streaming and not progressbar:
                    result = func(**kwargs)
                    if hasattr(result, "tolist"):
                        result = result.tolist()
                    return result

                # Both remaining modes report through a queue. It is created
                # before the function runs so that the error handlers below
                # always have it available.
                queue = Queue(self.name.lower() + "." + func.__name__, call_id)

                if progressbar:
                    from threading import Event, Thread

                    error = None
                    result = None
                    stop = Event()
                    thread = Thread(target=__progress__, args=(queue, stop))
                    thread.start()
                    try:
                        result = func(**kwargs)
                    except Exception as exception:
                        error = str(exception)
                    finally:
                        # Always stop the animation thread, even when the
                        # function is interrupted by a non-Exception error.
                        stop.set()
                        thread.join()

                    if error is not None:
                        queue.error(error)
                    else:
                        queue.push(result)
                    return

                try:
                    func(queue, **kwargs)
                except Exception as exception:
                    queue.error(str(exception))

            self.decorator_impl(
                signature,
                wrapper,
                func.__name__,
                description,
                help,
                volatile,
                streaming or progressbar,
            )
            return wrapper

        return decorator
@@ -35,6 +35,7 @@
35
35
  import concurrent.futures
36
36
  import subprocess
37
37
  import os
38
+ import stat
38
39
  import shlex
39
40
  import sysconfig
40
41
  from typing import Optional, Tuple
@@ -80,6 +81,21 @@ def create_user_and_group() -> Tuple[str, str]:
80
81
  os.system(
81
82
  f"getent passwd {user} || useradd -g {group} -s /bin/bash -m {user} -u {uid} -o"
82
83
  )
84
+
85
+ permissions = (
86
+ stat.S_IWOTH
87
+ | stat.S_IXOTH
88
+ | stat.S_IWUSR
89
+ | stat.S_IRUSR
90
+ | stat.S_IRGRP
91
+ | stat.S_IWGRP
92
+ | stat.S_IXUSR
93
+ | stat.S_IXGRP
94
+ )
95
+
96
+ os.makedirs(f"/home/{user}/tmp/.dt", exist_ok=True)
97
+ os.chmod(f"/home/{user}/tmp/.dt", permissions)
98
+
83
99
  return user, group
84
100
 
85
101
 
@@ -88,16 +104,13 @@ def prepare_command_argv(command: str | list, user: str, env_vars: dict) -> list
88
104
  command = shlex.split(command)
89
105
 
90
106
  python_libdir = sysconfig.get_config_var("LIBDIR")
91
- ld_library_path = get_env_var("LD_LIBRARY_PATH", "")
92
-
93
- if ld_library_path:
94
- python_libdir = ld_library_path + ":" + python_libdir
107
+ ld_library_path = get_env_var("LD_LIBRARY_PATH", None)
95
108
 
96
109
  # Base environment variables setup
97
110
  base_env = {
98
111
  "PATH": get_env_var("PATH", ""),
99
112
  "PYTHONPATH": get_env_var("PYTHONPATH", ""),
100
- "LD_LIBRARY_PATH": python_libdir,
113
+ "LD_LIBRARY_PATH": ":".join(filter(None, [python_libdir, ld_library_path])),
101
114
  }
102
115
 
103
116
  merged_env = base_env | env_vars
@@ -144,7 +157,7 @@ def run_commands_in_parallel(
144
157
  user: str,
145
158
  env_vars: dict,
146
159
  log_stream_names: Optional[list[str | None]] = None,
147
- ) -> tuple[int, int]:
160
+ ) -> int:
148
161
  """
149
162
  Executes two commands concurrently using a ThreadPoolExecutor.
150
163
  Returns 0 if every command exited with return code 0, otherwise 1.
@@ -166,7 +179,7 @@ def run_commands_in_parallel(
166
179
  results = [
167
180
  future.result() for future in concurrent.futures.as_completed(futures)
168
181
  ]
169
- return results[0], results[1]
182
+ return 0 if all(code == 0 for code in results) else 1
170
183
 
171
184
 
172
185
  def main():
@@ -226,6 +239,7 @@ def main():
226
239
  "--bind-addr=0.0.0.0:9090",
227
240
  f'--app-name="Datatailr IDE {get_env_var("DATATAILR_USER")}"',
228
241
  ]
242
+ job_name = get_env_var("DATATAILR_JOB_NAME")
229
243
  jupyter_command = [
230
244
  "jupyter-lab",
231
245
  "--ip='*'",
@@ -233,6 +247,9 @@ def main():
233
247
  "--no-browser",
234
248
  "--NotebookApp.token=''",
235
249
  "--NotebookApp.password=''",
250
+ f"--ServerApp.base_url=/workspace/{job_name}/jupyter/",
251
+ f"--ServerApp.static_url_prefix=/workspace/{job_name}/jupyter/static/",
252
+ f"--ServerApp.root_dir=/home/{user}",
236
253
  ]
237
254
  run_commands_in_parallel(
238
255
  [ide_command, jupyter_command], user, env, ["code-server", "jupyter"]
@@ -49,10 +49,7 @@ class ArgumentsCache:
49
49
 
50
50
  :param use_persistent_cache: If True, use the persistent cache backend. Otherwise, use in-memory cache.
51
51
  """
52
- try:
53
- self.__bucket_name__ = dt__Tag().get("blob_storage_prefix") + "batch"
54
- except Exception:
55
- self.__bucket_name__ = "local-batch"
52
+ self.__bucket_name__ = dt__Tag().get("blob_storage_prefix") + "batch"
56
53
  self.use_persistent_cache = use_persistent_cache
57
54
  if not self.use_persistent_cache:
58
55
  # Create a temp folder, for local caching
@@ -183,6 +183,9 @@ class Job:
183
183
  build_script_pre=build_script_pre,
184
184
  build_script_post=build_script_post,
185
185
  )
186
+ if entrypoint is not None:
187
+ image.path_to_repo = entrypoint.path_to_repo
188
+ image.path_to_module = entrypoint.path_to_module
186
189
  self.image = image
187
190
  self.type = type if entrypoint is None else entrypoint.type
188
191
  self.entrypoint = entrypoint
@@ -294,7 +294,7 @@ class BatchJob:
294
294
  env = {
295
295
  "DATATAILR_BATCH_ID": str(self.dag.id),
296
296
  "DATATAILR_JOB_ID": str(self.__id),
297
- "DATATAILR_JOB_NAME": self.name,
297
+ "DATATAILR_JOB_NAME": f"{self.dag.name}[{self.__id}]",
298
298
  }
299
299
  self.entrypoint(env=env)
300
300
  else:
@@ -479,10 +479,26 @@ class Batch(Job):
479
479
 
480
480
  def prepare_args(self) -> None:
481
481
  def arg_name(arg: Union[BatchJob, str]) -> str:
482
- return arg.name if isinstance(arg, BatchJob) else arg
482
+ return f"{self.name}[{arg.id}]" if isinstance(arg, BatchJob) else arg
483
+
484
+ def adjust_mapping(mapping: Dict[str, str]) -> Dict[str, str]:
485
+ result = {}
486
+ for k, v in mapping.items():
487
+ if isinstance(v, BatchJob):
488
+ result[k] = f"{self.name}[{v.id}]"
489
+ elif isinstance(v, str):
490
+ job = self.get_job_by_name(v)
491
+ if job is not None:
492
+ result[k] = f"{self.name}[{job.id}]"
493
+ else:
494
+ result[k] = v
495
+ else:
496
+ raise TypeError(
497
+ f"Unsupported type in argument mapping: {type(v)} for key {k}"
498
+ )
499
+ return result
483
500
 
484
501
  def merged(dst: dict[str, str], src: dict[str, str]) -> dict[str, str]:
485
- # copy so we don't mutate the original mapping
486
502
  out = dict(dst)
487
503
  seen_vals = set(out.values())
488
504
  for k, v in src.items():
@@ -492,12 +508,12 @@ class Batch(Job):
492
508
  return out
493
509
 
494
510
  args = {
495
- j.name: merged(
496
- j.argument_mapping, {k: arg_name(v) for k, v in j.args.items()}
511
+ f"{self.name}[{j.id}]": merged(
512
+ adjust_mapping(j.argument_mapping),
513
+ {j.argument_mapping.get(k, k): arg_name(v) for k, v in j.args.items()},
497
514
  )
498
515
  for j in self.__jobs
499
516
  }
500
-
501
517
  __ARGUMENTS_CACHE__.add_arguments(self.id, args)
502
518
 
503
519
  def save(self) -> Tuple[bool, str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datatailr
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Summary: Ready-to-Use Platform That Drives Business Insights
5
5
  Author-email: Datatailr <info@datatailr.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  datatailr/__init__.py,sha256=QTTG8X76BnlQwVx5N4ZQtSbLkgFipZ9NJGAbvtfuk_g,1051
2
2
  datatailr/acl.py,sha256=7hBwF7TP_ADoDryYEFuXx2FCLavLmp3k_F0-sEXg26g,4173
3
- datatailr/blob.py,sha256=9lKZKm4eRKVE4_t2zy3hh6Z_Ov4iaTA3-tJFjnQKezg,3313
3
+ datatailr/blob.py,sha256=FHAB90wpt0DgDsejo46iCtZ1N2d6QMpo19uY_7NX1t8,3581
4
4
  datatailr/dt_json.py,sha256=3xmTqDBk68oPl2UW8UVOYPaBw4lAsVg6nDLwcen5nuo,2252
5
5
  datatailr/errors.py,sha256=p_e4ao3sFEfz1g4LvEDqw6bVzHJPJSINLjJ8H6_PqOo,751
6
6
  datatailr/group.py,sha256=AC0nCA44eEWZCJCq2klPqkFg_995mS3C_wu5uSFFLtU,4426
@@ -12,23 +12,24 @@ datatailr/version.py,sha256=N9K8ZxlwFFSz8XSgbgaTWZY4k2J0JKfj698nZ_O2pIU,536
12
12
  datatailr/wrapper.py,sha256=45RrMeYIFFWJAtOlQZRe1fT9daeq4vFlj6nIajbewEY,8080
13
13
  datatailr/build/__init__.py,sha256=_dA7b4L6wsaAFaSxUoYSJ1oaRqDHDMR20kqoCocSOss,487
14
14
  datatailr/build/image.py,sha256=YC8ML-l-sj6TcIBY-DCx_vaeI_7SmL9fPFhHnuxzRh0,5509
15
- datatailr/excel/__init__.py,sha256=IpXEPIFuu8IG3uQ8k7FDHqiHnChtyp-jgE8Plx2cWXQ,656
16
- datatailr/sbin/datatailr_run.py,sha256=dfNPo7sToUq2KnYoB-yPBlXq6ojhJ3BQEViBguQXq54,9412
15
+ datatailr/excel/__init__.py,sha256=wox5ltPeOYZcZoRDW4R6tJsfOjf-0WZM2_pGgltGjdo,682
16
+ datatailr/excel/addin.py,sha256=at0S1cNHShCOCXAml1W2sJmJ5DdNroTN6Bp6KWnYZ94,6104
17
+ datatailr/sbin/datatailr_run.py,sha256=m8FZLYYXw7LFrvZhBXKWaBL784EKz27Qk7Se0Vo-KVY,9967
17
18
  datatailr/sbin/datatailr_run_app.py,sha256=itF76XC2F4RK9s6bkoEppEiYwSLHK_5Jai3yvC-kFhY,1501
18
19
  datatailr/sbin/datatailr_run_batch.py,sha256=UWnp96j_G66R_Cape7Bb-rbK6UBLF7Y5_mTlWyGJAVQ,1818
19
20
  datatailr/sbin/datatailr_run_excel.py,sha256=BLWmvxpKEE_8vJhs8E4VWq07FOBof5tlow-AkIEXtHw,1470
20
21
  datatailr/sbin/datatailr_run_service.py,sha256=DO9LGOpz3CVZOJJRHb4ac7AgY_mLbXHGadSyVCeIknc,1212
21
22
  datatailr/scheduler/__init__.py,sha256=qydHYVtEP6SUWd2CQ6FRdTdRWNz3SbYPJy4FK_wOvMk,1772
22
- datatailr/scheduler/arguments_cache.py,sha256=UEy55T6-rMg__t8S2hpXCmvyZCFH4FyAdMOb_vNbcIw,6328
23
- datatailr/scheduler/base.py,sha256=OWRblRCmKVe1stN43J35_g-1oKH4qteU4lrDRezyMV4,16829
24
- datatailr/scheduler/batch.py,sha256=CQCH1wHhW1qx09J7iQNQleErJ4n0nssAbd6u9YS6FMY,17735
23
+ datatailr/scheduler/arguments_cache.py,sha256=00OE0DhobYteBOnirjulO1ltgGBRamAdCO168O3_Zes,6236
24
+ datatailr/scheduler/base.py,sha256=WWi_VnDxev0GG6QolF3Wtj-p_JS5t2CN9VALYPl1OYo,16994
25
+ datatailr/scheduler/batch.py,sha256=ZhEf3YkXf1_ieV5ivk4-me60ov9v5r9f9BdkJw84i_0,18475
25
26
  datatailr/scheduler/batch_decorator.py,sha256=LqL1bsupWLn-YEQUvFJYae7R3ogrL5-VodyiiScrkRw,5806
26
27
  datatailr/scheduler/constants.py,sha256=5WWTsfwZ_BA8gVDOTa2AQX9DJ0NzfaWgtY3vrODS2-8,606
27
28
  datatailr/scheduler/schedule.py,sha256=0XJJen2nL1xplRs0Xbjwgq3T-0bFCOrJzkSALdio998,3741
28
29
  datatailr/scheduler/utils.py,sha256=up6oR2iwe6G52LkvgfO394xchXgCYNjOMGRQW3e8PQk,1082
29
- datatailr-0.1.70.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
30
- datatailr-0.1.70.dist-info/METADATA,sha256=D6yUGFiVL9fUxb_OU0Wu9J-4s6xfTEJ-qAyfceKc-U0,5146
31
- datatailr-0.1.70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- datatailr-0.1.70.dist-info/entry_points.txt,sha256=YqXfk2At-olW4PUSRkqvy_O3Mbv7uTKCCPuAAiz3Qbg,312
33
- datatailr-0.1.70.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
34
- datatailr-0.1.70.dist-info/RECORD,,
30
+ datatailr-0.1.72.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
31
+ datatailr-0.1.72.dist-info/METADATA,sha256=w-sZvCPuMwHr_mFMYq9xTMwg1i1TD3sZ4VGhroQL7aU,5146
32
+ datatailr-0.1.72.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ datatailr-0.1.72.dist-info/entry_points.txt,sha256=YqXfk2At-olW4PUSRkqvy_O3Mbv7uTKCCPuAAiz3Qbg,312
34
+ datatailr-0.1.72.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
35
+ datatailr-0.1.72.dist-info/RECORD,,