datatailr 0.1.70__py3-none-any.whl → 0.1.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- datatailr/blob.py +9 -2
- datatailr/excel/__init__.py +2 -2
- datatailr/excel/addin.py +169 -0
- datatailr/sbin/datatailr_run.py +27 -7
- datatailr/scheduler/arguments_cache.py +1 -4
- datatailr/scheduler/base.py +3 -0
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/METADATA +1 -1
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/RECORD +12 -11
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/WHEEL +0 -0
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/entry_points.txt +0 -0
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/licenses/LICENSE +0 -0
- {datatailr-0.1.70.dist-info → datatailr-0.1.71.dist-info}/top_level.txt +0 -0
datatailr/blob.py
CHANGED
|
@@ -10,12 +10,14 @@
|
|
|
10
10
|
|
|
11
11
|
from __future__ import annotations
|
|
12
12
|
|
|
13
|
+
import os
|
|
13
14
|
import tempfile
|
|
14
15
|
|
|
15
16
|
from datatailr.wrapper import dt__Blob
|
|
16
17
|
|
|
17
18
|
# Datatailr Blob API Client
|
|
18
19
|
__client__ = dt__Blob()
|
|
20
|
+
__user__ = os.getenv("USER", "root")
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
class Blob:
|
|
@@ -81,8 +83,13 @@ class Blob:
|
|
|
81
83
|
"""
|
|
82
84
|
# Since direct reading and writing of blobs is not implemented yet, we are using a temporary file.
|
|
83
85
|
# This is a workaround to allow reading the blob content directly from the blob storage.
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
temp_dir = f"/home/{__user__}/tmp"
|
|
87
|
+
if not os.path.exists(temp_dir):
|
|
88
|
+
temp_dir = "/tmp"
|
|
89
|
+
else:
|
|
90
|
+
temp_dir += "/.dt"
|
|
91
|
+
os.makedirs(temp_dir, exist_ok=True)
|
|
92
|
+
with tempfile.NamedTemporaryFile(dir=temp_dir, delete=True) as temp_file:
|
|
86
93
|
self.get_file(name, temp_file.name)
|
|
87
94
|
with open(temp_file.name, "r") as f:
|
|
88
95
|
return f.read()
|
datatailr/excel/__init__.py
CHANGED
datatailr/excel/addin.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2025 - Datatailr Inc.
|
|
3
|
+
All Rights Reserved.
|
|
4
|
+
|
|
5
|
+
This file is part of Datatailr and subject to the terms and conditions
|
|
6
|
+
defined in 'LICENSE.txt'. Unauthorized copying and/or distribution
|
|
7
|
+
of this file, in parts or full, via any medium is strictly prohibited.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
import importlib
|
|
13
|
+
import subprocess
|
|
14
|
+
import inspect
|
|
15
|
+
import numpy as np
|
|
16
|
+
from dt.excel_base import Addin as AddinBase, Queue # type: ignore
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __progress__(queue, stop):
    """Push an animated progress bar to ``queue`` until ``stop`` is set.

    A bar frame is pushed roughly every quarter of a second; the frames grow
    from one to seven block characters and then start over.

    :param queue: object exposing ``push(value)``; receives each bar frame.
    :param stop: event-like object exposing ``is_set()`` and ``wait(timeout)``
        (a ``threading.Event`` in the only caller visible in this file).
    """
    bar = ["█", "██", "███", "████", "█████", "██████", "███████"]

    count = 0
    while not stop.is_set():
        queue.push(bar[count % len(bar)])
        count += 1
        # Wait on the event instead of sleeping: the pause ends as soon as
        # ``stop`` is set, so the thread joining on this loop is not held up
        # for the remainder of the 0.25 s interval.
        stop.wait(0.25)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_package_root(mod):
    """Return the ``sys.path`` entry from which ``mod`` can be imported.

    For a module ``dt.excel`` located at ``/opt/datatailr/python/dt/excel.py``
    the result is ``/opt/datatailr/python``. The same root is returned when
    ``dt.excel`` is a package, i.e. located at
    ``/opt/datatailr/python/dt/excel/__init__.py``.

    :param mod: module-like object with ``__name__`` and ``__file__``.
    :return: absolute path of the directory containing the top-level package.
    :raises ValueError: if ``mod`` has no ``__file__`` (or it is ``None``).
    """
    mod_file = getattr(mod, "__file__", None)
    if mod_file is None:
        raise ValueError(f"Module {mod.__name__} has no __file__")

    mod_path = os.path.abspath(mod_file)
    # One path component per dotted name part: "dt.excel" -> strip
    # "excel.py" and "dt".
    levels = len(mod.__name__.split("."))
    # A package's __file__ points at <package dir>/__init__.py, which sits one
    # directory deeper than a plain module file, so strip one more component.
    if os.path.splitext(os.path.basename(mod_path))[0] == "__init__":
        levels += 1

    for _ in range(levels):
        mod_path = os.path.dirname(mod_path)
    return mod_path
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Addin(AddinBase):
    """Excel add-in that registers Python functions and launches ``dt-excel.sh``.

    Functions are registered with the :meth:`expose` decorator; :meth:`run`
    starts the add-in executable for the module that holds this instance
    under the variable name ``addin``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def run(self, port):
        """Start ``dt-excel.sh`` for the module that defines this add-in.

        The add-in executable imports an object literally called ``addin``
        from the module passed to ``dt-excel.sh``. To find that module the
        call stack is walked until a module whose ``addin`` attribute is this
        very instance is found.

        :param port: value passed to ``dt-excel.sh`` as its ``-p`` argument.
        :raises ValueError: if no module on the call stack exposes this
            instance as ``addin``, or if the source file of the ``__main__``
            module cannot be determined.
        """
        found_module = None
        for frame_info in inspect.stack():
            mod = inspect.getmodule(frame_info.frame)
            # Frames without a file-backed module cannot be resolved to a
            # sys.path entry, so they cannot be the module we are after.
            if (
                not mod
                or not hasattr(mod, "__name__")
                or getattr(mod, "__file__", None) is None
            ):
                continue

            # Temporarily make the module importable by name, then restore
            # sys.path whatever the outcome of the import.
            temp_path = get_package_root(mod)
            sys.path.insert(0, temp_path)
            try:
                imported_mod = importlib.import_module(mod.__name__)
            finally:
                sys.path.pop(0)

            if getattr(imported_mod, "addin", None) is self:
                found_module = mod
                break

        if not found_module:
            raise ValueError(
                "'addin' not found. Please, use 'addin' as variable name for your Addin instance."
            )

        if found_module.__name__ != "__main__":
            # 'addin' lives in a regularly imported module: pass its import
            # name and put the directory of its file on PYTHONPATH.
            # NOTE(review): for a dotted name such as "pkg.mod" this adds the
            # directory of mod.py rather than the parent of "pkg" -- confirm
            # that the package root is importable in the child process.
            module_name = found_module.__name__
            dir_name = os.path.dirname(os.path.abspath(found_module.__file__))
        else:
            # 'addin' lives in the script being executed: derive an importable
            # module name from the script's file name.
            filename = inspect.getsourcefile(found_module)
            if filename is None:
                raise ValueError(f"Cannot determine filename for module {found_module}")
            module_name = os.path.splitext(os.path.basename(filename))[0]
            dir_name = os.path.dirname(os.path.abspath(filename))

        # The values are handed to bash as positional parameters ($1..$3)
        # instead of being interpolated into the command string, so spaces,
        # quotes or '$' in a path or module name cannot alter the command.
        subprocess.run(
            [
                "bash",
                "-c",
                'PYTHONPATH="$1:$PYTHONPATH" /opt/datatailr/bin/dt-excel.sh -n -H "localhost" -l -p "$2" -w 8000 "$3"',
                "dt-excel",  # becomes $0 of the inline script
                dir_name,
                str(port),
                module_name,
            ]
        )

    def expose(
        self, description, help, volatile=False, streaming=False, progressbar=False
    ):
        """Return a decorator that registers a function with the add-in.

        The decorated function is replaced by a wrapper that validates the
        keyword arguments against the function's annotations and then runs
        the function in one of three modes:

        * plain: the result is returned (converted with ``tolist()`` when the
          result offers it);
        * ``progressbar``: a progress bar is pushed to a queue from a helper
          thread while the function runs, then the result or the error text
          is pushed to the same queue;
        * ``streaming``: the function receives a queue as first positional
          argument; an error raised by it is reported through that queue.

        :param description: forwarded to ``decorator_impl``.
        :param help: forwarded to ``decorator_impl``.
        :param volatile: forwarded to ``decorator_impl``.
        :param streaming: run the function in streaming mode.
        :param progressbar: run the function in progress-bar mode.
        :raises ValueError: if both ``streaming`` and ``progressbar`` are set.
        """
        if streaming and progressbar:
            raise ValueError(
                "you cannot specify progressbar and streaming at the same time"
            )

        def decorator(func):
            signature = inspect.signature(func)

            def wrapper(*args, **kwargs):
                # The first positional argument identifies the call; it is
                # only used to construct the result queue.
                call_id = args[0]

                def make_queue():
                    return Queue(self.name.lower() + "." + func.__name__, call_id)

                # Every parameter must be passed by keyword and be either an
                # instance of its annotation or a numpy array. A parameter
                # without annotation therefore only accepts numpy arrays.
                for arg in signature.parameters.values():
                    if streaming and arg.name == "queue":
                        continue

                    value = kwargs[arg.name]
                    if not (
                        isinstance(value, arg.annotation)
                        or isinstance(value, np.ndarray)
                    ):
                        raise ValueError(
                            "excel/python/dt/excel.py: Got argument of wrong type, expected %s or numpy.ndarray, got %s"
                            % (arg.annotation, type(value))
                        )

                if not streaming and not progressbar:
                    result = func(**kwargs)
                    if hasattr(result, "tolist"):
                        result = result.tolist()
                    return result

                if streaming:
                    # Bind the queue before calling the function so that the
                    # error handler below can actually reach it.
                    queue = make_queue()
                    try:
                        func(queue, **kwargs)
                    except Exception as exception:
                        queue.error(str(exception))
                    return

                # Progress-bar mode.
                from threading import Event, Thread

                error = None
                result = None
                queue = make_queue()
                stop = Event()
                thread = Thread(target=__progress__, args=(queue, stop))
                thread.start()
                try:
                    result = func(**kwargs)
                except Exception as exception:
                    error = str(exception)
                finally:
                    # Always stop the progress thread, even when the function
                    # is interrupted by something that is not an Exception;
                    # otherwise the thread would keep pushing frames forever.
                    stop.set()
                    thread.join()

                # NOTE(review): unlike the plain mode the result is pushed
                # without a tolist() conversion -- confirm Queue.push accepts
                # numpy results.
                if error is not None:
                    queue.error(error)
                else:
                    queue.push(result)
                return

            self.decorator_impl(
                signature,
                wrapper,
                func.__name__,
                description,
                help,
                volatile,
                streaming or progressbar,
            )
            return wrapper

        return decorator
|
datatailr/sbin/datatailr_run.py
CHANGED
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
import concurrent.futures
|
|
36
36
|
import subprocess
|
|
37
37
|
import os
|
|
38
|
+
import stat
|
|
38
39
|
import shlex
|
|
39
40
|
import sysconfig
|
|
40
41
|
from typing import Optional, Tuple
|
|
@@ -80,6 +81,24 @@ def create_user_and_group() -> Tuple[str, str]:
|
|
|
80
81
|
os.system(
|
|
81
82
|
f"getent passwd {user} || useradd -g {group} -s /bin/bash -m {user} -u {uid} -o"
|
|
82
83
|
)
|
|
84
|
+
|
|
85
|
+
permissions = (
|
|
86
|
+
stat.S_IWOTH
|
|
87
|
+
| stat.S_IXOTH
|
|
88
|
+
| stat.S_IWUSR
|
|
89
|
+
| stat.S_IRUSR
|
|
90
|
+
| stat.S_IRGRP
|
|
91
|
+
| stat.S_IWGRP
|
|
92
|
+
| stat.S_IXUSR
|
|
93
|
+
| stat.S_IXGRP
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
if os.path.exists(f"/home/{user}"):
|
|
97
|
+
os.chmod(f"/home/{user}", permissions)
|
|
98
|
+
|
|
99
|
+
if os.path.exists(f"/home/{user}/tmp/.dt"):
|
|
100
|
+
os.chmod(f"/home/{user}/tmp/.dt", permissions)
|
|
101
|
+
|
|
83
102
|
return user, group
|
|
84
103
|
|
|
85
104
|
|
|
@@ -88,16 +107,13 @@ def prepare_command_argv(command: str | list, user: str, env_vars: dict) -> list
|
|
|
88
107
|
command = shlex.split(command)
|
|
89
108
|
|
|
90
109
|
python_libdir = sysconfig.get_config_var("LIBDIR")
|
|
91
|
-
ld_library_path = get_env_var("LD_LIBRARY_PATH",
|
|
92
|
-
|
|
93
|
-
if ld_library_path:
|
|
94
|
-
python_libdir = ld_library_path + ":" + python_libdir
|
|
110
|
+
ld_library_path = get_env_var("LD_LIBRARY_PATH", None)
|
|
95
111
|
|
|
96
112
|
# Base environment variables setup
|
|
97
113
|
base_env = {
|
|
98
114
|
"PATH": get_env_var("PATH", ""),
|
|
99
115
|
"PYTHONPATH": get_env_var("PYTHONPATH", ""),
|
|
100
|
-
"LD_LIBRARY_PATH": python_libdir,
|
|
116
|
+
"LD_LIBRARY_PATH": ":".join(filter(None, [python_libdir, ld_library_path])),
|
|
101
117
|
}
|
|
102
118
|
|
|
103
119
|
merged_env = base_env | env_vars
|
|
@@ -144,7 +160,7 @@ def run_commands_in_parallel(
|
|
|
144
160
|
user: str,
|
|
145
161
|
env_vars: dict,
|
|
146
162
|
log_stream_names: Optional[list[str | None]] = None,
|
|
147
|
-
) ->
|
|
163
|
+
) -> int:
|
|
148
164
|
"""
|
|
149
165
|
Executes two commands concurrently using a ThreadPoolExecutor.
|
|
150
166
|
Returns 0 if every command's result is 0, otherwise 1.
|
|
@@ -166,7 +182,7 @@ def run_commands_in_parallel(
|
|
|
166
182
|
results = [
|
|
167
183
|
future.result() for future in concurrent.futures.as_completed(futures)
|
|
168
184
|
]
|
|
169
|
-
return
|
|
185
|
+
return 0 if all(code == 0 for code in results) else 1
|
|
170
186
|
|
|
171
187
|
|
|
172
188
|
def main():
|
|
@@ -226,6 +242,7 @@ def main():
|
|
|
226
242
|
"--bind-addr=0.0.0.0:9090",
|
|
227
243
|
f'--app-name="Datatailr IDE {get_env_var("DATATAILR_USER")}"',
|
|
228
244
|
]
|
|
245
|
+
job_name = get_env_var("DATATAILR_JOB_NAME")
|
|
229
246
|
jupyter_command = [
|
|
230
247
|
"jupyter-lab",
|
|
231
248
|
"--ip='*'",
|
|
@@ -233,6 +250,9 @@ def main():
|
|
|
233
250
|
"--no-browser",
|
|
234
251
|
"--NotebookApp.token=''",
|
|
235
252
|
"--NotebookApp.password=''",
|
|
253
|
+
f"--ServerApp.base_url=/workspace/{job_name}/jupyter/",
|
|
254
|
+
f"--ServerApp.static_url_prefix=/workspace/{job_name}/jupyter/static/",
|
|
255
|
+
f"--ServerApp.root_dir=/home/{user}",
|
|
236
256
|
]
|
|
237
257
|
run_commands_in_parallel(
|
|
238
258
|
[ide_command, jupyter_command], user, env, ["code-server", "jupyter"]
|
|
@@ -49,10 +49,7 @@ class ArgumentsCache:
|
|
|
49
49
|
|
|
50
50
|
:param use_persistent_cache: If True, use the persistent cache backend. Otherwise, use in-memory cache.
|
|
51
51
|
"""
|
|
52
|
-
|
|
53
|
-
self.__bucket_name__ = dt__Tag().get("blob_storage_prefix") + "batch"
|
|
54
|
-
except Exception:
|
|
55
|
-
self.__bucket_name__ = "local-batch"
|
|
52
|
+
self.__bucket_name__ = dt__Tag().get("blob_storage_prefix") + "batch"
|
|
56
53
|
self.use_persistent_cache = use_persistent_cache
|
|
57
54
|
if not self.use_persistent_cache:
|
|
58
55
|
# Create a temp folder, for local caching
|
datatailr/scheduler/base.py
CHANGED
|
@@ -183,6 +183,9 @@ class Job:
|
|
|
183
183
|
build_script_pre=build_script_pre,
|
|
184
184
|
build_script_post=build_script_post,
|
|
185
185
|
)
|
|
186
|
+
if entrypoint is not None:
|
|
187
|
+
image.path_to_repo = entrypoint.path_to_repo
|
|
188
|
+
image.path_to_module = entrypoint.path_to_module
|
|
186
189
|
self.image = image
|
|
187
190
|
self.type = type if entrypoint is None else entrypoint.type
|
|
188
191
|
self.entrypoint = entrypoint
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datatailr/__init__.py,sha256=QTTG8X76BnlQwVx5N4ZQtSbLkgFipZ9NJGAbvtfuk_g,1051
|
|
2
2
|
datatailr/acl.py,sha256=7hBwF7TP_ADoDryYEFuXx2FCLavLmp3k_F0-sEXg26g,4173
|
|
3
|
-
datatailr/blob.py,sha256=
|
|
3
|
+
datatailr/blob.py,sha256=FHAB90wpt0DgDsejo46iCtZ1N2d6QMpo19uY_7NX1t8,3581
|
|
4
4
|
datatailr/dt_json.py,sha256=3xmTqDBk68oPl2UW8UVOYPaBw4lAsVg6nDLwcen5nuo,2252
|
|
5
5
|
datatailr/errors.py,sha256=p_e4ao3sFEfz1g4LvEDqw6bVzHJPJSINLjJ8H6_PqOo,751
|
|
6
6
|
datatailr/group.py,sha256=AC0nCA44eEWZCJCq2klPqkFg_995mS3C_wu5uSFFLtU,4426
|
|
@@ -12,23 +12,24 @@ datatailr/version.py,sha256=N9K8ZxlwFFSz8XSgbgaTWZY4k2J0JKfj698nZ_O2pIU,536
|
|
|
12
12
|
datatailr/wrapper.py,sha256=45RrMeYIFFWJAtOlQZRe1fT9daeq4vFlj6nIajbewEY,8080
|
|
13
13
|
datatailr/build/__init__.py,sha256=_dA7b4L6wsaAFaSxUoYSJ1oaRqDHDMR20kqoCocSOss,487
|
|
14
14
|
datatailr/build/image.py,sha256=YC8ML-l-sj6TcIBY-DCx_vaeI_7SmL9fPFhHnuxzRh0,5509
|
|
15
|
-
datatailr/excel/__init__.py,sha256=
|
|
16
|
-
datatailr/
|
|
15
|
+
datatailr/excel/__init__.py,sha256=wox5ltPeOYZcZoRDW4R6tJsfOjf-0WZM2_pGgltGjdo,682
|
|
16
|
+
datatailr/excel/addin.py,sha256=at0S1cNHShCOCXAml1W2sJmJ5DdNroTN6Bp6KWnYZ94,6104
|
|
17
|
+
datatailr/sbin/datatailr_run.py,sha256=MKNlI6YM1UvBwdk90YUYYQbrbAEdba5ZryFIbYT2CiE,10051
|
|
17
18
|
datatailr/sbin/datatailr_run_app.py,sha256=itF76XC2F4RK9s6bkoEppEiYwSLHK_5Jai3yvC-kFhY,1501
|
|
18
19
|
datatailr/sbin/datatailr_run_batch.py,sha256=UWnp96j_G66R_Cape7Bb-rbK6UBLF7Y5_mTlWyGJAVQ,1818
|
|
19
20
|
datatailr/sbin/datatailr_run_excel.py,sha256=BLWmvxpKEE_8vJhs8E4VWq07FOBof5tlow-AkIEXtHw,1470
|
|
20
21
|
datatailr/sbin/datatailr_run_service.py,sha256=DO9LGOpz3CVZOJJRHb4ac7AgY_mLbXHGadSyVCeIknc,1212
|
|
21
22
|
datatailr/scheduler/__init__.py,sha256=qydHYVtEP6SUWd2CQ6FRdTdRWNz3SbYPJy4FK_wOvMk,1772
|
|
22
|
-
datatailr/scheduler/arguments_cache.py,sha256=
|
|
23
|
-
datatailr/scheduler/base.py,sha256=
|
|
23
|
+
datatailr/scheduler/arguments_cache.py,sha256=00OE0DhobYteBOnirjulO1ltgGBRamAdCO168O3_Zes,6236
|
|
24
|
+
datatailr/scheduler/base.py,sha256=WWi_VnDxev0GG6QolF3Wtj-p_JS5t2CN9VALYPl1OYo,16994
|
|
24
25
|
datatailr/scheduler/batch.py,sha256=CQCH1wHhW1qx09J7iQNQleErJ4n0nssAbd6u9YS6FMY,17735
|
|
25
26
|
datatailr/scheduler/batch_decorator.py,sha256=LqL1bsupWLn-YEQUvFJYae7R3ogrL5-VodyiiScrkRw,5806
|
|
26
27
|
datatailr/scheduler/constants.py,sha256=5WWTsfwZ_BA8gVDOTa2AQX9DJ0NzfaWgtY3vrODS2-8,606
|
|
27
28
|
datatailr/scheduler/schedule.py,sha256=0XJJen2nL1xplRs0Xbjwgq3T-0bFCOrJzkSALdio998,3741
|
|
28
29
|
datatailr/scheduler/utils.py,sha256=up6oR2iwe6G52LkvgfO394xchXgCYNjOMGRQW3e8PQk,1082
|
|
29
|
-
datatailr-0.1.
|
|
30
|
-
datatailr-0.1.
|
|
31
|
-
datatailr-0.1.
|
|
32
|
-
datatailr-0.1.
|
|
33
|
-
datatailr-0.1.
|
|
34
|
-
datatailr-0.1.
|
|
30
|
+
datatailr-0.1.71.dist-info/licenses/LICENSE,sha256=ikKP4_O-UD_b8FuNdKmbzTb6odd0JX085ZW_FAPN3VI,1066
|
|
31
|
+
datatailr-0.1.71.dist-info/METADATA,sha256=jtM6J7uHqGgQ_MHJBvKDuIdYAMvQ8T4XM16b0RyF09M,5146
|
|
32
|
+
datatailr-0.1.71.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
33
|
+
datatailr-0.1.71.dist-info/entry_points.txt,sha256=YqXfk2At-olW4PUSRkqvy_O3Mbv7uTKCCPuAAiz3Qbg,312
|
|
34
|
+
datatailr-0.1.71.dist-info/top_level.txt,sha256=75gntW0X_SKpqxLL6hAPipvpk28GAhJBvoyqN_HohWU,10
|
|
35
|
+
datatailr-0.1.71.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|