datatailr 0.1.34__tar.gz → 0.1.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datatailr might be problematic. Click here for more details.
- {datatailr-0.1.34/src/datatailr.egg-info → datatailr-0.1.36}/PKG-INFO +1 -1
- {datatailr-0.1.34 → datatailr-0.1.36}/pyproject.toml +1 -1
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/blob.py +6 -2
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/group.py +2 -2
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/logging.py +40 -9
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/base.py +4 -6
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/batch.py +9 -1
- {datatailr-0.1.34 → datatailr-0.1.36/src/datatailr.egg-info}/PKG-INFO +1 -1
- {datatailr-0.1.34 → datatailr-0.1.36}/src/sbin/datatailr_run.py +52 -25
- {datatailr-0.1.34 → datatailr-0.1.36}/LICENSE +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/README.md +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/setup.cfg +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/setup.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/__init__.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/acl.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/build/__init__.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/build/image.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/dt_json.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/errors.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/excel.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/__init__.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/arguments_cache.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/batch_decorator.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/constants.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/schedule.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/scheduler/utils.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/user.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/utils.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/version.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr/wrapper.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr.egg-info/SOURCES.txt +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr.egg-info/dependency_links.txt +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr.egg-info/entry_points.txt +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr.egg-info/requires.txt +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/datatailr.egg-info/top_level.txt +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/sbin/datatailr_run_app.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/sbin/datatailr_run_batch.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/sbin/datatailr_run_excel.py +0 -0
- {datatailr-0.1.34 → datatailr-0.1.36}/src/sbin/datatailr_run_service.py +0 -0
|
@@ -98,6 +98,10 @@ class Blob:
|
|
|
98
98
|
# Since direct reading and writing of blobs is not implemented yet, we are using a temporary file.
|
|
99
99
|
# This is a workaround to allow writing the blob content directly to the blob storage.
|
|
100
100
|
with tempfile.NamedTemporaryFile(delete=True) as temp_file:
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
if isinstance(blob, bytes):
|
|
102
|
+
with open(temp_file.name, "wb") as f:
|
|
103
|
+
f.write(blob)
|
|
104
|
+
else:
|
|
105
|
+
with open(temp_file.name, "w") as f:
|
|
106
|
+
f.write(blob)
|
|
103
107
|
self.put_file(name, temp_file.name)
|
|
@@ -103,8 +103,8 @@ class Group:
|
|
|
103
103
|
|
|
104
104
|
@staticmethod
|
|
105
105
|
def add(name: str) -> Optional["Group"]:
|
|
106
|
-
__client__.add(name)
|
|
107
|
-
return Group
|
|
106
|
+
new_group = __client__.add(name, json_enrichened=True)
|
|
107
|
+
return Group(new_group["name"]) if new_group else None
|
|
108
108
|
|
|
109
109
|
@staticmethod
|
|
110
110
|
def ls() -> list:
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
|
-
|
|
13
|
+
import sys
|
|
14
14
|
from logging.handlers import RotatingFileHandler
|
|
15
15
|
from typing import Optional
|
|
16
16
|
from datatailr import User
|
|
@@ -33,6 +33,28 @@ def get_log_level() -> int:
|
|
|
33
33
|
return logging.INFO
|
|
34
34
|
|
|
35
35
|
|
|
36
|
+
class MaxLevelFilter(logging.Filter):
|
|
37
|
+
"""Allow only log records at or below a given level."""
|
|
38
|
+
|
|
39
|
+
def __init__(self, level):
|
|
40
|
+
super().__init__()
|
|
41
|
+
self.level = level
|
|
42
|
+
|
|
43
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
44
|
+
return record.levelno <= self.level
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class MinLevelFilter(logging.Filter):
|
|
48
|
+
"""Allow only log records at or above a given level."""
|
|
49
|
+
|
|
50
|
+
def __init__(self, level):
|
|
51
|
+
super().__init__()
|
|
52
|
+
self.level = level
|
|
53
|
+
|
|
54
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
55
|
+
return record.levelno >= self.level
|
|
56
|
+
|
|
57
|
+
|
|
36
58
|
tag = dt__Tag()
|
|
37
59
|
node_name = tag.get("node_name") or "local"
|
|
38
60
|
node_ip = tag.get("node_ip")
|
|
@@ -66,12 +88,21 @@ class DatatailrLogger:
|
|
|
66
88
|
self.logger = logging.getLogger(name)
|
|
67
89
|
self.logger.setLevel(log_level)
|
|
68
90
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
91
|
+
formatter = logging.Formatter(log_format)
|
|
92
|
+
|
|
93
|
+
# stdout handler (DEBUG/INFO only)
|
|
94
|
+
stdout_handler = logging.StreamHandler(sys.stdout)
|
|
95
|
+
stdout_handler.setLevel(logging.DEBUG)
|
|
96
|
+
stdout_handler.addFilter(MaxLevelFilter(logging.INFO))
|
|
97
|
+
stdout_handler.setFormatter(formatter)
|
|
98
|
+
self.logger.addHandler(stdout_handler)
|
|
99
|
+
|
|
100
|
+
# stderr handler (WARNING and above)
|
|
101
|
+
stderr_handler = logging.StreamHandler(sys.stderr)
|
|
102
|
+
stderr_handler.setLevel(logging.WARNING)
|
|
103
|
+
stderr_handler.addFilter(MinLevelFilter(logging.WARNING))
|
|
104
|
+
stderr_handler.setFormatter(formatter)
|
|
105
|
+
self.logger.addHandler(stderr_handler)
|
|
75
106
|
|
|
76
107
|
# Optional file handler
|
|
77
108
|
if log_file:
|
|
@@ -79,9 +110,9 @@ class DatatailrLogger:
|
|
|
79
110
|
log_file, maxBytes=10 * 1024 * 1024, backupCount=5
|
|
80
111
|
)
|
|
81
112
|
file_handler.setLevel(log_level)
|
|
82
|
-
|
|
83
|
-
file_handler.setFormatter(file_formatter)
|
|
113
|
+
file_handler.setFormatter(formatter)
|
|
84
114
|
self.logger.addHandler(file_handler)
|
|
115
|
+
|
|
85
116
|
self.enable_opentelemetry()
|
|
86
117
|
|
|
87
118
|
def get_logger(self):
|
|
@@ -123,12 +123,10 @@ class EntryPoint:
|
|
|
123
123
|
module = importlib.import_module(self.module_name)
|
|
124
124
|
func = getattr(module, self.function_name)
|
|
125
125
|
return func(*args, **kwargs)
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
elif self.type == JobType.APP:
|
|
131
|
-
raise NotImplementedError("App jobs are not yet implemented.")
|
|
126
|
+
else:
|
|
127
|
+
raise NotImplementedError(
|
|
128
|
+
f"EntryPoint of type '{self.type}' is not callable."
|
|
129
|
+
)
|
|
132
130
|
|
|
133
131
|
def __repr__(self):
|
|
134
132
|
return f"EntryPoint({self.function_name} from {self.module_name}, type={self.type})"
|
|
@@ -459,7 +459,7 @@ class Batch(Job):
|
|
|
459
459
|
return args
|
|
460
460
|
return {}
|
|
461
461
|
|
|
462
|
-
def
|
|
462
|
+
def prepare_args(self) -> None:
|
|
463
463
|
def arg_name(arg: Union[BatchJob, str]) -> str:
|
|
464
464
|
return arg.name if isinstance(arg, BatchJob) else arg
|
|
465
465
|
|
|
@@ -481,6 +481,14 @@ class Batch(Job):
|
|
|
481
481
|
}
|
|
482
482
|
|
|
483
483
|
__ARGUMENTS_CACHE__.add_arguments(self.id, args)
|
|
484
|
+
|
|
485
|
+
def save(self) -> Tuple[bool, str]:
|
|
486
|
+
self.prepare_args()
|
|
487
|
+
return super().save()
|
|
488
|
+
|
|
489
|
+
def run(self) -> Tuple[bool, str]:
|
|
490
|
+
self.prepare_args()
|
|
491
|
+
|
|
484
492
|
if not self.__local_run and is_dt_installed():
|
|
485
493
|
return super().run()
|
|
486
494
|
else:
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
# DATATAILR_JOB_ID - the unique identifier for the job.
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
import subprocess
|
|
35
36
|
import os
|
|
36
37
|
import sys
|
|
37
38
|
from typing import Tuple
|
|
@@ -42,7 +43,7 @@ logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
|
|
|
42
43
|
|
|
43
44
|
if not is_dt_installed():
|
|
44
45
|
logger.error("Datatailr is not installed.")
|
|
45
|
-
sys.exit(1)
|
|
46
|
+
# sys.exit(1) # TODO: Uncomment after testing
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
def get_env_var(name: str, default: str | None = None) -> str:
|
|
@@ -80,18 +81,32 @@ def create_user_and_group() -> Tuple[str, str]:
|
|
|
80
81
|
return user, group
|
|
81
82
|
|
|
82
83
|
|
|
83
|
-
def run_command_as_user(command: str, user: str, env_vars: dict):
|
|
84
|
+
def run_command_as_user(command: str | list, user: str, env_vars: dict):
|
|
84
85
|
"""
|
|
85
86
|
Run a command as a specific user with the given environment variables.
|
|
86
87
|
"""
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
88
|
+
if isinstance(command, str):
|
|
89
|
+
command = command.split(" ")
|
|
90
|
+
env_vars = {
|
|
91
|
+
"PATH": get_env_var("PATH", ""),
|
|
92
|
+
"PYTHONPATH": get_env_var("PYTHONPATH", ""),
|
|
93
|
+
} | env_vars
|
|
94
|
+
|
|
95
|
+
env_kv = [f"{k}={v}" for k, v in env_vars.items()]
|
|
96
|
+
argv = ["sudo", "-u", user, "env", *env_kv, "bash", "-lc", *command]
|
|
97
|
+
|
|
98
|
+
try:
|
|
99
|
+
result = subprocess.run(
|
|
100
|
+
argv,
|
|
101
|
+
check=True, # raises if returncode != 0
|
|
102
|
+
capture_output=True,
|
|
103
|
+
text=True,
|
|
104
|
+
)
|
|
105
|
+
logger.info(f"stdout: {result.stdout}")
|
|
106
|
+
logger.debug(f"stderr: {result.stderr}")
|
|
107
|
+
except subprocess.CalledProcessError as e:
|
|
108
|
+
logger.error(f"Command failed with exit code {e.returncode}")
|
|
109
|
+
logger.error(f"stderr: {e.stderr}")
|
|
95
110
|
sys.exit(1)
|
|
96
111
|
|
|
97
112
|
|
|
@@ -99,8 +114,11 @@ def main():
|
|
|
99
114
|
user, _ = create_user_and_group()
|
|
100
115
|
job_type = get_env_var("DATATAILR_JOB_TYPE")
|
|
101
116
|
|
|
102
|
-
|
|
103
|
-
|
|
117
|
+
env = {
|
|
118
|
+
"DATATAILR_JOB_TYPE": job_type,
|
|
119
|
+
"DATATAILR_JOB_NAME": get_env_var("DATATAILR_JOB_NAME"),
|
|
120
|
+
"DATATAILR_JOB_ID": get_env_var("DATATAILR_JOB_ID"),
|
|
121
|
+
}
|
|
104
122
|
|
|
105
123
|
if job_type == "batch":
|
|
106
124
|
run_id = get_env_var("DATATAILR_BATCH_RUN_ID")
|
|
@@ -109,40 +127,49 @@ def main():
|
|
|
109
127
|
env = {
|
|
110
128
|
"DATATAILR_BATCH_RUN_ID": run_id,
|
|
111
129
|
"DATATAILR_BATCH_ID": batch_id,
|
|
112
|
-
"DATATAILR_JOB_ID": job_id,
|
|
113
130
|
"DATATAILR_BATCH_ENTRYPOINT": entrypoint,
|
|
114
|
-
}
|
|
131
|
+
} | env
|
|
115
132
|
run_command_as_user("datatailr_run_batch", user, env)
|
|
116
133
|
elif job_type == "service":
|
|
117
134
|
port = get_env_var("DATATAILR_SERVICE_PORT")
|
|
118
135
|
entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
|
|
119
136
|
env = {
|
|
120
|
-
"DATATAILR_JOB_NAME": job_name,
|
|
121
|
-
"DATATAILR_JOB_ID": job_id,
|
|
122
137
|
"DATATAILR_ENTRYPOINT": entrypoint,
|
|
123
138
|
"DATATAILR_SERVICE_PORT": port,
|
|
124
|
-
}
|
|
139
|
+
} | env
|
|
125
140
|
run_command_as_user("datatailr_run_service", user, env)
|
|
126
141
|
elif job_type == "app":
|
|
127
142
|
entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
|
|
128
143
|
env = {
|
|
129
|
-
"DATATAILR_JOB_NAME": job_name,
|
|
130
|
-
"DATATAILR_JOB_ID": job_id,
|
|
131
144
|
"DATATAILR_ENTRYPOINT": entrypoint,
|
|
132
|
-
}
|
|
145
|
+
} | env
|
|
133
146
|
run_command_as_user("datatailr_run_app", user, env)
|
|
134
147
|
elif job_type == "excel":
|
|
135
|
-
host = get_env_var("DATATAILR_HOST")
|
|
148
|
+
host = get_env_var("DATATAILR_HOST", "")
|
|
136
149
|
entrypoint = get_env_var("DATATAILR_ENTRYPOINT")
|
|
137
150
|
env = {
|
|
138
|
-
"DATATAILR_JOB_NAME": job_name,
|
|
139
|
-
"DATATAILR_JOB_ID": job_id,
|
|
140
151
|
"DATATAILR_ENTRYPOINT": entrypoint,
|
|
141
152
|
"DATATAILR_HOST": host,
|
|
142
|
-
}
|
|
153
|
+
} | env
|
|
143
154
|
run_command_as_user("datatailr_run_excel", user, env)
|
|
144
155
|
elif job_type == "IDE":
|
|
145
|
-
|
|
156
|
+
command = [
|
|
157
|
+
"code-server",
|
|
158
|
+
"--auth=none",
|
|
159
|
+
"--bind-addr=0.0.0.0:8080",
|
|
160
|
+
f'--app-name="Datatailr IDE {get_env_var("DATATAILR_USER")}"',
|
|
161
|
+
]
|
|
162
|
+
run_command_as_user(command, user, env)
|
|
163
|
+
elif job_type == "jupyter":
|
|
164
|
+
command = [
|
|
165
|
+
"jupyter-lab",
|
|
166
|
+
"--ip='*'",
|
|
167
|
+
"--port=8080",
|
|
168
|
+
"--no-browser",
|
|
169
|
+
"--NotebookApp.token=''",
|
|
170
|
+
"--NotebookApp.password=''",
|
|
171
|
+
]
|
|
172
|
+
run_command_as_user(command, user, env)
|
|
146
173
|
else:
|
|
147
174
|
raise ValueError(f"Unknown job type: {job_type}")
|
|
148
175
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|