datatailr 0.1.73__tar.gz → 0.1.81__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {datatailr-0.1.73/src/datatailr.egg-info → datatailr-0.1.81}/PKG-INFO +19 -15
  2. {datatailr-0.1.73 → datatailr-0.1.81}/README.md +18 -14
  3. {datatailr-0.1.73 → datatailr-0.1.81}/pyproject.toml +2 -1
  4. {datatailr-0.1.73 → datatailr-0.1.81}/setup.py +3 -1
  5. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/__init__.py +14 -0
  6. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/build/image.py +6 -4
  7. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/excel/addin.py +35 -8
  8. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/logging.py +85 -4
  9. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/__init__.py +8 -2
  10. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/base.py +28 -15
  11. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/batch.py +32 -6
  12. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/batch_decorator.py +12 -3
  13. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/constants.py +2 -2
  14. datatailr-0.1.81/src/datatailr/scheduler/job.py +112 -0
  15. datatailr-0.1.81/src/datatailr/scheduler/workflow.py +84 -0
  16. {datatailr-0.1.73 → datatailr-0.1.81/src/datatailr.egg-info}/PKG-INFO +19 -15
  17. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr.egg-info/SOURCES.txt +6 -0
  18. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr.egg-info/entry_points.txt +1 -0
  19. datatailr-0.1.81/src/datatailr.egg-info/top_level.txt +2 -0
  20. datatailr-0.1.81/src/datatailr_demo/README.md +112 -0
  21. datatailr-0.1.81/src/datatailr_demo/__init__.py +15 -0
  22. datatailr-0.1.81/src/datatailr_demo/examples.py +47 -0
  23. datatailr-0.1.81/src/sbin/datatailr_cli.py +195 -0
  24. {datatailr-0.1.73 → datatailr-0.1.81}/src/sbin/datatailr_run.py +147 -35
  25. {datatailr-0.1.73 → datatailr-0.1.81}/src/sbin/datatailr_run_excel.py +2 -2
  26. datatailr-0.1.73/src/datatailr.egg-info/top_level.txt +0 -1
  27. {datatailr-0.1.73 → datatailr-0.1.81}/LICENSE +0 -0
  28. {datatailr-0.1.73 → datatailr-0.1.81}/setup.cfg +0 -0
  29. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/acl.py +0 -0
  30. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/blob.py +0 -0
  31. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/build/__init__.py +0 -0
  32. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/dt_json.py +0 -0
  33. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/errors.py +0 -0
  34. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/excel/__init__.py +0 -0
  35. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/excel/stubs.py +0 -0
  36. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/group.py +0 -0
  37. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/arguments_cache.py +0 -0
  38. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/schedule.py +0 -0
  39. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/utils.py +0 -0
  40. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/tag.py +0 -0
  41. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/user.py +0 -0
  42. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/utils.py +0 -0
  43. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/version.py +0 -0
  44. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/wrapper.py +0 -0
  45. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr.egg-info/dependency_links.txt +0 -0
  46. {datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr.egg-info/requires.txt +0 -0
  47. {datatailr-0.1.73 → datatailr-0.1.81}/src/sbin/datatailr_run_app.py +0 -0
  48. {datatailr-0.1.73 → datatailr-0.1.81}/src/sbin/datatailr_run_batch.py +0 -0
  49. {datatailr-0.1.73 → datatailr-0.1.81}/src/sbin/datatailr_run_service.py +0 -0
{datatailr-0.1.73/src/datatailr.egg-info → datatailr-0.1.81}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.73
+Version: 0.1.81
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT
@@ -84,25 +84,27 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch_job, Batch
+from datatailr import workflow, task
 
-@batch_job()
+@task()
 def func_no_args() -> str:
     return "no_args"
 
 
-@batch_job()
+@task()
 def func_with_args(a: int, b: float) -> str:
     return f"args: {a}, {b}"
 
-with Batch(name="MY test DAG", local_run=True) as dag:
+@workflow(name="MY test DAG")
+def my_workflow():
     for n in range(2):
         res1 = func_no_args().alias(f"func_{n}")
         res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+my_workflow(local_run=True)
 ```
 
 Running this code will create a graph of jobs and execute it.
-Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch_job()`.
+Each node on the graph represents a job, which in turn is a call to a function decorated with `@task()`.
 
 Since this is a local run then the execution of each node will happen sequentially in the same process.
 
@@ -117,14 +119,14 @@ You will first need to separate your function definitions from the DAG definitio
 ```python
 # my_module.py
 
-from datatailr.scheduler import batch_job
+from datatailr import task
 
-@batch_job()
+@task()
 def func_no_args() -> str:
     return "no_args"
 
 
-@batch_job()
+@task()
 def func_with_args(a: int, b: float) -> str:
     return f"args: {a}, {b}"
 ```
@@ -133,18 +135,20 @@ To use these functions in a batch job, you just need to import them and run in a
 
 ```python
 from my_module import func_no_args, func_with_args
-from datatailr.scheduler import Batch, Schedule
+from datatailr import workflow
 
-schedule = Schedule(at_hours=0)
-
-with Batch(name="MY test DAG", schedule=schedule) as dag:
+@workflow(name="MY test DAG")
+def my_workflow():
     for n in range(2):
         res1 = func_no_args().alias(f"func_{n}")
         res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+
+schedule = Schedule(at_hours=0)
+my_workflow(schedule=schedule)
 ```
 
-This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
-The DAG in the example above will be scheduled to run daily at 00:00.
+This will submit the entire workflow for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
+The workflow in the example above will be scheduled to run daily at 00:00.
 
 ___
 Visit [our website](https://www.datatailr.com/) for more!
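The hunks above replace the `Batch` context-manager API with `@workflow`/`@task` decorators. For readers skimming the diff, here is a minimal local-run sketch of the new style, assuming only what the hunks themselves show (`task`, `workflow`, and `.alias()` re-exported from the top-level package, and `local_run=True` executing nodes sequentially in-process); the task names are hypothetical:

```python
from datatailr import task, workflow


@task()
def extract() -> str:
    # Each @task() call inside a workflow becomes a node in the job graph.
    return "raw"


@task()
def transform(data: str) -> str:
    return data.upper()


@workflow(name="example_pipeline")
def pipeline():
    # Passing one task's result into another records the dependency edge.
    raw = extract().alias("extract_step")
    transform(raw).alias("transform_step")


# local_run=True executes the graph sequentially in the current process.
pipeline(local_run=True)
```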
{datatailr-0.1.73 → datatailr-0.1.81}/README.md
@@ -47,25 +47,27 @@ print(datatailr.__provider__)
 The following example shows how to create a simple data pipeline using the Datatailr Python package.
 
 ```python
-from datatailr.scheduler import batch_job, Batch
+from datatailr import workflow, task
 
-@batch_job()
+@task()
 def func_no_args() -> str:
     return "no_args"
 
 
-@batch_job()
+@task()
 def func_with_args(a: int, b: float) -> str:
     return f"args: {a}, {b}"
 
-with Batch(name="MY test DAG", local_run=True) as dag:
+@workflow(name="MY test DAG")
+def my_workflow():
     for n in range(2):
         res1 = func_no_args().alias(f"func_{n}")
         res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+my_workflow(local_run=True)
 ```
 
 Running this code will create a graph of jobs and execute it.
-Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch_job()`.
+Each node on the graph represents a job, which in turn is a call to a function decorated with `@task()`.
 
 Since this is a local run then the execution of each node will happen sequentially in the same process.
 
@@ -80,14 +82,14 @@ You will first need to separate your function definitions from the DAG definitio
 ```python
 # my_module.py
 
-from datatailr.scheduler import batch_job
+from datatailr import task
 
-@batch_job()
+@task()
 def func_no_args() -> str:
     return "no_args"
 
 
-@batch_job()
+@task()
 def func_with_args(a: int, b: float) -> str:
     return f"args: {a}, {b}"
 ```
@@ -96,18 +98,20 @@ To use these functions in a batch job, you just need to import them and run in a
 
 ```python
 from my_module import func_no_args, func_with_args
-from datatailr.scheduler import Batch, Schedule
+from datatailr import workflow
 
-schedule = Schedule(at_hours=0)
-
-with Batch(name="MY test DAG", schedule=schedule) as dag:
+@workflow(name="MY test DAG")
+def my_workflow():
     for n in range(2):
         res1 = func_no_args().alias(f"func_{n}")
         res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+
+schedule = Schedule(at_hours=0)
+my_workflow(schedule=schedule)
 ```
 
-This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
-The DAG in the example above will be scheduled to run daily at 00:00.
+This will submit the entire workflow for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
+The workflow in the example above will be scheduled to run daily at 00:00.
 
 ___
 Visit [our website](https://www.datatailr.com/) for more!
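Note that the updated README snippet above still calls `Schedule(at_hours=0)` while its new import line only brings in `workflow`. A self-contained version would also need the `Schedule` import, which (per the scheduler `__init__.py` hunk below) remains available from `datatailr.scheduler`:

```python
from my_module import func_no_args, func_with_args
from datatailr import workflow
from datatailr.scheduler import Schedule  # not imported in the README snippet


@workflow(name="MY test DAG")
def my_workflow():
    for n in range(2):
        res1 = func_no_args().alias(f"func_{n}")
        res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")


# Submit to the scheduler; at_hours=0 schedules a daily run at 00:00.
schedule = Schedule(at_hours=0)
my_workflow(schedule=schedule)
```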
{datatailr-0.1.73 → datatailr-0.1.81}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "datatailr"
-version = "0.1.73"
+version = "0.1.81"
 description = "Ready-to-Use Platform That Drives Business Insights"
 readme = "README.md"
 requires-python = ">=3.9"
@@ -39,6 +39,7 @@ datatailr_run_batch = "datatailr.sbin.datatailr_run_batch:run"
 datatailr_run_app = "datatailr.sbin.datatailr_run_app:run"
 datatailr_run_excel = "datatailr.sbin.datatailr_run_excel:run"
 datatailr_run_service = "datatailr.sbin.datatailr_run_service:run"
+datatailr = "datatailr.sbin.datatailr_cli:main"
 
 [project.optional-dependencies]
 dev = [
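The new `[project.scripts]` entry wires a `datatailr` console command to `datatailr.sbin.datatailr_cli:main`. The 195-line CLI module itself is not shown in this diff; the only thing the entry point guarantees is a zero-argument `main()` callable, roughly of this shape (everything beyond the name `main` is a hypothetical sketch):

```python
# Sketch of what a console-script target must look like; the real
# datatailr_cli.py is not included in this diff view.
import argparse


def main() -> None:
    parser = argparse.ArgumentParser(prog="datatailr")
    parser.add_argument("command", nargs="?", help="subcommand to run")
    args = parser.parse_args()
    print(f"datatailr CLI invoked with command={args.command!r}")


if __name__ == "__main__":
    main()
```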
{datatailr-0.1.73 → datatailr-0.1.81}/setup.py
@@ -10,12 +10,14 @@ setup(
         (
             "/datatailr/sbin",
             [
+                "src/sbin/datatailr_cli.py",
                 "src/sbin/datatailr_run.py",
                 "src/sbin/datatailr_run_batch.py",
                 "src/sbin/datatailr_run_app.py",
                 "src/sbin/datatailr_run_excel.py",
                 "src/sbin/datatailr_run_service.py",
             ],
-        )
+        ),
+        ("datatailr_demo", ["src/datatailr_demo/README.md"]),
     ],
 )
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/__init__.py
@@ -16,6 +16,14 @@ from datatailr.blob import Blob
 from datatailr.build import Image
 from datatailr.utils import Environment, is_dt_installed
 from datatailr.version import __version__
+from datatailr.scheduler import (
+    App,
+    Service,
+    ExcelAddin,
+    workflow,
+    task,
+    set_allow_unsafe_scheduling,
+)
 
 system = dt__System()
 if isinstance(system, mock_cli_tool):
@@ -33,4 +41,10 @@ __all__ = [
     "__version__",
     "__provider__",
     "is_dt_installed",
+    "App",
+    "Service",
+    "ExcelAddin",
+    "workflow",
+    "task",
+    "set_allow_unsafe_scheduling",
 ]
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/build/image.py
@@ -10,7 +10,7 @@
 
 import json
 import os
-import re
+import sys
 from typing import Optional
 
 from datatailr import ACL, User
@@ -26,7 +26,7 @@ class Image:
     def __init__(
         self,
         acl: Optional[ACL] = None,
-        python_version: str = "3.12",
+        python_version: str = "auto",
         python_requirements: str | list[str] = "",
         build_script_pre: str = "",
         build_script_post: str = "",
@@ -56,8 +56,10 @@
     def python_version(self, value: str):
        if not isinstance(value, str):
            raise TypeError("python_version must be a string.")
-        if not re.match(r"^\d+\.\d+(\.\d+)?$", value):
-            raise ValueError("Invalid python_version format. Expected format: X.Y[.Z]")
+        if value.lower() == "auto":
+            value = f"{sys.version_info.major}.{sys.version_info.minor}"
+        if value not in ["3.10", "3.11", "3.12", "3.13", "3.14"]:
+            raise ValueError(f"Invalid python_version: {value}")
        self._python_version = value
 
     @property
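The setter rewrite changes two behaviors: `"auto"` now resolves to the interpreter running the build, and validation moves from an `X.Y[.Z]` format check to an explicit allow-list. A short sketch, assuming the constructor routes `python_version` through this setter and exposes a matching getter:

```python
import sys

from datatailr.build import Image

# "auto" (the new default) resolves to the current interpreter's major.minor.
image = Image(python_version="auto")
assert image.python_version == f"{sys.version_info.major}.{sys.version_info.minor}"

# Versions outside the allow-list now raise, where 0.1.73 accepted any X.Y[.Z].
try:
    Image(python_version="3.9")
except ValueError as exc:
    print(exc)  # Invalid python_version: 3.9
```

Note the allow-list starts at 3.10 even though `pyproject.toml` still declares `requires-python = ">=3.9"`.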
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/excel/addin.py
@@ -12,6 +12,8 @@ import sys
 import importlib
 import subprocess
 import inspect
+from urllib.parse import urlparse
+
 import numpy as np
 
 try:
@@ -45,11 +47,32 @@ def get_package_root(mod):
     return mod_path
 
 
+def matches_annotation(value, annotation):
+    if isinstance(value, np.ndarray):
+        return True
+    if annotation is bool:
+        return isinstance(value, bool) or (type(value) is int and value in (0, 1))
+    if annotation is float:
+        return isinstance(value, float) or (type(value) is int)
+    return isinstance(value, annotation)
+
+
+def extract_hostname(url: str) -> str | None:
+    url = url if url else ""
+    if "://" not in url:
+        url = "//" + url
+    return urlparse(url).hostname
+
+
 class Addin(AddinBase):
     def __init__(self, *args, **kwargs):
         super(Addin, self).__init__(*args, **kwargs)
+        f = inspect.currentframe().f_back
+        mod = inspect.getmodule(f)
+        if mod is not None:
+            setattr(mod, "__dt_addin__", self)
 
-    def run(self, port):
+    def run(self, port, ws_port, ide=True):
         # Excel addin executable will try to import an object literally called "addin"
         # from a module passed to dt-excel.sh as an argument. So to find which module
         # to pass to dt-excel.sh, we walk the callstack until a module with "addin"
@@ -67,14 +90,14 @@ class Addin(AddinBase):
             finally:
                 sys.path.pop(0)
 
-            addin_obj = getattr(imported_mod, "addin", None)
+            addin_obj = getattr(imported_mod, "__dt_addin__", None)
             if addin_obj is self or id(addin_obj) == id(self):
                 found_module = mod
                 break
 
         if not found_module:
             raise ValueError(
-                "'addin' not found. Please, use 'addin' as variable name for your Addin instance."
+                "'__dt_addin__' not found."
             )
 
         if found_module.__name__ != "__main__":
@@ -91,11 +114,14 @@ class Addin(AddinBase):
         module_name = os.path.splitext(os.path.basename(filename))[0]
         dir_name = os.path.dirname(os.path.abspath(filename))
 
+        ide_flag = "-i" if ide else ""
+        hostname = extract_hostname(os.environ.get("VSCODE_PROXY_URI"))
+
         subprocess.run(
             [
                 "bash",
                 "-c",
-                f'PYTHONPATH="{dir_name}:$PYTHONPATH" /opt/datatailr/bin/dt-excel.sh -n -H "localhost" -l -p {port} -w 8000 {module_name}',
+                f'PYTHONPATH="{dir_name}:$PYTHONPATH" /opt/datatailr/bin/dt-excel.sh {ide_flag} -n -H {hostname} -p {port} -w {ws_port} {module_name}',
             ]
         )
 
@@ -115,17 +141,18 @@
            # be called directly from python code without requiring positional argument for _id
            _id = args[0]
 
+        bound = signature.bind_partial(**kwargs)
+        bound.apply_defaults()
         for arg in signature.parameters.values():
             if streaming and arg.name == "queue":
                 continue
 
-            if not (
-                isinstance(kwargs[arg.name], arg.annotation)
-                or isinstance(kwargs[arg.name], np.ndarray)
+            if not matches_annotation(
+                bound.arguments[arg.name], arg.annotation
             ):
                 raise ValueError(
                     "excel/python/dt/excel.py: Got argument of wrong type, expected %s or numpy.ndarray, got %s"
-                    % (arg.annotation, type(kwargs[arg.name]))
+                    % (arg.annotation, type(bound.arguments[arg.name]))
                 )
         queue = Queue(self.name.lower() + "." + func.__name__, _id)
         if not streaming:
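The two new module-level helpers are small enough to exercise directly; a quick sketch of their semantics, assuming they are importable from `datatailr.excel.addin` in 0.1.81:

```python
import numpy as np

from datatailr.excel.addin import extract_hostname, matches_annotation

# ndarray values short-circuit to True regardless of the annotation.
assert matches_annotation(np.zeros(3), float)

# bool annotations also accept the plain ints 0 and 1 ...
assert matches_annotation(1, bool)
assert not matches_annotation(2, bool)

# ... and float annotations accept any int.
assert matches_annotation(7, float)

# extract_hostname tolerates scheme-less URLs by prefixing "//",
# which is what makes parsing VSCODE_PROXY_URI robust.
assert extract_hostname("https://ide.example.com:8443/proxy/") == "ide.example.com"
assert extract_hostname("ide.example.com:8443") == "ide.example.com"
assert extract_hostname("") is None
```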
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/logging.py
@@ -33,6 +33,70 @@ def get_log_level() -> int:
     return logging.INFO
 
 
+def ansi_symbols_supported() -> bool:
+    """Check if the terminal supports ANSI symbols."""
+    if sys.platform.startswith("win"):
+        return (
+            os.getenv("ANSICON") is not None
+            or os.getenv("WT_SESSION") is not None
+            or "TERM" in os.environ
+            and os.environ["TERM"] == "xterm-256color"
+        )
+    else:
+        return sys.stdout.isatty()
+
+
+ANSI_AVAILABLE = ansi_symbols_supported()
+
+
+def color_text(text: str, color_name: str) -> str:
+    """Wrap text with ANSI color codes if supported."""
+    if not ANSI_AVAILABLE:
+        return text
+
+    colors = {
+        "red": "\033[31m",
+        "green": "\033[32m",
+        "yellow": "\033[33m",
+        "blue": "\033[34m",
+        "magenta": "\033[35m",
+        "cyan": "\033[36m",
+        "bold": "\033[1m",
+        "reset": "\033[0m",
+    }
+    color_code = colors.get(color_name.lower(), "")
+    reset_code = colors["reset"] if color_code else ""
+    return f"{color_code}{text}{reset_code}"
+
+
+def RED(text: str) -> str:
+    return color_text(text, "red")
+
+
+def GREEN(text: str) -> str:
+    return color_text(text, "green")
+
+
+def YELLOW(text: str) -> str:
+    return color_text(text, "yellow")
+
+
+def BLUE(text: str) -> str:
+    return color_text(text, "blue")
+
+
+def MAGENTA(text: str) -> str:
+    return color_text(text, "magenta")
+
+
+def CYAN(text: str) -> str:
+    return color_text(text, "cyan")
+
+
+def BOLD(text: str) -> str:
+    return color_text(text, "bold")
+
+
 class MaxLevelFilter(logging.Filter):
     """Allow only log records at or below a given level."""
 
@@ -55,6 +119,26 @@ class MinLevelFilter(logging.Filter):
         return record.levelno >= self.level
 
 
+class ColoredFormatter(logging.Formatter):
+    COLORS = {
+        logging.DEBUG: "\033[34m",  # Blue
+        logging.INFO: "\033[32m",  # Green
+        logging.WARNING: "\033[33m",  # Yellow
+        logging.ERROR: "\033[31m",  # Red
+        logging.CRITICAL: "\033[41m",  # Red background
+    }
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+
+    def format(self, record):
+        color = self.COLORS.get(record.levelno, self.RESET)
+        timestamp = f"{self.BOLD}{self.formatTime(record)}{self.RESET}"
+        level = f"{color}{record.levelname}{self.RESET}"
+        message = f"{color}{record.getMessage()}{self.RESET}"
+        LOG_FORMAT = f"{timestamp} - {level} - {node_name}:{node_ip} - {user} - {job_name} - {record.name} - [Line {record.lineno}]: {message}"
+        return LOG_FORMAT
+
+
 tag = dt__Tag()
 node_name = tag.get("node_name") or "local"
 node_ip = tag.get("node_ip")
@@ -67,8 +151,6 @@ except Exception:
 
 user = getpass.getuser()
 
-LOG_FORMAT = f"%(asctime)s - %(levelname)s - {node_name}:{node_ip} - {user} - {job_name} - %(name)s - [Line %(lineno)d]: %(message)s"
-
 
 class DatatailrLogger:
     def __init__(
@@ -76,7 +158,6 @@ class DatatailrLogger:
         name: str,
         log_file: Optional[str] = None,
         log_level: int = get_log_level(),
-        log_format: str = LOG_FORMAT,
     ):
         """
         Initialize the DatatailrLogger.
@@ -88,7 +169,7 @@
         self.logger = logging.getLogger(name)
         self.logger.setLevel(log_level)
 
-        formatter = logging.Formatter(log_format)
+        formatter = ColoredFormatter()
 
         # stdout handler (DEBUG/INFO only)
         stdout_handler = logging.StreamHandler(sys.stdout)
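Downstream code (see the `scheduler/base.py` hunks below) imports `CYAN` for user-facing prints, and `DatatailrLogger` now always formats through `ColoredFormatter`. A brief usage sketch of the new helpers:

```python
from datatailr.logging import CYAN, DatatailrLogger, color_text

# Wrapped in ANSI escapes on a capable terminal; returned unchanged when
# ANSI_AVAILABLE is False (e.g. when output is piped to a file).
print(CYAN("Starting 'my_job' ..."))
print(color_text("be careful", "yellow"))
print(color_text("plain", "no-such-color"))  # unknown names add no codes

# The log_format argument is gone; records are colored per level instead.
logger = DatatailrLogger(__name__).get_logger()
logger.info("hello from datatailr")
```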
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/__init__.py
@@ -35,8 +35,10 @@ from datatailr.scheduler.base import (
     set_allow_unsafe_scheduling,
 )
 from datatailr.scheduler.batch import Batch, BatchJob, DuplicateJobNameError
-from datatailr.scheduler.batch_decorator import batch_decorator as batch_job
+from datatailr.scheduler.batch_decorator import batch_decorator as task
 from datatailr.scheduler.schedule import Schedule
+from datatailr.scheduler.job import App, Service, ExcelAddin
+from datatailr.scheduler.workflow import workflow
 
 __all__ = [
     "Job",
@@ -46,9 +48,13 @@ __all__ = [
     "EntryPoint",
     "Batch",
     "BatchJob",
-    "batch_job",
+    "task",
     "BatchJobError",
     "DuplicateJobNameError",
     "set_allow_unsafe_scheduling",
     "Schedule",
+    "App",
+    "Service",
+    "ExcelAddin",
+    "workflow",
 ]
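For 0.1.73 code, the visible impact of this hunk is the rename of the exported decorator: `batch_decorator` is now published as `task` instead of `batch_job`, and is also re-exported at the package root. The migration is a one-line import change:

```python
# 0.1.73:
#   from datatailr.scheduler import batch_job
#   @batch_job()
#   def my_func() -> str: ...

# 0.1.81 -- same decorator, new name, importable from the package root:
from datatailr import task


@task()
def my_func() -> str:
    return "result"
```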
{datatailr-0.1.73 → datatailr-0.1.81}/src/datatailr/scheduler/base.py
@@ -14,18 +14,19 @@ from datetime import datetime
 import importlib.util
 import json
 import os
+import re
 import tempfile
 import uuid
 from dataclasses import dataclass
 from enum import Enum
-from typing import Callable, Dict, Optional, Tuple, Union
+from typing import Callable, Dict, Optional, Tuple, Union, List
 
 from datatailr import ACL, Environment, User, is_dt_installed
 from datatailr.wrapper import dt__Job
 from datatailr.scheduler.constants import DEFAULT_TASK_MEMORY, DEFAULT_TASK_CPU
 from datatailr.build.image import Image
 from datatailr.errors import BatchJobError
-from datatailr.logging import DatatailrLogger
+from datatailr.logging import CYAN, DatatailrLogger
 from datatailr.utils import run_shell_command, dict_to_env_vars
 
 logger = DatatailrLogger(os.path.abspath(__file__)).get_logger()
@@ -142,10 +143,10 @@
         environment: Optional[Environment] = Environment.DEV,
         image: Optional[Image] = None,
         run_as: Optional[Union[str, User]] = None,
-        resources: Resources = Resources(memory="128m", cpu=0.25),
+        resources: Resources = Resources(),
         acl: Optional[ACL] = None,
-        python_version: str = "3.12",
-        python_requirements: str = "",
+        python_version: str = "auto",
+        python_requirements: str | List[str] = "",
         build_script_pre: str = "",
         build_script_post: str = "",
         env_vars: Dict[str, str | int | float | bool] = {},
@@ -153,6 +154,12 @@
         entrypoint: Optional[EntryPoint] = None,
         update_existing: bool = False,
     ):
+        # valid names must be lowercase, alphanumeric and underscores only
+        if not re.match(r"^[a-z0-9_]+$", name):
+            raise ValueError(
+                f"Invalid job name: {name}. Only lowercase letters, numbers, and underscores are allowed."
+            )
+
         if environment is None:
             environment = Environment.DEV
 
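The name check is a plain regex, so its effect is easy to preview; the sample names below are hypothetical:

```python
import re

# The pattern added to the constructor in this release.
VALID_JOB_NAME = re.compile(r"^[a-z0-9_]+$")

for name in ["daily_etl_v2", "My Job", "job-1", "UPPER"]:
    verdict = "ok" if VALID_JOB_NAME.match(name) else "raises ValueError"
    print(f"{name!r}: {verdict}")
# 'daily_etl_v2': ok
# 'My Job': raises ValueError
# 'job-1': raises ValueError
# 'UPPER': raises ValueError
```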
@@ -245,7 +252,6 @@
         if self.type == JobType.EXCEL:
             if "DATATAILR_LOCAL" not in self.__env_vars:
                 self.__env_vars.update({"DATATAILR_LOCAL": "false"})
-            job_dict["per_user_job"] = True
         if self.type != JobType.BATCH:
             job_dict["entrypoint"] = str(self.entrypoint) if self.entrypoint else None
         job_dict["env"] = dict_to_env_vars(self.__env_vars)
@@ -294,6 +300,7 @@
         Returns a tuple of (branch: str, commit_hash: str).
         """
         path_to_repo = self.image.path_to_repo or "."
+        branch_name, local_commit, return_code = "unknown", "unknown", None
         try:
             local_commit = run_shell_command(
                 f"cd {path_to_repo} && git rev-parse HEAD"
@@ -301,6 +308,13 @@
             branch_name = run_shell_command(
                 f"cd {path_to_repo} && git rev-parse --abbrev-ref HEAD"
             )[0]
+
+            if (
+                os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower()
+                == "true"
+            ):
+                return branch_name, local_commit
+
             return_code = run_shell_command(
                 f"cd {path_to_repo} && git diff --exit-code"
             )
@@ -309,15 +323,11 @@
             logger.warning(
                 "Git is not installed or not found in PATH. Repository validation is not possible."
             )
-            branch_name, local_commit, return_code = "unknown", "unknown", None
         else:
             raise RepoValidationError(
                 f"Error accessing git repository at {path_to_repo}: {e}"
             ) from e
 
-        if os.getenv("DATATAILR_ALLOW_UNSAFE_SCHEDULING", "false").lower() == "true":
-            return branch_name, local_commit
-
         is_committed = return_code is not None and return_code[1] == 0
 
         if not is_committed:
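Behaviorally, the unsafe-scheduling escape hatch is unchanged but now short-circuits before `git diff --exit-code` runs, so the working-tree cleanliness check is skipped entirely rather than merely ignored. A minimal sketch of opting in via the environment variable the code checks (the exported `set_allow_unsafe_scheduling` helper presumably wraps the same switch, but its body is not shown in this diff):

```python
import os

# With this set, the git-info helper returns (branch, commit) right after the
# rev-parse calls and never invokes "git diff --exit-code".
os.environ["DATATAILR_ALLOW_UNSAFE_SCHEDULING"] = "true"
```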
@@ -342,10 +352,6 @@
             branch_name=branch_name,
             commit_hash=local_commit,
         )
-        logger.info(
-            f"Running job '{self.name}' in environment '{self.environment}' as '{self.run_as}'"
-        )
-
         with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as temp_file:
             temp_file.write(self.to_json().encode())
             return temp_file.name
@@ -375,7 +381,10 @@
         )
         try:
             temp_file_name = self.__prepare__()
-
+            action = {"run": "Running", "save": "Saving", "start": "Starting"}.get(
+                command, "Processing"
+            )
+            print(CYAN(f"{action} '{self.name}' as {self.run_as} ..."))
             if command == "run":
                 result = __client__.run(
                     f"file://{temp_file_name}", **self.get_schedule_args()
@@ -393,6 +402,10 @@
             logger.error(f"Error running command '{command}': {e}")
             return False, str(e)
         self.__set_existing_id__(result)
+        action = {"run": "ran", "save": "saved", "start": "started"}.get(
+            command, "processed"
+        )
+        print(CYAN(f"Job '{self.name}' {action} successfully."))
         return True, result
 
     def save(self) -> Tuple[bool, str]:
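The added `CYAN(...)` prints bracket submission with console feedback, and the `dict.get` fallback keeps unknown commands from raising. The fallback behavior, reproduced standalone:

```python
# Same mapping as above: known commands get a specific label,
# anything else falls back to the generic "Processing" label.
for command in ("run", "save", "start", "deploy"):
    action = {"run": "Running", "save": "Saving", "start": "Starting"}.get(
        command, "Processing"
    )
    print(f"{command}: {action}")
# run: Running
# save: Saving
# start: Starting
# deploy: Processing
```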