datatailr 0.1.10.tar.gz → 0.1.11.tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of datatailr might be problematic.
- {datatailr-0.1.10/src/datatailr.egg-info → datatailr-0.1.11}/PKG-INFO +43 -1
- {datatailr-0.1.10 → datatailr-0.1.11}/README.md +42 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/pyproject.toml +1 -1
- {datatailr-0.1.10 → datatailr-0.1.11/src/datatailr.egg-info}/PKG-INFO +43 -1
- {datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run.py +4 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/LICENSE +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/setup.cfg +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/setup.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/__init__.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/acl.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/blob.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/build/__init__.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/build/image.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/dt_json.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/errors.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/group.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/logging.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/__init__.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/arguments_cache.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/base.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/batch.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/batch_decorator.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/constants.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/schedule.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/scheduler/utils.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/user.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/utils.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/version.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr/wrapper.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr.egg-info/SOURCES.txt +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr.egg-info/dependency_links.txt +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr.egg-info/entry_points.txt +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr.egg-info/requires.txt +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/datatailr.egg-info/top_level.txt +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run_app.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run_batch.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run_excel.py +0 -0
- {datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run_service.py +1 -1
{datatailr-0.1.10/src/datatailr.egg-info → datatailr-0.1.11}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.10
+Version: 0.1.11
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT

@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
 Running this code will create a graph of jobs and execute it.
 Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
 
+Since this is a local run, the execution of each node happens sequentially in the same process.
+
+To take advantage of the Datatailr platform and execute the graph at scale, you can run it using the job scheduler, as presented in the next section.
+
+### Execution at Scale
+To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
+
+You will first need to separate your function definitions from the DAG definition: define your functions in a separate module, which can then be imported into the DAG definition.
+
+
+```python
+# my_module.py
+
+from datatailr.scheduler import batch, Batch
+
+@batch()
+def func_no_args() -> str:
+    return "no_args"
+
+
+@batch()
+def func_with_args(a: int, b: float) -> str:
+    return f"args: {a}, {b}"
+```
+
+To use these functions in a batch job, import them and run them in a DAG context:
+
+```python
+from my_module import func_no_args, func_with_args
+from datatailr.scheduler import Batch, Schedule
+
+schedule = Schedule(at_hour=0)
+
+with Batch(name="MY test DAG", schedule=schedule) as dag:
+    for n in range(2):
+        res1 = func_no_args().alias(f"func_{n}")
+        res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+```
+
+This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
+The DAG in the example above will be scheduled to run daily at 00:00.
+
 ___
 Visit [our website](https://www.datatailr.com/) for more!
{datatailr-0.1.10 → datatailr-0.1.11}/README.md

@@ -67,5 +67,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
 Running this code will create a graph of jobs and execute it.
 Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
 
+Since this is a local run, the execution of each node happens sequentially in the same process.
+
+To take advantage of the Datatailr platform and execute the graph at scale, you can run it using the job scheduler, as presented in the next section.
+
+### Execution at Scale
+To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
+
+You will first need to separate your function definitions from the DAG definition: define your functions in a separate module, which can then be imported into the DAG definition.
+
+
+```python
+# my_module.py
+
+from datatailr.scheduler import batch, Batch
+
+@batch()
+def func_no_args() -> str:
+    return "no_args"
+
+
+@batch()
+def func_with_args(a: int, b: float) -> str:
+    return f"args: {a}, {b}"
+```
+
+To use these functions in a batch job, import them and run them in a DAG context:
+
+```python
+from my_module import func_no_args, func_with_args
+from datatailr.scheduler import Batch, Schedule
+
+schedule = Schedule(at_hour=0)
+
+with Batch(name="MY test DAG", schedule=schedule) as dag:
+    for n in range(2):
+        res1 = func_no_args().alias(f"func_{n}")
+        res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+```
+
+This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
+The DAG in the example above will be scheduled to run daily at 00:00.
+
 ___
 Visit [our website](https://www.datatailr.com/) for more!
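The README snippets above show the two pieces separately; the sketch below puts them side by side to make the local-run vs. scheduled-run distinction concrete. It is a minimal sketch, assuming `my_module` exists as defined above and using only names that appear in this diff (`Batch`, `Schedule`, `local_run`, `.alias()`).

```python
# run_dag.py - sketch contrasting the two execution modes described above.
# Assumes my_module is the module from the README example.
from my_module import func_no_args, func_with_args
from datatailr.scheduler import Batch, Schedule

# Local run: every node executes sequentially in this process,
# which is handy for debugging the DAG structure.
with Batch(name="MY test DAG", local_run=True) as dag:
    res = func_with_args(1, func_no_args().alias("a")).alias("b")

# Scheduled run: the DAG is submitted to the Datatailr scheduler, which
# runs independent nodes in parallel and triggers the DAG daily at 00:00.
with Batch(name="MY test DAG", schedule=Schedule(at_hour=0)) as dag:
    res = func_with_args(1, func_no_args().alias("a")).alias("b")
```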
{datatailr-0.1.10 → datatailr-0.1.11/src/datatailr.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datatailr
-Version: 0.1.10
+Version: 0.1.11
 Summary: Ready-to-Use Platform That Drives Business Insights
 Author-email: Datatailr <info@datatailr.com>
 License-Expression: MIT

@@ -104,5 +104,47 @@ with Batch(name="MY test DAG", local_run=True) as dag:
 Running this code will create a graph of jobs and execute it.
 Each node on the graph represents a job, which in turn is a call to a function decorated with `@batch()`.
 
+Since this is a local run, the execution of each node happens sequentially in the same process.
+
+To take advantage of the Datatailr platform and execute the graph at scale, you can run it using the job scheduler, as presented in the next section.
+
+### Execution at Scale
+To execute the graph at scale, you can use the Datatailr job scheduler. This allows you to run your jobs in parallel, taking advantage of the underlying infrastructure.
+
+You will first need to separate your function definitions from the DAG definition: define your functions in a separate module, which can then be imported into the DAG definition.
+
+
+```python
+# my_module.py
+
+from datatailr.scheduler import batch, Batch
+
+@batch()
+def func_no_args() -> str:
+    return "no_args"
+
+
+@batch()
+def func_with_args(a: int, b: float) -> str:
+    return f"args: {a}, {b}"
+```
+
+To use these functions in a batch job, import them and run them in a DAG context:
+
+```python
+from my_module import func_no_args, func_with_args
+from datatailr.scheduler import Batch, Schedule
+
+schedule = Schedule(at_hour=0)
+
+with Batch(name="MY test DAG", schedule=schedule) as dag:
+    for n in range(2):
+        res1 = func_no_args().alias(f"func_{n}")
+        res2 = func_with_args(1, res1).alias(f"func_with_args_{n}")
+```
+
+This will submit the entire DAG for execution, and the scheduler will take care of running the jobs in parallel and managing the resources.
+The DAG in the example above will be scheduled to run daily at 00:00.
+
 ___
 Visit [our website](https://www.datatailr.com/) for more!
{datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run.py

@@ -111,10 +111,12 @@ def main():
         }
         run_command_as_user("datatailr_run_batch", user, env)
     elif job_type == "service":
+        port = get_env_var("DATATAILR_SERVICE_PORT")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
             "DATATAILR_ENTRYPOINT": entrypoint,
+            "DATATAILR_SERVICE_PORT": port,
         }
         run_command_as_user("datatailr_run_service", user, env)
     elif job_type == "app":
@@ -125,10 +127,12 @@ def main():
         }
         run_command_as_user("datatailr_run_app", user, env)
     elif job_type == "excel":
+        host = get_env_var("DATATAILR_HOST")
         env = {
             "DATATAILR_JOB_NAME": job_name,
             "DATATAILR_JOB_ID": job_id,
             "DATATAILR_ENTRYPOINT": entrypoint,
+            "DATATAILR_HOST": host,
         }
         run_command_as_user("datatailr_run_excel", user, env)
     elif job_type == "IDE":
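For context on the two hunks above: `datatailr_run.py` builds an explicit environment for the child process it launches, so any variable the child needs (such as `DATATAILR_SERVICE_PORT`, which `datatailr_run_service` validates in the last hunk of this diff) must be copied in by hand. Below is a minimal sketch of that pattern; the bodies of `get_env_var` and `run_command_as_user` are not shown in this diff, so the implementations are assumptions.

```python
# Sketch of the env-forwarding pattern; helper bodies are assumed, since
# the diff only shows their call sites.
import os
import subprocess


def get_env_var(name: str) -> str:
    # Assumed implementation: read a required variable from the
    # launcher's own environment, failing loudly if it is missing.
    value = os.environ.get(name)
    if value is None:
        raise ValueError(f"Environment variable '{name}' is not set.")
    return value


def run_command_as_user(command: str, user: str, env: dict) -> None:
    # Assumed implementation: the real helper also switches to `user`;
    # the point here is that the child process sees only what `env`
    # carries on top of the base environment.
    subprocess.run([command], env={**os.environ, **env}, check=True)


# Without the added get_env_var call and env entry, the child process
# would never receive DATATAILR_SERVICE_PORT and would fail its check.
env = {"DATATAILR_SERVICE_PORT": get_env_var("DATATAILR_SERVICE_PORT")}
run_command_as_user("datatailr_run_service", "some-user", env)  # user is illustrative
```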
{datatailr-0.1.10 → datatailr-0.1.11}/src/sbin/datatailr_run_service.py

@@ -30,5 +30,5 @@ def run():
        raise ValueError("Environment variable 'DATATAILR_SERVICE_PORT' is not set.")
 
     entrypoint_module = importlib.import_module(entrypoint)
-    entrypoint_module.__service_main__(int(port))
     logger.info(f"Running entrypoint: {entrypoint}")
+    entrypoint_module.__service_main__(int(port))