dagster-teradata 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_teradata-0.0.1/PKG-INFO +148 -0
- dagster_teradata-0.0.1/README.md +134 -0
- dagster_teradata-0.0.1/dagster_teradata/__init__.py +6 -0
- dagster_teradata-0.0.1/dagster_teradata/constants.py +23 -0
- dagster_teradata-0.0.1/dagster_teradata/resources.py +550 -0
- dagster_teradata-0.0.1/dagster_teradata/teradata_compute_cluster_manager.py +404 -0
- dagster_teradata-0.0.1/dagster_teradata.egg-info/PKG-INFO +148 -0
- dagster_teradata-0.0.1/dagster_teradata.egg-info/SOURCES.txt +11 -0
- dagster_teradata-0.0.1/dagster_teradata.egg-info/dependency_links.txt +1 -0
- dagster_teradata-0.0.1/dagster_teradata.egg-info/requires.txt +9 -0
- dagster_teradata-0.0.1/dagster_teradata.egg-info/top_level.txt +1 -0
- dagster_teradata-0.0.1/pyproject.toml +31 -0
- dagster_teradata-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: dagster-teradata
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A Dagster module that provides integration with Teradata Vantage.
|
|
5
|
+
Requires-Python: >=3.8
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: dagster>=1.8.0
|
|
8
|
+
Requires-Dist: teradatasql
|
|
9
|
+
Provides-Extra: azure
|
|
10
|
+
Requires-Dist: dagster-azure; extra == "azure"
|
|
11
|
+
Provides-Extra: aws
|
|
12
|
+
Requires-Dist: boto3; extra == "aws"
|
|
13
|
+
Requires-Dist: dagster-aws; extra == "aws"
|
|
14
|
+
|
|
15
|
+
# dagster-teradata
|
|
16
|
+
|
|
17
|
+
A dagster module that provides integration with [Teradata Vantage](https://www.teradata.com/).
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
The `dagster_teradata` module is available as a PyPI package - install with your preferred python
|
|
21
|
+
environment manager.
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
source .venv/bin/activate
|
|
25
|
+
pip install dagster-teradata
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Example Usage
|
|
29
|
+
|
|
30
|
+
This module offers seamless integration with Teradata Vantage, facilitating efficient workflows for data processing, management,
|
|
31
|
+
and transformation. This module supports a range of scenarios, such as executing queries, managing tables,
|
|
32
|
+
and integrating with cloud storage solutions like AWS S3 and Azure Data Lake Storage (ADLS). Additionally,
|
|
33
|
+
it enables compute cluster management for Teradata VantageCloud Lake.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import os
|
|
37
|
+
import pytest
|
|
38
|
+
from dagster import job, op, EnvVar
|
|
39
|
+
from dagster_teradata import TeradataResource
|
|
40
|
+
|
|
41
|
+
td_resource = TeradataResource(
|
|
42
|
+
host=EnvVar("TERADATA_HOST"),
|
|
43
|
+
user=EnvVar("TERADATA_USER"),
|
|
44
|
+
password=EnvVar("TERADATA_PASSWORD"),
|
|
45
|
+
database=EnvVar("TERADATA_DATABASE"),
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
def test_execute_query(tmp_path):
|
|
49
|
+
@op(required_resource_keys={"teradata"})
|
|
50
|
+
def example_test_execute_query(context):
|
|
51
|
+
result = context.resources.teradata.execute_queries(
|
|
52
|
+
["select order_id from orders_24", "select order_id from orders_25"], True
|
|
53
|
+
)
|
|
54
|
+
context.log.info(result)
|
|
55
|
+
|
|
56
|
+
@job(resource_defs={"teradata": td_resource})
|
|
57
|
+
def example_job():
|
|
58
|
+
example_test_execute_query()
|
|
59
|
+
|
|
60
|
+
example_job.execute_in_process(resources={"teradata": td_resource})
|
|
61
|
+
```
|
|
62
|
+
```python
|
|
63
|
+
import os
|
|
64
|
+
import pytest
|
|
65
|
+
from dagster import job, op, EnvVar
|
|
66
|
+
from dagster_teradata import TeradataResource
|
|
67
|
+
|
|
68
|
+
td_resource = TeradataResource(
|
|
69
|
+
host=EnvVar("TERADATA_HOST"),
|
|
70
|
+
user=EnvVar("TERADATA_USER"),
|
|
71
|
+
password=EnvVar("TERADATA_PASSWORD"),
|
|
72
|
+
database=EnvVar("TERADATA_DATABASE"),
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
def test_drop_table(tmp_path):
|
|
76
|
+
@op(required_resource_keys={"teradata"})
|
|
77
|
+
def example_test_drop_table(context):
|
|
78
|
+
result = context.resources.teradata.drop_table(["process_tmp1", "process_tmp2"])
|
|
79
|
+
context.log.info(result)
|
|
80
|
+
|
|
81
|
+
@job(resource_defs={"teradata": td_resource})
|
|
82
|
+
def example_job():
|
|
83
|
+
example_test_drop_table()
|
|
84
|
+
|
|
85
|
+
example_job.execute_in_process(resources={"teradata": td_resource})
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Here is another example, this time showing compute cluster management in Teradata VantageCloud Lake:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import os
|
|
92
|
+
|
|
93
|
+
import pytest
|
|
94
|
+
from dagster import job, op, EnvVar
|
|
95
|
+
from dagster_teradata import teradata_resource
|
|
96
|
+
|
|
97
|
+
def test_create_teradata_compute_cluster(tmp_path):
|
|
98
|
+
@op(required_resource_keys={"teradata"})
|
|
99
|
+
def example_create_teradata_compute_cluster(context):
|
|
100
|
+
"""Args for create_teradata_compute_cluster():
|
|
101
|
+
compute_profile_name: Name of the Compute Profile to manage.
|
|
102
|
+
compute_group_name: Name of compute group to which compute profile belongs.
|
|
103
|
+
query_strategy: Query strategy to use. Refers to the approach or method used by the
|
|
104
|
+
Teradata Optimizer to execute SQL queries efficiently within a Teradata compute cluster.
|
|
105
|
+
Valid query_strategy values are 'STANDARD' and 'ANALYTIC'. The default at the database level is 'STANDARD'.
|
|
106
|
+
compute_map: Name of the compute map. The compute_map in a compute cluster profile refers
|
|
107
|
+
to the mapping of compute resources to a specific node or set of nodes within the cluster.
|
|
108
|
+
compute_attribute: Optional attributes of compute profile. Example compute attribute
|
|
109
|
+
MIN_COMPUTE_COUNT(1) MAX_COMPUTE_COUNT(5) INITIALLY_SUSPENDED('FALSE')
|
|
110
|
+
compute_attribute (str, optional): Additional attributes for compute profile. Defaults to None.
|
|
111
|
+
"""
|
|
112
|
+
context.resources.teradata.create_teradata_compute_cluster(
|
|
113
|
+
"ShippingCG01",
|
|
114
|
+
"Shipping",
|
|
115
|
+
"STANDARD",
|
|
116
|
+
"TD_COMPUTE_MEDIUM",
|
|
117
|
+
"MIN_COMPUTE_COUNT(1) MAX_COMPUTE_COUNT(1) INITIALLY_SUSPENDED('FALSE')",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
@job(resource_defs={"teradata": teradata_resource})
|
|
121
|
+
def example_job():
|
|
122
|
+
example_create_teradata_compute_cluster()
|
|
123
|
+
|
|
124
|
+
example_job.execute_in_process(
|
|
125
|
+
run_config={
|
|
126
|
+
"resources": {
|
|
127
|
+
"teradata": {
|
|
128
|
+
"config": {
|
|
129
|
+
"host": EnvVar("TERADATA_HOST"),
|
|
130
|
+
"user": EnvVar("TERADATA_USER"),
|
|
131
|
+
"password": EnvVar("TERADATA_PASSWORD"),
|
|
132
|
+
"database": EnvVar("TERADATA_DATABASE"),
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Development
|
|
141
|
+
|
|
142
|
+
The `Makefile` provides the tools required to test and lint your local installation.
|
|
143
|
+
|
|
144
|
+
```sh
|
|
145
|
+
make test
|
|
146
|
+
make ruff
|
|
147
|
+
make check
|
|
148
|
+
```
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# dagster-teradata
|
|
2
|
+
|
|
3
|
+
A dagster module that provides integration with [Teradata Vantage](https://www.teradata.com/).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
The `dagster_teradata` module is available as a PyPI package - install with your preferred python
|
|
7
|
+
environment manager.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
source .venv/bin/activate
|
|
11
|
+
pip install dagster-teradata
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Example Usage
|
|
15
|
+
|
|
16
|
+
This module offers seamless integration with Teradata Vantage, facilitating efficient workflows for data processing, management,
|
|
17
|
+
and transformation. This module supports a range of scenarios, such as executing queries, managing tables,
|
|
18
|
+
and integrating with cloud storage solutions like AWS S3 and Azure Data Lake Storage (ADLS). Additionally,
|
|
19
|
+
it enables compute cluster management for Teradata VantageCloud Lake.
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import os
|
|
23
|
+
import pytest
|
|
24
|
+
from dagster import job, op, EnvVar
|
|
25
|
+
from dagster_teradata import TeradataResource
|
|
26
|
+
|
|
27
|
+
td_resource = TeradataResource(
|
|
28
|
+
host=EnvVar("TERADATA_HOST"),
|
|
29
|
+
user=EnvVar("TERADATA_USER"),
|
|
30
|
+
password=EnvVar("TERADATA_PASSWORD"),
|
|
31
|
+
database=EnvVar("TERADATA_DATABASE"),
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
def test_execute_query(tmp_path):
|
|
35
|
+
@op(required_resource_keys={"teradata"})
|
|
36
|
+
def example_test_execute_query(context):
|
|
37
|
+
result = context.resources.teradata.execute_queries(
|
|
38
|
+
["select order_id from orders_24", "select order_id from orders_25"], True
|
|
39
|
+
)
|
|
40
|
+
context.log.info(result)
|
|
41
|
+
|
|
42
|
+
@job(resource_defs={"teradata": td_resource})
|
|
43
|
+
def example_job():
|
|
44
|
+
example_test_execute_query()
|
|
45
|
+
|
|
46
|
+
example_job.execute_in_process(resources={"teradata": td_resource})
|
|
47
|
+
```
|
|
48
|
+
```python
|
|
49
|
+
import os
|
|
50
|
+
import pytest
|
|
51
|
+
from dagster import job, op, EnvVar
|
|
52
|
+
from dagster_teradata import TeradataResource
|
|
53
|
+
|
|
54
|
+
td_resource = TeradataResource(
|
|
55
|
+
host=EnvVar("TERADATA_HOST"),
|
|
56
|
+
user=EnvVar("TERADATA_USER"),
|
|
57
|
+
password=EnvVar("TERADATA_PASSWORD"),
|
|
58
|
+
database=EnvVar("TERADATA_DATABASE"),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def test_drop_table(tmp_path):
|
|
62
|
+
@op(required_resource_keys={"teradata"})
|
|
63
|
+
def example_test_drop_table(context):
|
|
64
|
+
result = context.resources.teradata.drop_table(["process_tmp1", "process_tmp2"])
|
|
65
|
+
context.log.info(result)
|
|
66
|
+
|
|
67
|
+
@job(resource_defs={"teradata": td_resource})
|
|
68
|
+
def example_job():
|
|
69
|
+
example_test_drop_table()
|
|
70
|
+
|
|
71
|
+
example_job.execute_in_process(resources={"teradata": td_resource})
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Here is another example, this time showing compute cluster management in Teradata VantageCloud Lake:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import os
|
|
78
|
+
|
|
79
|
+
import pytest
|
|
80
|
+
from dagster import job, op, EnvVar
|
|
81
|
+
from dagster_teradata import teradata_resource
|
|
82
|
+
|
|
83
|
+
def test_create_teradata_compute_cluster(tmp_path):
|
|
84
|
+
@op(required_resource_keys={"teradata"})
|
|
85
|
+
def example_create_teradata_compute_cluster(context):
|
|
86
|
+
"""Args for create_teradata_compute_cluster():
|
|
87
|
+
compute_profile_name: Name of the Compute Profile to manage.
|
|
88
|
+
compute_group_name: Name of compute group to which compute profile belongs.
|
|
89
|
+
query_strategy: Query strategy to use. Refers to the approach or method used by the
|
|
90
|
+
Teradata Optimizer to execute SQL queries efficiently within a Teradata compute cluster.
|
|
91
|
+
Valid query_strategy values are 'STANDARD' and 'ANALYTIC'. The default at the database level is 'STANDARD'.
|
|
92
|
+
compute_map: Name of the compute map. The compute_map in a compute cluster profile refers
|
|
93
|
+
to the mapping of compute resources to a specific node or set of nodes within the cluster.
|
|
94
|
+
compute_attribute: Optional attributes of compute profile. Example compute attribute
|
|
95
|
+
MIN_COMPUTE_COUNT(1) MAX_COMPUTE_COUNT(5) INITIALLY_SUSPENDED('FALSE')
|
|
96
|
+
compute_attribute (str, optional): Additional attributes for compute profile. Defaults to None.
|
|
97
|
+
"""
|
|
98
|
+
context.resources.teradata.create_teradata_compute_cluster(
|
|
99
|
+
"ShippingCG01",
|
|
100
|
+
"Shipping",
|
|
101
|
+
"STANDARD",
|
|
102
|
+
"TD_COMPUTE_MEDIUM",
|
|
103
|
+
"MIN_COMPUTE_COUNT(1) MAX_COMPUTE_COUNT(1) INITIALLY_SUSPENDED('FALSE')",
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
@job(resource_defs={"teradata": teradata_resource})
|
|
107
|
+
def example_job():
|
|
108
|
+
example_create_teradata_compute_cluster()
|
|
109
|
+
|
|
110
|
+
example_job.execute_in_process(
|
|
111
|
+
run_config={
|
|
112
|
+
"resources": {
|
|
113
|
+
"teradata": {
|
|
114
|
+
"config": {
|
|
115
|
+
"host": EnvVar("TERADATA_HOST"),
|
|
116
|
+
"user": EnvVar("TERADATA_USER"),
|
|
117
|
+
"password": EnvVar("TERADATA_PASSWORD"),
|
|
118
|
+
"database": EnvVar("TERADATA_DATABASE"),
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Development
|
|
127
|
+
|
|
128
|
+
The `Makefile` provides the tools required to test and lint your local installation.
|
|
129
|
+
|
|
130
|
+
```sh
|
|
131
|
+
make test
|
|
132
|
+
make ruff
|
|
133
|
+
make check
|
|
134
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Define constants for dagster-teradata."""
|
|
2
|
+
|
|
3
|
+
CC_OPR_SUCCESS_STATUS_MSG = "Compute Cluster %s %s operation completed successfully."
|
|
4
|
+
CC_OPR_FAILURE_STATUS_MSG = "Compute Cluster %s %s operation has failed."
|
|
5
|
+
CC_OPR_INITIALIZING_STATUS_MSG = (
|
|
6
|
+
"The environment is currently initializing. Please wait."
|
|
7
|
+
)
|
|
8
|
+
CC_OPR_EMPTY_PROFILE_ERROR_MSG = (
|
|
9
|
+
"Please provide a valid name for the compute cluster profile."
|
|
10
|
+
)
|
|
11
|
+
CC_GRP_PRP_NON_EXISTS_MSG = (
|
|
12
|
+
"The specified Compute cluster is not present or The user doesn't have permission to "
|
|
13
|
+
"access compute cluster."
|
|
14
|
+
)
|
|
15
|
+
CC_GRP_PRP_UN_AUTHORIZED_MSG = "The %s operation is not authorized for the user."
|
|
16
|
+
CC_GRP_LAKE_SUPPORT_ONLY_MSG = "Compute Groups is supported only on Vantage Cloud Lake."
|
|
17
|
+
CC_OPR_TIMEOUT_ERROR = "There is an issue with the %s operation. Kindly consult the administrator for assistance."
|
|
18
|
+
CC_GRP_PRP_EXISTS_MSG = "The specified Compute cluster is already exists."
|
|
19
|
+
CC_OPR_EMPTY_COPY_PROFILE_ERROR_MSG = (
|
|
20
|
+
"Please provide a valid name for the source and target compute profile."
|
|
21
|
+
)
|
|
22
|
+
CC_OPR_TIME_OUT = 1200
|
|
23
|
+
CC_POLL_INTERVAL = 60
|