acryl-datahub-airflow-plugin 1.3.1.5__py3-none-any.whl → 1.3.1.5rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acryl_datahub_airflow_plugin-1.3.1.5rc2.dist-info/METADATA +91 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2.dist-info/RECORD +33 -0
- datahub_airflow_plugin/_airflow_shims.py +31 -64
- datahub_airflow_plugin/_config.py +19 -97
- datahub_airflow_plugin/_datahub_ol_adapter.py +2 -14
- datahub_airflow_plugin/_extractors.py +365 -0
- datahub_airflow_plugin/_version.py +1 -1
- datahub_airflow_plugin/client/airflow_generator.py +43 -147
- datahub_airflow_plugin/datahub_listener.py +790 -19
- datahub_airflow_plugin/example_dags/__init__.py +0 -32
- datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +4 -12
- datahub_airflow_plugin/hooks/datahub.py +2 -11
- datahub_airflow_plugin/operators/datahub.py +3 -20
- acryl_datahub_airflow_plugin-1.3.1.5.dist-info/METADATA +0 -303
- acryl_datahub_airflow_plugin-1.3.1.5.dist-info/RECORD +0 -65
- datahub_airflow_plugin/_airflow_compat.py +0 -32
- datahub_airflow_plugin/_airflow_version_specific.py +0 -184
- datahub_airflow_plugin/_constants.py +0 -16
- datahub_airflow_plugin/airflow2/__init__.py +0 -6
- datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +0 -402
- datahub_airflow_plugin/airflow2/_airflow_compat.py +0 -95
- datahub_airflow_plugin/airflow2/_extractors.py +0 -477
- datahub_airflow_plugin/airflow2/_legacy_shims.py +0 -20
- datahub_airflow_plugin/airflow2/_openlineage_compat.py +0 -123
- datahub_airflow_plugin/airflow2/_provider_shims.py +0 -29
- datahub_airflow_plugin/airflow2/_shims.py +0 -88
- datahub_airflow_plugin/airflow2/datahub_listener.py +0 -1072
- datahub_airflow_plugin/airflow3/__init__.py +0 -6
- datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +0 -408
- datahub_airflow_plugin/airflow3/_airflow_compat.py +0 -108
- datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +0 -153
- datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +0 -273
- datahub_airflow_plugin/airflow3/_shims.py +0 -82
- datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +0 -88
- datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +0 -308
- datahub_airflow_plugin/airflow3/datahub_listener.py +0 -1452
- datahub_airflow_plugin/example_dags/airflow2/__init__.py +0 -8
- datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +0 -54
- datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py +0 -43
- datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +0 -69
- datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +0 -69
- datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +0 -81
- datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +0 -68
- datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +0 -99
- datahub_airflow_plugin/example_dags/airflow3/__init__.py +0 -8
- datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +0 -51
- datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +0 -51
- datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +0 -89
- {acryl_datahub_airflow_plugin-1.3.1.5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
"""Snowflake DataHub Ingest DAG
|
|
2
|
-
|
|
3
|
-
This example demonstrates how to ingest metadata from Snowflake into DataHub
|
|
4
|
-
from within an Airflow DAG. In contrast to the MySQL example, this DAG
|
|
5
|
-
pulls the DB connection configuration from Airflow's connection store.
|
|
6
|
-
|
|
7
|
-
This is the Airflow 3.0+ version.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from datetime import datetime, timedelta
|
|
11
|
-
|
|
12
|
-
from airflow import DAG
|
|
13
|
-
from airflow.hooks.base_hook import BaseHook
|
|
14
|
-
from airflow.operators.python import PythonVirtualenvOperator
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def ingest_from_snowflake(snowflake_credentials, datahub_gms_server):
    """Ingest Snowflake metadata into DataHub with an in-process pipeline.

    The dictionary handed to ``Pipeline.create`` is analogous to a recipe
    configuration: a ``source`` section and a ``sink`` section.

    Args:
        snowflake_credentials: Mapping of Snowflake source settings. It is
            unpacked as-is into the source ``config`` section.
        datahub_gms_server: Value used as ``server`` for the
            ``datahub-rest`` sink.
    """
    # Imported inside the function body rather than at module level.
    # NOTE(review): presumably so that DataHub only has to be importable in
    # the environment where this callable actually runs -- confirm.
    from datahub.ingestion.run.pipeline import Pipeline

    source_section = {
        "type": "snowflake",
        "config": {
            **snowflake_credentials,
            # Other Snowflake config can be added here.
            "profiling": {"enabled": False},
        },
    }
    sink_section = {
        "type": "datahub-rest",
        "config": {"server": datahub_gms_server},
    }

    # Other ingestion features, like transformers, are also supported by
    # adding a "transformers" entry next to "source" and "sink", e.g.:
    #
    #     "transformers": [
    #         {
    #             "type": "simple_add_dataset_ownership",
    #             "config": {
    #                 "owner_urns": [
    #                     "urn:li:corpuser:example",
    #                 ]
    #             },
    #         }
    #     ],
    recipe = {"source": source_section, "sink": sink_section}

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    pipeline.pretty_print_summary()
    # Called last, after the summary has been printed.
    pipeline.raise_from_status()
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
with DAG(
    "datahub_snowflake_ingest",
    default_args={"owner": "airflow"},
    description="An example DAG which ingests metadata from Snowflake to DataHub",
    start_date=datetime(2022, 1, 1),
    schedule=timedelta(days=1),
    catchup=False,
) as dag:
    # Credentials come from Airflow's connection store, so both connections
    # below must already have been configured in Airflow. See the Airflow
    # docs for details:
    # https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html
    snowflake_conn = BaseHook.get_connection("snowflake_admin_default")
    datahub_conn = BaseHook.get_connection("datahub_rest_default")

    # Keyword arguments forwarded to ``ingest_from_snowflake``.
    # "account" is read with [] (must be present in the connection extras);
    # "warehouse" and "role" are read with .get() and may be None.
    ingest_kwargs = {
        "snowflake_credentials": {
            "username": snowflake_conn.login,
            "password": snowflake_conn.password,
            "account_id": snowflake_conn.extra_dejson["account"],
            "warehouse": snowflake_conn.extra_dejson.get("warehouse"),
            "role": snowflake_conn.extra_dejson.get("role"),
        },
        "datahub_gms_server": datahub_conn.host,
    }

    # The PythonOperator would also work here, but the
    # PythonVirtualenvOperator is recommended to ensure that there are no
    # dependency conflicts between DataHub and the rest of your Airflow
    # environment.
    ingest_task = PythonVirtualenvOperator(
        task_id="ingest_from_snowflake",
        requirements=["acryl-datahub[snowflake]"],
        system_site_packages=False,
        python_callable=ingest_from_snowflake,
        op_kwargs=ingest_kwargs,
    )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|