acryl-datahub-airflow-plugin 1.3.1.5__tar.gz → 1.3.1.5rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acryl_datahub_airflow_plugin-1.3.1.5rc2/PKG-INFO +91 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2/README.md +7 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.cfg +0 -18
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.py +6 -14
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +91 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +39 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/requires.txt +33 -37
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_airflow_shims.py +42 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_config.py +142 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_datahub_ol_adapter.py +23 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_extractors.py +365 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/_version.py +1 -1
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/client/airflow_generator.py +43 -147
- {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2 → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin}/datahub_listener.py +188 -464
- {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2 → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/example_dags}/graph_usage_sample_dag.py +4 -12
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/hooks/datahub.py +2 -11
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub.py +3 -20
- acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/py.typed +0 -0
- acryl_datahub_airflow_plugin-1.3.1.5/PKG-INFO +0 -303
- acryl_datahub_airflow_plugin-1.3.1.5/README.md +0 -217
- acryl_datahub_airflow_plugin-1.3.1.5/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +0 -303
- acryl_datahub_airflow_plugin-1.3.1.5/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +0 -71
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_compat.py +0 -32
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_shims.py +0 -75
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_version_specific.py +0 -184
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_config.py +0 -220
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_constants.py +0 -16
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_datahub_ol_adapter.py +0 -35
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/__init__.py +0 -6
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +0 -402
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_airflow_compat.py +0 -95
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_extractors.py +0 -477
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_legacy_shims.py +0 -20
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_openlineage_compat.py +0 -123
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_provider_shims.py +0 -29
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_shims.py +0 -88
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/__init__.py +0 -6
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +0 -408
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_airflow_compat.py +0 -108
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +0 -153
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +0 -273
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_shims.py +0 -82
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +0 -88
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +0 -308
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/datahub_listener.py +0 -1452
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/datahub_listener.py +0 -25
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/__init__.py +0 -32
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/__init__.py +0 -8
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +0 -54
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +0 -69
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +0 -69
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +0 -81
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +0 -68
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +0 -99
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/__init__.py +0 -8
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +0 -51
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +0 -51
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +0 -89
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +0 -43
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/pyproject.toml +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/top_level.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/client/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/datahub_plugin.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/entities.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/hooks → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/example_dags}/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/operators → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/hooks}/__init__.py +0 -0
- acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/py.typed → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/operators/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +0 -0
acryl_datahub_airflow_plugin-1.3.1.5rc2/PKG-INFO (new file)
@@ -0,0 +1,91 @@
+Metadata-Version: 2.4
+Name: acryl-datahub-airflow-plugin
+Version: 1.3.1.5rc2
+Summary: Datahub Airflow plugin to capture executions and send to Datahub
+Home-page: https://docs.datahub.com/
+License: Apache-2.0
+Project-URL: Documentation, https://docs.datahub.com/docs/
+Project-URL: Source, https://github.com/datahub-project/datahub
+Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: Intended Audience :: System Administrators
+Classifier: Operating System :: Unix
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Environment :: Console
+Classifier: Environment :: MacOS X
+Classifier: Topic :: Software Development
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0
+Requires-Dist: apache-airflow<3,>=2.7.0
+Requires-Dist: pydantic>=2.4.0
+Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2
+Provides-Extra: ignore
+Provides-Extra: datahub-rest
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "datahub-rest"
+Provides-Extra: datahub-kafka
+Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "datahub-kafka"
+Provides-Extra: datahub-file
+Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "datahub-file"
+Provides-Extra: dev
+Requires-Dist: apache-airflow<3,>=2.7.0; extra == "dev"
+Requires-Dist: tenacity; extra == "dev"
+Requires-Dist: types-PyYAML; extra == "dev"
+Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0; extra == "dev"
+Requires-Dist: pydantic>=2.4.0; extra == "dev"
+Requires-Dist: types-click==0.1.12; extra == "dev"
+Requires-Dist: tox; extra == "dev"
+Requires-Dist: types-toml; extra == "dev"
+Requires-Dist: deepdiff!=8.0.0; extra == "dev"
+Requires-Dist: build; extra == "dev"
+Requires-Dist: types-requests; extra == "dev"
+Requires-Dist: sqlalchemy-stubs; extra == "dev"
+Requires-Dist: types-dataclasses; extra == "dev"
+Requires-Dist: types-tabulate; extra == "dev"
+Requires-Dist: mypy==1.17.1; extra == "dev"
+Requires-Dist: types-python-dateutil; extra == "dev"
+Requires-Dist: types-setuptools; extra == "dev"
+Requires-Dist: pytest>=6.2.2; extra == "dev"
+Requires-Dist: packaging; extra == "dev"
+Requires-Dist: types-six; extra == "dev"
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "dev"
+Requires-Dist: ruff==0.11.7; extra == "dev"
+Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2; extra == "dev"
+Requires-Dist: tox-uv; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Requires-Dist: types-cachetools; extra == "dev"
+Requires-Dist: coverage>=5.1; extra == "dev"
+Provides-Extra: integration-tests
+Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc2; extra == "integration-tests"
+Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "integration-tests"
+Requires-Dist: virtualenv; extra == "integration-tests"
+Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
+Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
+Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "integration-tests"
+Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
+Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: project-url
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# Datahub Airflow Plugin
+
+See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
+
+## Developing
+
+See the [developing docs](../../metadata-ingestion/developing.md).

{acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.cfg
@@ -9,24 +9,6 @@ check_untyped_defs = yes
 disallow_incomplete_defs = yes
 disallow_untyped_decorators = yes
 warn_unused_configs = yes
-
-[mypy-datahub_airflow_plugin._extractors]
-ignore_errors = True
-
-[mypy-datahub_airflow_plugin.airflow2._extractors]
-ignore_errors = True
-
-[mypy-datahub_airflow_plugin.airflow2.datahub_listener]
-ignore_errors = True
-
-[mypy-tests.unit.test_teradata_extractor]
-ignore_errors = True
-
-[mypy-datahub_airflow_plugin.example_dags.*]
-ignore_errors = True
-
-[mypy-tests.integration.dags.*]
-ignore_errors = True
 disallow_untyped_defs = no
 
 [mypy-datahub.*]

{acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.py
@@ -25,12 +25,12 @@ _self_pin = (
 base_requirements = {
     f"acryl-datahub[sql-parser,datahub-rest]{_self_pin}",
     "pydantic>=2.4.0",
-    # We require Airflow 2.
-
-
-    #
-    #
-
+    # We require Airflow 2.7.x at minimum, to be compatible with the native Airflow Openlineage provider.
+    "apache-airflow>=2.7.0,<3",
+    # We remain restrictive on the versions allowed here to prevent
+    # us from being broken by backwards-incompatible changes in the
+    # underlying package.
+    "openlineage-airflow>=1.2.0,<=1.30.1",
 }
 
 plugins: Dict[str, Set[str]] = {
@@ -43,14 +43,6 @@ plugins: Dict[str, Set[str]] = {
     "datahub-file": {
         f"acryl-datahub[sync-file-emitter]{_self_pin}",
     },
-    # airflow2: For Airflow 2.x, use standalone openlineage-airflow package
-    "airflow2": {
-        "openlineage-airflow>=1.2.0",
-    },
-    # airflow3: For Airflow 3.x, use native OpenLineage provider
-    "airflow3": {
-        "apache-airflow-providers-openlineage>=1.0.0",
-    },
 }
 
 # Require some plugins by default.

acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO (new file)
@@ -0,0 +1,91 @@
+(content is identical to the top-level PKG-INFO shown above)

acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt (new file)
@@ -0,0 +1,39 @@
+README.md
+pyproject.toml
+setup.cfg
+setup.py
+src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO
+src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt
+src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt
+src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt
+src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe
+src/acryl_datahub_airflow_plugin.egg-info/requires.txt
+src/acryl_datahub_airflow_plugin.egg-info/top_level.txt
+src/datahub_airflow_plugin/__init__.py
+src/datahub_airflow_plugin/_airflow_shims.py
+src/datahub_airflow_plugin/_config.py
+src/datahub_airflow_plugin/_datahub_ol_adapter.py
+src/datahub_airflow_plugin/_extractors.py
+src/datahub_airflow_plugin/_version.py
+src/datahub_airflow_plugin/datahub_listener.py
+src/datahub_airflow_plugin/datahub_plugin.py
+src/datahub_airflow_plugin/entities.py
+src/datahub_airflow_plugin/py.typed
+src/datahub_airflow_plugin/client/__init__.py
+src/datahub_airflow_plugin/client/airflow_generator.py
+src/datahub_airflow_plugin/example_dags/__init__.py
+src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py
+src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py
+src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py
+src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py
+src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
+src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py
+src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py
+src/datahub_airflow_plugin/hooks/__init__.py
+src/datahub_airflow_plugin/hooks/datahub.py
+src/datahub_airflow_plugin/operators/__init__.py
+src/datahub_airflow_plugin/operators/datahub.py
+src/datahub_airflow_plugin/operators/datahub_assertion_operator.py
+src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py
+src/datahub_airflow_plugin/operators/datahub_operation_operator.py
+src/datahub_airflow_plugin/operators/datahub_operation_sensor.py

{acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/requires.txt
@@ -1,60 +1,56 @@
+openlineage-airflow<=1.30.1,>=1.2.0
+apache-airflow<3,>=2.7.0
 pydantic>=2.4.0
-acryl-datahub[datahub-rest,sql-parser]==1.3.1.
-acryl-datahub[datahub-rest]==1.3.1.
-apache-airflow<4.0.0,>=2.5.0
-
-[airflow2]
-openlineage-airflow>=1.2.0
-
-[airflow3]
-apache-airflow-providers-openlineage>=1.0.0
+acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+acryl-datahub[datahub-rest]==1.3.1.5rc2
 
 [datahub-file]
-acryl-datahub[sync-file-emitter]==1.3.1.
+acryl-datahub[sync-file-emitter]==1.3.1.5rc2
 
 [datahub-kafka]
-acryl-datahub[datahub-kafka]==1.3.1.
+acryl-datahub[datahub-kafka]==1.3.1.5rc2
 
 [datahub-rest]
-acryl-datahub[datahub-rest]==1.3.1.
+acryl-datahub[datahub-rest]==1.3.1.5rc2
 
 [dev]
-
-types-setuptools
-deepdiff!=8.0.0
-pydantic>=2.4.0
-types-six
-types-toml
-pytest>=6.2.2
+apache-airflow<3,>=2.7.0
 tenacity
+types-PyYAML
 pytest-cov>=2.8.1
-
-
-
-coverage>=5.1
+openlineage-airflow<=1.30.1,>=1.2.0
+pydantic>=2.4.0
+types-click==0.1.12
 tox
-
-
-types-PyYAML
-mypy==1.17.1
-packaging
-acryl-datahub[datahub-rest,sql-parser]==1.3.1.5
-types-python-dateutil
+types-toml
+deepdiff!=8.0.0
 build
-
-
+types-requests
+sqlalchemy-stubs
 types-dataclasses
 types-tabulate
-
+mypy==1.17.1
+types-python-dateutil
+types-setuptools
+pytest>=6.2.2
+packaging
+types-six
+acryl-datahub[datahub-rest]==1.3.1.5rc2
+ruff==0.11.7
+acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+tox-uv
+twine
+types-cachetools
+coverage>=5.1
 
 [ignore]
 
 [integration-tests]
+acryl-datahub[testing-utils]==1.3.1.5rc2
+acryl-datahub[sync-file-emitter]==1.3.1.5rc2
 virtualenv
 apache-airflow[amazon,google,snowflake]>=2.0.2
-acryl-datahub[sync-file-emitter]==1.3.1.5
-apache-airflow-providers-sqlite
 snowflake-connector-python>=2.7.10
+acryl-datahub[datahub-kafka]==1.3.1.5rc2
+apache-airflow-providers-sqlite
 apache-airflow-providers-teradata
-acryl-datahub[testing-utils]==1.3.1.5
-acryl-datahub[datahub-kafka]==1.3.1.5

acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_airflow_shims.py (new file)
@@ -0,0 +1,42 @@
+from typing import List
+
+import airflow.version
+import packaging.version
+from airflow.models.operator import Operator
+
+try:
+    from airflow.sensors.external_task import ExternalTaskSensor
+except ImportError:
+    from airflow.sensors.external_task_sensor import ExternalTaskSensor  # type: ignore
+
+# Approach suggested by https://stackoverflow.com/a/11887885/5004662.
+AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
+HAS_AIRFLOW_DAG_LISTENER_API = True  # this is in Airflow 2.5+
+HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse(
+    "2.8.0.dev0"
+)
+
+
+def get_task_inlets(operator: "Operator") -> List:
+    # From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets
+    if hasattr(operator, "_inlets"):
+        return operator._inlets  # type: ignore[attr-defined, union-attr]
+    if hasattr(operator, "get_inlet_defs"):
+        return operator.get_inlet_defs()  # type: ignore[attr-defined]
+    return operator.inlets or []
+
+
+def get_task_outlets(operator: "Operator") -> List:
+    # From Airflow 2.4 _outlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _outlets
+    # We have to use _outlets because outlets is empty in Airflow < 2.4.0
+    if hasattr(operator, "_outlets"):
+        return operator._outlets  # type: ignore[attr-defined, union-attr]
+    if hasattr(operator, "get_outlet_defs"):
+        return operator.get_outlet_defs()
+    return operator.outlets or []
+
+
+__all__ = [
+    "AIRFLOW_VERSION",
+    "ExternalTaskSensor",
+]

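For orientation, here is a minimal usage sketch (not part of the package diff) showing how the shims above are typically consumed. It assumes Airflow 2.7+ and the documented `Dataset` entity from `datahub_airflow_plugin.entities`; the DAG id, task id, and dataset names are made up.

```python
# Hypothetical sketch: reading declared inlets/outlets through the version-tolerant shims.
import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

from datahub_airflow_plugin._airflow_shims import get_task_inlets, get_task_outlets
from datahub_airflow_plugin.entities import Dataset

with DAG(dag_id="shim_demo", start_date=datetime.datetime(2024, 1, 1), schedule=None):
    task = BashOperator(
        task_id="transform",
        bash_command="echo transform",
        inlets=[Dataset("snowflake", "analytics.raw.events")],
        outlets=[Dataset("snowflake", "analytics.clean.events")],
    )

# The shims fall back from the legacy _inlets/_outlets attributes to
# get_inlet_defs()/get_outlet_defs() and finally to inlets/outlets,
# so callers do not need to care which Airflow version is installed.
print(get_task_inlets(task))
print(get_task_outlets(task))
```
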
acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_config.py (new file)
@@ -0,0 +1,142 @@
+from enum import Enum
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
+
+from airflow.configuration import conf
+from pydantic import root_validator
+from pydantic.fields import Field
+
+import datahub.emitter.mce_builder as builder
+from datahub.configuration.common import AllowDenyPattern, ConfigModel
+
+if TYPE_CHECKING:
+    from datahub_airflow_plugin.hooks.datahub import (
+        DatahubCompositeHook,
+        DatahubGenericHook,
+    )
+
+
+class DatajobUrl(Enum):
+    GRID = "grid"
+    TASKINSTANCE = "taskinstance"
+
+
+class DatahubLineageConfig(ConfigModel):
+    enabled: bool
+
+    # DataHub hook connection ID.
+    datahub_conn_id: str
+
+    _datahub_connection_ids: List[str]
+
+    # Cluster to associate with the pipelines and tasks. Defaults to "prod".
+    cluster: str
+
+    # Platform instance to associate with the pipelines and tasks.
+    platform_instance: Optional[str]
+
+    # If true, the owners field of the DAG will be captured as a DataHub corpuser.
+    capture_ownership_info: bool
+
+    # If true, the owners field of the DAG will instead be captured as a DataHub corpgroup.
+    capture_ownership_as_group: bool
+
+    # If true, the tags field of the DAG will be captured as DataHub tags.
+    capture_tags_info: bool
+
+    # If true (default), we'll materialize and un-soft-delete any urns
+    # referenced by inlets or outlets.
+    materialize_iolets: bool
+
+    capture_executions: bool
+
+    datajob_url_link: DatajobUrl
+
+    enable_extractors: bool
+
+    # If true, ti.render_templates() will be called in the listener.
+    # Makes extraction of jinja-templated fields more accurate.
+    render_templates: bool
+
+    # Only if true, lineage will be emitted for the DataJobs.
+    enable_datajob_lineage: bool
+
+    dag_filter_pattern: AllowDenyPattern = Field(
+        description="regex patterns for DAGs to ingest",
+    )
+
+    log_level: Optional[str]
+    debug_emitter: bool
+
+    disable_openlineage_plugin: bool
+
+    def make_emitter_hook(self) -> Union["DatahubGenericHook", "DatahubCompositeHook"]:
+        # This is necessary to avoid issues with circular imports.
+        from datahub_airflow_plugin.hooks.datahub import (
+            DatahubCompositeHook,
+            DatahubGenericHook,
+        )
+
+        if len(self._datahub_connection_ids) == 1:
+            return DatahubGenericHook(self._datahub_connection_ids[0])
+        else:
+            return DatahubCompositeHook(self._datahub_connection_ids)
+
+    @root_validator(skip_on_failure=True)
+    def split_conn_ids(cls, values: Dict) -> Dict:
+        if not values.get("datahub_conn_id"):
+            raise ValueError("datahub_conn_id is required")
+        conn_ids = values.get("datahub_conn_id", "").split(",")
+        cls._datahub_connection_ids = [conn_id.strip() for conn_id in conn_ids]
+        return values
+
+
+def get_lineage_config() -> DatahubLineageConfig:
+    """Load the DataHub plugin config from airflow.cfg."""
+
+    enabled = conf.get("datahub", "enabled", fallback=True)
+    datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default")
+    cluster = conf.get("datahub", "cluster", fallback=builder.DEFAULT_FLOW_CLUSTER)
+    platform_instance = conf.get("datahub", "platform_instance", fallback=None)
+    capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True)
+    capture_ownership_info = conf.get(
+        "datahub", "capture_ownership_info", fallback=True
+    )
+    capture_ownership_as_group = conf.get(
+        "datahub", "capture_ownership_as_group", fallback=False
+    )
+    capture_executions = conf.get("datahub", "capture_executions", fallback=True)
+    materialize_iolets = conf.get("datahub", "materialize_iolets", fallback=True)
+    enable_extractors = conf.get("datahub", "enable_extractors", fallback=True)
+    log_level = conf.get("datahub", "log_level", fallback=None)
+    debug_emitter = conf.get("datahub", "debug_emitter", fallback=False)
+    disable_openlineage_plugin = conf.get(
+        "datahub", "disable_openlineage_plugin", fallback=True
+    )
+    render_templates = conf.get("datahub", "render_templates", fallback=True)
+    datajob_url_link = conf.get(
+        "datahub", "datajob_url_link", fallback=DatajobUrl.TASKINSTANCE.value
+    )
+    dag_filter_pattern = AllowDenyPattern.model_validate_json(
+        conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}')
+    )
+    enable_lineage = conf.get("datahub", "enable_datajob_lineage", fallback=True)
+
+    return DatahubLineageConfig(
+        enabled=enabled,
+        datahub_conn_id=datahub_conn_id,
+        cluster=cluster,
+        platform_instance=platform_instance,
+        capture_ownership_info=capture_ownership_info,
+        capture_ownership_as_group=capture_ownership_as_group,
+        capture_tags_info=capture_tags_info,
+        capture_executions=capture_executions,
+        materialize_iolets=materialize_iolets,
+        enable_extractors=enable_extractors,
+        log_level=log_level,
+        debug_emitter=debug_emitter,
+        disable_openlineage_plugin=disable_openlineage_plugin,
+        datajob_url_link=datajob_url_link,
+        render_templates=render_templates,
+        dag_filter_pattern=dag_filter_pattern,
+        enable_datajob_lineage=enable_lineage,
+    )

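The values consumed by `get_lineage_config()` above come from the `[datahub]` section of `airflow.cfg`. A minimal sketch (not part of the diff), assuming Airflow's standard `AIRFLOW__<SECTION>__<KEY>` environment-variable overrides; the connection ID and filter pattern shown are illustrative examples only.

```python
# Hypothetical sketch: supplying [datahub] settings via environment variables
# and loading them through the plugin's config loader.
import os

os.environ["AIRFLOW__DATAHUB__CONN_ID"] = "datahub_rest_default"
os.environ["AIRFLOW__DATAHUB__CLUSTER"] = "prod"
os.environ["AIRFLOW__DATAHUB__CAPTURE_OWNERSHIP_INFO"] = "true"
os.environ["AIRFLOW__DATAHUB__DAG_FILTER_STR"] = '{"allow": ["sales_.*"], "deny": []}'

from datahub_airflow_plugin._config import get_lineage_config

# conf.get() resolves each key against the env vars above, falling back to the
# defaults hard-coded in get_lineage_config() for anything left unset.
config = get_lineage_config()
print(config.datahub_conn_id, config.cluster, config.dag_filter_pattern)
```
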
acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_datahub_ol_adapter.py (new file)
@@ -0,0 +1,23 @@
+import logging
+
+from openlineage.client.run import Dataset as OpenLineageDataset
+
+import datahub.emitter.mce_builder as builder
+
+logger = logging.getLogger(__name__)
+
+
+OL_SCHEME_TWEAKS = {
+    "sqlserver": "mssql",
+    "awsathena": "athena",
+}
+
+
+def translate_ol_to_datahub_urn(ol_uri: OpenLineageDataset) -> str:
+    namespace = ol_uri.namespace
+    name = ol_uri.name
+
+    scheme, *rest = namespace.split("://", maxsplit=1)
+
+    platform = OL_SCHEME_TWEAKS.get(scheme, scheme)
+    return builder.make_dataset_urn(platform=platform, name=name)

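A minimal usage sketch for the adapter above (not part of the diff): it maps an OpenLineage dataset reference to a DataHub dataset URN, applying the `OL_SCHEME_TWEAKS` remapping to the namespace scheme. The namespace and table name below are made-up examples.

```python
# Hypothetical sketch: translating an OpenLineage dataset into a DataHub URN.
from openlineage.client.run import Dataset as OpenLineageDataset

from datahub_airflow_plugin._datahub_ol_adapter import translate_ol_to_datahub_urn

ol_dataset = OpenLineageDataset(
    namespace="awsathena://athena.us-east-1.amazonaws.com",
    name="db.sales",
)

# The "awsathena" scheme is remapped to DataHub's "athena" platform.
print(translate_ol_to_datahub_urn(ol_dataset))
# e.g. urn:li:dataset:(urn:li:dataPlatform:athena,db.sales,PROD)
```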