acryl-datahub-airflow-plugin 1.3.1.7rc1__tar.gz → 1.3.1.8rc1__tar.gz
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/PKG-INFO +32 -32
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/pyproject.toml +1 -1
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/setup.py +1 -1
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +32 -32
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +1 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/requires.txt +30 -30
- acryl_datahub_airflow_plugin-1.3.1.8rc1/src/datahub_airflow_plugin/_airflow_asset_adapter.py +164 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_config.py +9 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_version.py +1 -1
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/datahub_listener.py +38 -12
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/datahub_listener.py +12 -4
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/client/airflow_generator.py +33 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/README.md +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/setup.cfg +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/top_level.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_airflow_compat.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_airflow_shims.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_airflow_version_specific.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_constants.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_datahub_ol_adapter.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_airflow_compat.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_extractors.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_legacy_shims.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_openlineage_compat.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_provider_shims.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/_shims.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_airflow_compat.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_shims.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/client/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/datahub_listener.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/datahub_plugin.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/entities.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow3/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/hooks/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/hooks/datahub.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/__init__.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/datahub.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/py.typed +0 -0
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: acryl-datahub-airflow-plugin
-Version: 1.3.1.7rc1
+Version: 1.3.1.8rc1
 Summary: Datahub Airflow plugin to capture executions and send to Datahub
 Home-page: https://docs.datahub.com/
 License: Apache-2.0
@@ -19,60 +19,60 @@ Classifier: Operating System :: POSIX :: Linux
 Classifier: Environment :: Console
 Classifier: Environment :: MacOS X
 Classifier: Topic :: Software Development
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
-Requires-Dist:
-Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.7rc1
+Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.8rc1
 Requires-Dist: apache-airflow<4.0.0,>=2.5.0
-Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.7rc1
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.8rc1
+Requires-Dist: pydantic>=2.4.0
 Provides-Extra: ignore
 Provides-Extra: datahub-rest
-Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.7rc1; extra == "datahub-rest"
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.8rc1; extra == "datahub-rest"
 Provides-Extra: datahub-kafka
-Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.7rc1; extra == "datahub-kafka"
+Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.8rc1; extra == "datahub-kafka"
 Provides-Extra: datahub-file
-Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.7rc1; extra == "datahub-file"
+Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.8rc1; extra == "datahub-file"
 Provides-Extra: airflow2
 Requires-Dist: openlineage-airflow>=1.2.0; extra == "airflow2"
 Provides-Extra: airflow3
 Requires-Dist: apache-airflow-providers-openlineage>=1.0.0; extra == "airflow3"
 Provides-Extra: dev
+Requires-Dist: tox; extra == "dev"
 Requires-Dist: pydantic>=2.4.0; extra == "dev"
+Requires-Dist: packaging; extra == "dev"
 Requires-Dist: apache-airflow<4.0.0,>=2.5.0; extra == "dev"
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: build; extra == "dev"
+Requires-Dist: types-tabulate; extra == "dev"
+Requires-Dist: types-click==0.1.12; extra == "dev"
+Requires-Dist: types-cachetools; extra == "dev"
 Requires-Dist: types-six; extra == "dev"
-Requires-Dist:
-Requires-Dist: coverage>=5.1; extra == "dev"
+Requires-Dist: tenacity; extra == "dev"
 Requires-Dist: deepdiff!=8.0.0; extra == "dev"
-Requires-Dist:
+Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.8rc1; extra == "dev"
 Requires-Dist: tox-uv; extra == "dev"
-Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.7rc1; extra == "dev"
-Requires-Dist: types-cachetools; extra == "dev"
-Requires-Dist: mypy==1.17.1; extra == "dev"
-Requires-Dist: twine; extra == "dev"
-Requires-Dist: pytest>=6.2.2; extra == "dev"
-Requires-Dist: build; extra == "dev"
 Requires-Dist: types-dataclasses; extra == "dev"
-Requires-Dist:
+Requires-Dist: types-setuptools; extra == "dev"
 Requires-Dist: types-toml; extra == "dev"
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: types-PyYAML; extra == "dev"
+Requires-Dist: mypy==1.17.1; extra == "dev"
+Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.8rc1; extra == "dev"
+Requires-Dist: sqlalchemy-stubs; extra == "dev"
+Requires-Dist: coverage>=5.1; extra == "dev"
+Requires-Dist: ruff==0.11.7; extra == "dev"
 Requires-Dist: types-python-dateutil; extra == "dev"
-Requires-Dist:
-Requires-Dist: types-click==0.1.12; extra == "dev"
-Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+Requires-Dist: pytest>=6.2.2; extra == "dev"
 Provides-Extra: integration-tests
-Requires-Dist: acryl-datahub[
-Requires-Dist: acryl-datahub[testing-utils]==1.3.1.7rc1; extra == "integration-tests"
-Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
-Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
-Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
+Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.8rc1; extra == "integration-tests"
 Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
+Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.8rc1; extra == "integration-tests"
+Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
+Requires-Dist: acryl-datahub[testing-utils]==1.3.1.8rc1; extra == "integration-tests"
+Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
 Requires-Dist: virtualenv; extra == "integration-tests"
-Requires-Dist:
+Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
 Dynamic: classifier
 Dynamic: description
 Dynamic: description-content-type
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO
RENAMED
Identical changes to PKG-INFO above (+32 -32).
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt
RENAMED
@@ -10,6 +10,7 @@ src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe
 src/acryl_datahub_airflow_plugin.egg-info/requires.txt
 src/acryl_datahub_airflow_plugin.egg-info/top_level.txt
 src/datahub_airflow_plugin/__init__.py
+src/datahub_airflow_plugin/_airflow_asset_adapter.py
 src/datahub_airflow_plugin/_airflow_compat.py
 src/datahub_airflow_plugin/_airflow_shims.py
 src/datahub_airflow_plugin/_airflow_version_specific.py
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/acryl_datahub_airflow_plugin.egg-info/requires.txt
RENAMED
@@ -1,7 +1,7 @@
-
-acryl-datahub[datahub-rest,sql-parser]==1.3.1.7rc1
+acryl-datahub[datahub-rest,sql-parser]==1.3.1.8rc1
 apache-airflow<4.0.0,>=2.5.0
-acryl-datahub[datahub-rest]==1.3.1.7rc1
+acryl-datahub[datahub-rest]==1.3.1.8rc1
+pydantic>=2.4.0
 
 [airflow2]
 openlineage-airflow>=1.2.0
@@ -10,51 +10,51 @@ openlineage-airflow>=1.2.0
 apache-airflow-providers-openlineage>=1.0.0
 
 [datahub-file]
-acryl-datahub[sync-file-emitter]==1.3.1.7rc1
+acryl-datahub[sync-file-emitter]==1.3.1.8rc1
 
 [datahub-kafka]
-acryl-datahub[datahub-kafka]==1.3.1.7rc1
+acryl-datahub[datahub-kafka]==1.3.1.8rc1
 
 [datahub-rest]
-acryl-datahub[datahub-rest]==1.3.1.7rc1
+acryl-datahub[datahub-rest]==1.3.1.8rc1
 
 [dev]
+tox
 pydantic>=2.4.0
+packaging
 apache-airflow<4.0.0,>=2.5.0
-
-
+build
+types-tabulate
+types-click==0.1.12
+types-cachetools
 types-six
-
-coverage>=5.1
+tenacity
 deepdiff!=8.0.0
-
+acryl-datahub[datahub-rest]==1.3.1.8rc1
 tox-uv
-acryl-datahub[datahub-rest]==1.3.1.7rc1
-types-cachetools
-mypy==1.17.1
-twine
-pytest>=6.2.2
-build
 types-dataclasses
-
+types-setuptools
 types-toml
-
-
-
+types-PyYAML
+mypy==1.17.1
+pytest-cov>=2.8.1
 types-requests
+twine
+acryl-datahub[datahub-rest,sql-parser]==1.3.1.8rc1
+sqlalchemy-stubs
+coverage>=5.1
+ruff==0.11.7
 types-python-dateutil
-
-types-click==0.1.12
-pytest-cov>=2.8.1
+pytest>=6.2.2
 
 [ignore]
 
 [integration-tests]
-acryl-datahub[
-acryl-datahub[testing-utils]==1.3.1.7rc1
-apache-airflow[amazon,google,snowflake]>=2.0.2
-snowflake-connector-python>=2.7.10
-apache-airflow-providers-sqlite
+acryl-datahub[datahub-kafka]==1.3.1.8rc1
 apache-airflow-providers-teradata
+acryl-datahub[sync-file-emitter]==1.3.1.8rc1
+snowflake-connector-python>=2.7.10
+acryl-datahub[testing-utils]==1.3.1.8rc1
+apache-airflow[amazon,google,snowflake]>=2.0.2
 virtualenv
-
+apache-airflow-providers-sqlite
acryl_datahub_airflow_plugin-1.3.1.8rc1/src/datahub_airflow_plugin/_airflow_asset_adapter.py
ADDED
@@ -0,0 +1,164 @@
+"""Adapter for converting Airflow Assets/Datasets to DataHub URNs.
+
+Airflow 2.4+ introduced airflow.datasets.Dataset for data-aware scheduling.
+Airflow 3.0+ renamed this to airflow.sdk.Asset (Dataset still works as alias).
+
+This module provides utilities to detect and convert these native Airflow
+Asset/Dataset objects to DataHub dataset URNs based on their URI.
+"""
+
+import logging
+from typing import Any, Iterable, List, Optional
+from urllib.parse import urlparse
+
+import datahub.emitter.mce_builder as builder
+from datahub_airflow_plugin.entities import _Entity
+
+logger = logging.getLogger(__name__)
+
+# URI scheme to DataHub platform mapping
+URI_SCHEME_TO_PLATFORM = {
+    "s3": "s3",
+    "s3a": "s3",
+    "gs": "gcs",
+    "gcs": "gcs",
+    "file": "file",
+    "hdfs": "hdfs",
+    "abfs": "adls",
+    "abfss": "adls",
+    "postgresql": "postgres",
+    "mysql": "mysql",
+    "bigquery": "bigquery",
+    "snowflake": "snowflake",
+}
+
+
+def is_airflow_asset(obj: Any) -> bool:
+    """Check if object is an Airflow Asset or Dataset.
+
+    Works with both Airflow 2.x Dataset and Airflow 3.x Asset classes,
+    including subclasses, by checking the class hierarchy (MRO) and
+    required 'uri' attribute.
+    """
+    if not hasattr(obj, "uri"):
+        return False
+
+    # Check class name and all base classes in the MRO
+    for cls in type(obj).__mro__:
+        if cls.__name__ in ("Asset", "Dataset"):
+            return True
+
+    return False
+
+
+def translate_airflow_asset_to_urn(
+    asset: Any, env: str = "PROD", platform_fallback: str = "airflow"
+) -> Optional[str]:
+    """Convert Airflow Asset URI to DataHub dataset URN.
+
+    Args:
+        asset: An Airflow Asset or Dataset object with a 'uri' attribute.
+        env: The DataHub environment (default: "PROD").
+        platform_fallback: Platform to use when URI has no scheme (default: "airflow").
+            This is used for @asset decorated functions that don't specify a URI.
+
+    Returns:
+        A DataHub dataset URN string, or None if the asset URI is invalid.
+
+    Examples:
+        >>> translate_airflow_asset_to_urn(mock_asset("s3://bucket/path"))
+        "urn:li:dataset:(urn:li:dataPlatform:s3,bucket/path,PROD)"
+
+        >>> translate_airflow_asset_to_urn(mock_asset("postgresql://host/db/table"))
+        "urn:li:dataset:(urn:li:dataPlatform:postgres,host/db/table,PROD)"
+
+        >>> translate_airflow_asset_to_urn(mock_asset("my_asset"))  # @asset decorator
+        "urn:li:dataset:(urn:li:dataPlatform:airflow,my_asset,PROD)"
+    """
+    uri = getattr(asset, "uri", None)
+    if not uri:
+        return None
+
+    try:
+        parsed = urlparse(uri)
+    except Exception as e:
+        logger.warning(
+            f"Failed to parse Airflow asset URI '{uri}': {e}. "
+            f"This asset will be excluded from lineage."
+        )
+        return None
+
+    scheme = parsed.scheme.lower() if parsed.scheme else ""
+
+    if scheme:
+        # URI has a scheme - map to DataHub platform
+        platform = URI_SCHEME_TO_PLATFORM.get(scheme, scheme)
+        # Build dataset name from URI components
+        if parsed.netloc:
+            name = f"{parsed.netloc}{parsed.path}".lstrip("/")
+        else:
+            name = parsed.path.lstrip("/")
+    else:
+        # No scheme - this is likely an @asset decorated function with just a name
+        # Use the fallback platform (default: "airflow") and the URI as the name
+        platform = platform_fallback
+        name = uri
+
+    # Ensure we have a valid name
+    if not name:
+        logger.debug(f"Airflow asset URI has empty name: '{uri}'. Skipping.")
+        return None
+
+    try:
+        return builder.make_dataset_urn(platform=platform, name=name, env=env)
+    except Exception as e:
+        logger.warning(
+            f"Failed to create DataHub URN for Airflow asset: {e}. "
+            f"URI: '{uri}', platform: '{platform}', name: '{name}'. "
+            f"This asset will be excluded from lineage."
+        )
+        return None
+
+
+def extract_urns_from_iolets(
+    iolets: Iterable[Any],
+    capture_airflow_assets: bool,
+    env: str = "PROD",
+) -> List[str]:
+    """Extract URNs from a list of inlets or outlets.
+
+    Processes both DataHub entity objects and native Airflow Assets/Datasets.
+
+    Args:
+        iolets: Iterable of inlet/outlet objects (from task.inlets or task.outlets).
+        capture_airflow_assets: Whether to capture native Airflow Assets.
+        env: The DataHub environment (default: "PROD").
+
+    Returns:
+        List of URN strings extracted from the iolets.
+    """
+    urns: List[str] = []
+    for iolet in iolets:
+        try:
+            if isinstance(iolet, _Entity):
+                urns.append(iolet.urn)
+            elif capture_airflow_assets and is_airflow_asset(iolet):
+                urn = translate_airflow_asset_to_urn(iolet, env=env)
+                if urn:
+                    urns.append(urn)
+                else:
+                    # translate_airflow_asset_to_urn already logs details
+                    uri = getattr(iolet, "uri", None)
+                    logger.warning(
+                        f"Skipping Airflow asset with URI '{uri}' - "
+                        f"could not convert to DataHub URN. "
+                        f"Check that the URI is valid and has a proper scheme."
+                    )
+        except Exception as e:
+            # Catch any unexpected errors to avoid breaking lineage extraction
+            logger.warning(
+                f"Failed to extract URN from iolet {type(iolet).__name__}: {e}. "
+                f"Continuing with remaining iolets."
+            )
+            continue
+    return urns
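For reference, a minimal sketch of the URI-to-URN mapping the new adapter performs. types.SimpleNamespace stands in for an Airflow Asset/Dataset so the snippet runs without an Airflow installation (the real listener code path additionally checks the class name via is_airflow_asset()), and the URIs are illustrative:

from types import SimpleNamespace

from datahub_airflow_plugin._airflow_asset_adapter import translate_airflow_asset_to_urn

# Stand-ins for Airflow Asset/Dataset objects: only the `uri` attribute matters here.
s3_asset = SimpleNamespace(uri="s3://my-bucket/raw/events")
named_asset = SimpleNamespace(uri="daily_metrics")  # e.g. an @asset-decorated function

print(translate_airflow_asset_to_urn(s3_asset))
# urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/raw/events,PROD)
print(translate_airflow_asset_to_urn(named_asset))
# urn:li:dataset:(urn:li:dataPlatform:airflow,daily_metrics,PROD)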
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/_config.py
RENAMED
@@ -46,6 +46,11 @@ class DatahubLineageConfig(ConfigModel):
     # referenced by inlets or outlets.
     materialize_iolets: bool
 
+    # If true (default), capture native Airflow Assets/Datasets as DataHub lineage.
+    # Airflow 2.4+ Dataset and Airflow 3.x Asset objects in inlets/outlets
+    # will be converted to DataHub dataset URNs.
+    capture_airflow_assets: bool
+
     capture_executions: bool
 
     datajob_url_link: DatajobUrl
@@ -135,6 +140,9 @@ def get_lineage_config() -> DatahubLineageConfig:
     )
     capture_executions = conf.get("datahub", "capture_executions", fallback=True)
     materialize_iolets = conf.get("datahub", "materialize_iolets", fallback=True)
+    capture_airflow_assets = conf.get(
+        "datahub", "capture_airflow_assets", fallback=True
+    )
     enable_extractors = conf.get("datahub", "enable_extractors", fallback=True)
 
     # OpenLineage extractor patching/override configuration
@@ -204,6 +212,7 @@ def get_lineage_config() -> DatahubLineageConfig:
         capture_tags_info=capture_tags_info,
         capture_executions=capture_executions,
         materialize_iolets=materialize_iolets,
+        capture_airflow_assets=capture_airflow_assets,
         enable_extractors=enable_extractors,
         patch_sql_parser=patch_sql_parser,
         patch_snowflake_schema=patch_snowflake_schema,
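The new capture_airflow_assets option defaults to true and lives in the [datahub] section of the Airflow config. A sketch of the lookup the plugin performs, mirroring the conf.get() call above; under Airflow's standard config resolution the value can also come from the AIRFLOW__DATAHUB__CAPTURE_AIRFLOW_ASSETS environment variable:

from airflow.configuration import conf

# Mirrors get_lineage_config(): reads [datahub] capture_airflow_assets from
# airflow.cfg (or its env-var override), defaulting to True when absent.
capture_airflow_assets = conf.get("datahub", "capture_airflow_assets", fallback=True)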
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow2/datahub_listener.py
RENAMED
@@ -22,7 +22,9 @@ import airflow
 # Import Airflow 2.x specific shims (clean, no cross-version complexity)
 import airflow.version
 import packaging.version
-
+
+# Note: We intentionally don't import Variable here to avoid DB access in listeners.
+# See check_kill_switch() for details on why we use os.getenv() instead.
 from airflow.models.serialized_dag import SerializedDagModel
 
 # Import Airflow 2.x compatibility and patches before any Airflow imports
@@ -74,6 +76,7 @@ from datahub.metadata.schema_classes import (
 )
 from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
 from datahub.telemetry import telemetry
+from datahub_airflow_plugin._airflow_asset_adapter import extract_urns_from_iolets
 from datahub_airflow_plugin._config import DatahubLineageConfig, get_lineage_config
 from datahub_airflow_plugin._constants import (
     DATAHUB_SQL_PARSING_RESULT_KEY,
@@ -94,7 +97,6 @@ from datahub_airflow_plugin.client.airflow_generator import (  # type: ignore[at
     AirflowGenerator,
 )
 from datahub_airflow_plugin.entities import (
-    _Entity,
     entities_to_datajob_urn_list,
     entities_to_dataset_urn_list,
 )
@@ -498,12 +500,20 @@ class DataHubListener:
         output_urns.extend(sql_output_urns)
         fine_grained_lineages.extend(sql_fine_grained_lineages)
 
-        # Add DataHub-native inlets/outlets
+        # Add DataHub-native inlets/outlets and Airflow Assets
         input_urns.extend(
-
+            extract_urns_from_iolets(
+                get_task_inlets(task),
+                self.config.capture_airflow_assets,
+                env=self.config.cluster,
+            )
         )
         output_urns.extend(
-
+            extract_urns_from_iolets(
+                get_task_outlets(task),
+                self.config.capture_airflow_assets,
+                env=self.config.cluster,
+            )
         )
 
         # Write the lineage to the datajob object
@@ -568,13 +578,29 @@ class DataHubListener:
         )
 
     def check_kill_switch(self) -> bool:
-
-
-
-
-
-
-
+        """
+        Check kill switch using environment variable.
+
+        We use os.getenv() instead of Variable.get() because Variable.get()
+        creates a new database session and commits it. When called from listener
+        hooks (which execute during SQLAlchemy's after_flush event, before the
+        main transaction commits), this nested commit can corrupt the outer
+        transaction state and cause data loss.
+
+        Specifically, this was causing TaskInstanceHistory records to not be
+        persisted for retried tasks. See: https://github.com/apache/airflow/pull/48780
+
+        Users can set the kill switch via environment variable:
+            export AIRFLOW_VAR_DATAHUB_AIRFLOW_PLUGIN_DISABLE_LISTENER=true
+        """
+        if (
+            os.getenv(
+                f"AIRFLOW_VAR_{KILL_SWITCH_VARIABLE_NAME}".upper(), "false"
+            ).lower()
+            == "true"
+        ):
+            logger.debug("DataHub listener disabled by kill switch (env var)")
+            return True
         return False
 
     def _prepare_task_context(
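A standalone sketch of the new kill-switch check, assuming KILL_SWITCH_VARIABLE_NAME is "datahub_airflow_plugin_disable_listener" (the value implied by the export example in the docstring above; in the plugin the constant is imported, not defined inline). Airflow exposes Variables set through the environment as AIRFLOW_VAR_<NAME>, which is why a plain os.getenv() suffices and no database session is opened:

import os

# Assumed constant value, inferred from the docstring's export example.
KILL_SWITCH_VARIABLE_NAME = "datahub_airflow_plugin_disable_listener"

def kill_switch_enabled() -> bool:
    # Same lookup as check_kill_switch(): AIRFLOW_VAR_<NAME>, defaulting to "false".
    value = os.getenv(f"AIRFLOW_VAR_{KILL_SWITCH_VARIABLE_NAME}".upper(), "false")
    return value.lower() == "true"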
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/airflow3/datahub_listener.py
RENAMED
@@ -52,6 +52,7 @@ from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult
 from datahub.telemetry import telemetry
 
 # Import Airflow 3.x specific shims (clean, no cross-version complexity)
+from datahub_airflow_plugin._airflow_asset_adapter import extract_urns_from_iolets
 from datahub_airflow_plugin._config import DatahubLineageConfig, get_lineage_config
 from datahub_airflow_plugin._constants import DATAHUB_SQL_PARSING_RESULT_KEY
 from datahub_airflow_plugin._version import __package_name__, __version__
@@ -69,7 +70,6 @@ from datahub_airflow_plugin.client.airflow_generator import (  # type: ignore[at
     AirflowGenerator,
 )
 from datahub_airflow_plugin.entities import (
-    _Entity,
     entities_to_datajob_urn_list,
     entities_to_dataset_urn_list,
 )
@@ -830,12 +830,20 @@ class DataHubListener:
 
         fine_grained_lineages.extend(sql_fine_grained_lineages)
 
-        # Add DataHub-native inlets/outlets
+        # Add DataHub-native inlets/outlets and Airflow Assets
         input_urns.extend(
-
+            extract_urns_from_iolets(
+                get_task_inlets(task),
+                self.config.capture_airflow_assets,
+                env=self.config.cluster,
+            )
         )
         output_urns.extend(
-
+            extract_urns_from_iolets(
+                get_task_outlets(task),
+                self.config.capture_airflow_assets,
+                env=self.config.cluster,
+            )
         )
 
         # Write the lineage to the datajob object
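To see the feature end to end, a minimal sketch of a task declaring a native Airflow Dataset outlet, which both listeners above now convert to a DataHub URN when capture_airflow_assets is enabled (assumes Airflow 2.4+; on Airflow 3.x, airflow.sdk.Asset behaves the same way):

from airflow.datasets import Dataset
from airflow.operators.bash import BashOperator

orders = Dataset("s3://warehouse/orders")

# Inside a DAG definition; the listener maps `orders` to
# urn:li:dataset:(urn:li:dataPlatform:s3,warehouse/orders,PROD)
load_orders = BashOperator(
    task_id="load_orders",
    bash_command="echo load",
    outlets=[orders],
)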
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/src/datahub_airflow_plugin/client/airflow_generator.py
RENAMED
@@ -75,6 +75,35 @@ def _task_downstream_task_ids(operator: "Operator") -> Set[str]:
     return operator._downstream_task_id  # type: ignore[attr-defined,union-attr]
 
 
+def _serialize_iolets_for_properties(iolets: List[Any]) -> List[str]:
+    """Serialize inlets/outlets to a list of URIs for stable custom properties.
+
+    This function extracts the URI from various inlet/outlet object types:
+    - Airflow 2.4+ Dataset objects (have .uri attribute)
+    - Airflow 3.x Asset objects (have .uri attribute)
+    - Airflow 3.x AssetDefinition objects from @asset decorator (have .uri attribute)
+    - DataHub Dataset entities (have __repr__ that returns URN)
+    - Strings (used as-is)
+
+    This avoids using repr() on complex objects which would include memory addresses
+    and other unstable information.
+    """
+    result = []
+    for iolet in iolets:
+        if hasattr(iolet, "uri"):
+            # Airflow Dataset/Asset/AssetDefinition objects
+            result.append(str(iolet.uri))
+        elif hasattr(iolet, "urn"):
+            # DataHub entities with URN
+            result.append(str(iolet.urn))
+        elif isinstance(iolet, str):
+            result.append(iolet)
+        else:
+            # Fallback to string representation for unknown types
+            result.append(str(iolet))
+    return result
+
+
 class AirflowGenerator:
     @staticmethod
     def _get_dependencies(
@@ -362,6 +391,10 @@ class AirflowGenerator:
             if out_key == "downstream_task_ids":
                 # Generate these in a consistent order.
                 v = list(sorted(v))
+            if out_key in ("inlets", "outlets"):
+                # Serialize inlets/outlets as list of URIs for stable representation.
+                # This avoids including memory addresses from repr() of complex objects.
+                v = _serialize_iolets_for_properties(v)
             job_property_bag[out_key] = repr(v)
             break
 
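A short sketch of the stable serialization the new helper produces, again using SimpleNamespace stand-ins for an Airflow Asset and a DataHub entity (the URN is illustrative):

from types import SimpleNamespace

from datahub_airflow_plugin.client.airflow_generator import (
    _serialize_iolets_for_properties,
)

iolets = [
    SimpleNamespace(uri="s3://bucket/table"),  # Airflow Dataset/Asset
    SimpleNamespace(urn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)"),
    "raw_string_inlet",
]
print(_serialize_iolets_for_properties(iolets))
# ['s3://bucket/table', 'urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)', 'raw_string_inlet']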
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/README.md
RENAMED
File without changes
{acryl_datahub_airflow_plugin-1.3.1.7rc1 → acryl_datahub_airflow_plugin-1.3.1.8rc1}/setup.cfg
RENAMED
File without changes
All other renamed files (listed above with +0 -0): File without changes