acryl-datahub-airflow-plugin 1.3.1.5__tar.gz → 1.3.1.5rc2__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (81)
  1. acryl_datahub_airflow_plugin-1.3.1.5rc2/PKG-INFO +91 -0
  2. acryl_datahub_airflow_plugin-1.3.1.5rc2/README.md +7 -0
  3. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.cfg +0 -18
  4. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/setup.py +6 -14
  5. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +91 -0
  6. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +39 -0
  7. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/requires.txt +33 -37
  8. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_airflow_shims.py +42 -0
  9. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_config.py +142 -0
  10. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_datahub_ol_adapter.py +23 -0
  11. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/_extractors.py +365 -0
  12. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/_version.py +1 -1
  13. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/client/airflow_generator.py +43 -147
  14. {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2 → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin}/datahub_listener.py +188 -464
  15. {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2 → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/example_dags}/graph_usage_sample_dag.py +4 -12
  16. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/hooks/datahub.py +2 -11
  17. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub.py +3 -20
  18. acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/py.typed +0 -0
  19. acryl_datahub_airflow_plugin-1.3.1.5/PKG-INFO +0 -303
  20. acryl_datahub_airflow_plugin-1.3.1.5/README.md +0 -217
  21. acryl_datahub_airflow_plugin-1.3.1.5/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +0 -303
  22. acryl_datahub_airflow_plugin-1.3.1.5/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +0 -71
  23. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_compat.py +0 -32
  24. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_shims.py +0 -75
  25. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_airflow_version_specific.py +0 -184
  26. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_config.py +0 -220
  27. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_constants.py +0 -16
  28. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/_datahub_ol_adapter.py +0 -35
  29. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/__init__.py +0 -6
  30. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +0 -402
  31. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_airflow_compat.py +0 -95
  32. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_extractors.py +0 -477
  33. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_legacy_shims.py +0 -20
  34. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_openlineage_compat.py +0 -123
  35. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_provider_shims.py +0 -29
  36. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow2/_shims.py +0 -88
  37. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/__init__.py +0 -6
  38. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +0 -408
  39. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_airflow_compat.py +0 -108
  40. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +0 -153
  41. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +0 -273
  42. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_shims.py +0 -82
  43. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +0 -88
  44. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +0 -308
  45. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/airflow3/datahub_listener.py +0 -1452
  46. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/datahub_listener.py +0 -25
  47. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/__init__.py +0 -32
  48. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/__init__.py +0 -8
  49. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +0 -54
  50. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +0 -69
  51. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +0 -69
  52. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +0 -81
  53. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +0 -68
  54. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +0 -99
  55. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/__init__.py +0 -8
  56. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +0 -51
  57. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +0 -51
  58. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +0 -89
  59. acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +0 -43
  60. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/pyproject.toml +0 -0
  61. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt +0 -0
  62. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt +0 -0
  63. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe +0 -0
  64. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/acryl_datahub_airflow_plugin.egg-info/top_level.txt +0 -0
  65. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/__init__.py +0 -0
  66. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/client/__init__.py +0 -0
  67. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/datahub_plugin.py +0 -0
  68. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/entities.py +0 -0
  69. {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/hooks → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/example_dags}/__init__.py +0 -0
  70. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +0 -0
  71. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py +0 -0
  72. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py +0 -0
  73. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +0 -0
  74. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py +0 -0
  75. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py +0 -0
  76. {acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/operators → acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/hooks}/__init__.py +0 -0
  77. /acryl_datahub_airflow_plugin-1.3.1.5/src/datahub_airflow_plugin/py.typed → /acryl_datahub_airflow_plugin-1.3.1.5rc2/src/datahub_airflow_plugin/operators/__init__.py +0 -0
  78. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +0 -0
  79. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +0 -0
  80. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +0 -0
  81. {acryl_datahub_airflow_plugin-1.3.1.5 → acryl_datahub_airflow_plugin-1.3.1.5rc2}/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +0 -0
PKG-INFO
@@ -0,0 +1,91 @@
+ Metadata-Version: 2.4
+ Name: acryl-datahub-airflow-plugin
+ Version: 1.3.1.5rc2
+ Summary: Datahub Airflow plugin to capture executions and send to Datahub
+ Home-page: https://docs.datahub.com/
+ License: Apache-2.0
+ Project-URL: Documentation, https://docs.datahub.com/docs/
+ Project-URL: Source, https://github.com/datahub-project/datahub
+ Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Information Technology
+ Classifier: Intended Audience :: System Administrators
+ Classifier: Operating System :: Unix
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Environment :: Console
+ Classifier: Environment :: MacOS X
+ Classifier: Topic :: Software Development
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0
+ Requires-Dist: apache-airflow<3,>=2.7.0
+ Requires-Dist: pydantic>=2.4.0
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2
+ Provides-Extra: ignore
+ Provides-Extra: datahub-rest
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "datahub-rest"
+ Provides-Extra: datahub-kafka
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "datahub-kafka"
+ Provides-Extra: datahub-file
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "datahub-file"
+ Provides-Extra: dev
+ Requires-Dist: apache-airflow<3,>=2.7.0; extra == "dev"
+ Requires-Dist: tenacity; extra == "dev"
+ Requires-Dist: types-PyYAML; extra == "dev"
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+ Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0; extra == "dev"
+ Requires-Dist: pydantic>=2.4.0; extra == "dev"
+ Requires-Dist: types-click==0.1.12; extra == "dev"
+ Requires-Dist: tox; extra == "dev"
+ Requires-Dist: types-toml; extra == "dev"
+ Requires-Dist: deepdiff!=8.0.0; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: types-requests; extra == "dev"
+ Requires-Dist: sqlalchemy-stubs; extra == "dev"
+ Requires-Dist: types-dataclasses; extra == "dev"
+ Requires-Dist: types-tabulate; extra == "dev"
+ Requires-Dist: mypy==1.17.1; extra == "dev"
+ Requires-Dist: types-python-dateutil; extra == "dev"
+ Requires-Dist: types-setuptools; extra == "dev"
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
+ Requires-Dist: packaging; extra == "dev"
+ Requires-Dist: types-six; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "dev"
+ Requires-Dist: ruff==0.11.7; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2; extra == "dev"
+ Requires-Dist: tox-uv; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Requires-Dist: types-cachetools; extra == "dev"
+ Requires-Dist: coverage>=5.1; extra == "dev"
+ Provides-Extra: integration-tests
+ Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: virtualenv; extra == "integration-tests"
+ Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
+ Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
+ Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: project-url
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # Datahub Airflow Plugin
+
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
+
+ ## Developing
+
+ See the [developing docs](../../metadata-ingestion/developing.md).
README.md
@@ -0,0 +1,7 @@
+ # Datahub Airflow Plugin
+
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
+
+ ## Developing
+
+ See the [developing docs](../../metadata-ingestion/developing.md).
setup.cfg
@@ -9,24 +9,6 @@ check_untyped_defs = yes
  disallow_incomplete_defs = yes
  disallow_untyped_decorators = yes
  warn_unused_configs = yes
-
- [mypy-datahub_airflow_plugin._extractors]
- ignore_errors = True
-
- [mypy-datahub_airflow_plugin.airflow2._extractors]
- ignore_errors = True
-
- [mypy-datahub_airflow_plugin.airflow2.datahub_listener]
- ignore_errors = True
-
- [mypy-tests.unit.test_teradata_extractor]
- ignore_errors = True
-
- [mypy-datahub_airflow_plugin.example_dags.*]
- ignore_errors = True
-
- [mypy-tests.integration.dags.*]
- ignore_errors = True
  disallow_untyped_defs = no

  [mypy-datahub.*]
setup.py
@@ -25,12 +25,12 @@ _self_pin = (
  base_requirements = {
      f"acryl-datahub[sql-parser,datahub-rest]{_self_pin}",
      "pydantic>=2.4.0",
-     # We require Airflow 2.5.x at minimum, since we need the new DAG listener API.
-     # We support both Airflow 2.x and 3.x with full backward compatibility.
-     "apache-airflow>=2.5.0,<4.0.0",
-     # Note: OpenLineage dependencies are version-specific and provided via extras:
-     # - airflow2: for Airflow 2.x (uses standalone openlineage-airflow package)
-     # - airflow3: for Airflow 3.x (uses native apache-airflow-providers-openlineage)
+     # We require Airflow 2.7.x at minimum, to be compatible with the native Airflow Openlineage provider.
+     "apache-airflow>=2.7.0,<3",
+     # We remain restrictive on the versions allowed here to prevent
+     # us from being broken by backwards-incompatible changes in the
+     # underlying package.
+     "openlineage-airflow>=1.2.0,<=1.30.1",
  }

  plugins: Dict[str, Set[str]] = {
@@ -43,14 +43,6 @@ plugins: Dict[str, Set[str]] = {
      "datahub-file": {
          f"acryl-datahub[sync-file-emitter]{_self_pin}",
      },
-     # airflow2: For Airflow 2.x, use standalone openlineage-airflow package
-     "airflow2": {
-         "openlineage-airflow>=1.2.0",
-     },
-     # airflow3: For Airflow 3.x, use native OpenLineage provider
-     "airflow3": {
-         "apache-airflow-providers-openlineage>=1.0.0",
-     },
  }

  # Require some plugins by default.
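
The setup.py change above narrows the supported runtime to Airflow >=2.7,<3 and pulls in a pinned openlineage-airflow range directly, rather than via the old airflow2/airflow3 extras. A minimal sketch of the equivalent runtime guard, assuming only that `packaging` and Airflow are installed; the check itself is illustrative and not part of the package:

```python
import airflow.version
import packaging.version

# Mirrors the new "apache-airflow>=2.7.0,<3" constraint from setup.py above.
installed = packaging.version.parse(airflow.version.version)
if not (packaging.version.parse("2.7.0") <= installed < packaging.version.parse("3.0.0")):
    raise RuntimeError(
        f"acryl-datahub-airflow-plugin 1.3.1.5rc2 targets Airflow >=2.7,<3; found {installed}"
    )
```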
src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO
@@ -0,0 +1,91 @@
+ Metadata-Version: 2.4
+ Name: acryl-datahub-airflow-plugin
+ Version: 1.3.1.5rc2
+ Summary: Datahub Airflow plugin to capture executions and send to Datahub
+ Home-page: https://docs.datahub.com/
+ License: Apache-2.0
+ Project-URL: Documentation, https://docs.datahub.com/docs/
+ Project-URL: Source, https://github.com/datahub-project/datahub
+ Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Information Technology
+ Classifier: Intended Audience :: System Administrators
+ Classifier: Operating System :: Unix
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Environment :: Console
+ Classifier: Environment :: MacOS X
+ Classifier: Topic :: Software Development
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0
+ Requires-Dist: apache-airflow<3,>=2.7.0
+ Requires-Dist: pydantic>=2.4.0
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2
+ Provides-Extra: ignore
+ Provides-Extra: datahub-rest
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "datahub-rest"
+ Provides-Extra: datahub-kafka
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "datahub-kafka"
+ Provides-Extra: datahub-file
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "datahub-file"
+ Provides-Extra: dev
+ Requires-Dist: apache-airflow<3,>=2.7.0; extra == "dev"
+ Requires-Dist: tenacity; extra == "dev"
+ Requires-Dist: types-PyYAML; extra == "dev"
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
+ Requires-Dist: openlineage-airflow<=1.30.1,>=1.2.0; extra == "dev"
+ Requires-Dist: pydantic>=2.4.0; extra == "dev"
+ Requires-Dist: types-click==0.1.12; extra == "dev"
+ Requires-Dist: tox; extra == "dev"
+ Requires-Dist: types-toml; extra == "dev"
+ Requires-Dist: deepdiff!=8.0.0; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: types-requests; extra == "dev"
+ Requires-Dist: sqlalchemy-stubs; extra == "dev"
+ Requires-Dist: types-dataclasses; extra == "dev"
+ Requires-Dist: types-tabulate; extra == "dev"
+ Requires-Dist: mypy==1.17.1; extra == "dev"
+ Requires-Dist: types-python-dateutil; extra == "dev"
+ Requires-Dist: types-setuptools; extra == "dev"
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
+ Requires-Dist: packaging; extra == "dev"
+ Requires-Dist: types-six; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc2; extra == "dev"
+ Requires-Dist: ruff==0.11.7; extra == "dev"
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2; extra == "dev"
+ Requires-Dist: tox-uv; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Requires-Dist: types-cachetools; extra == "dev"
+ Requires-Dist: coverage>=5.1; extra == "dev"
+ Provides-Extra: integration-tests
+ Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: virtualenv; extra == "integration-tests"
+ Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
+ Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "integration-tests"
+ Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
+ Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: project-url
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # Datahub Airflow Plugin
+
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
+
+ ## Developing
+
+ See the [developing docs](../../metadata-ingestion/developing.md).
src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt
@@ -0,0 +1,39 @@
+ README.md
+ pyproject.toml
+ setup.cfg
+ setup.py
+ src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO
+ src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt
+ src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt
+ src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt
+ src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe
+ src/acryl_datahub_airflow_plugin.egg-info/requires.txt
+ src/acryl_datahub_airflow_plugin.egg-info/top_level.txt
+ src/datahub_airflow_plugin/__init__.py
+ src/datahub_airflow_plugin/_airflow_shims.py
+ src/datahub_airflow_plugin/_config.py
+ src/datahub_airflow_plugin/_datahub_ol_adapter.py
+ src/datahub_airflow_plugin/_extractors.py
+ src/datahub_airflow_plugin/_version.py
+ src/datahub_airflow_plugin/datahub_listener.py
+ src/datahub_airflow_plugin/datahub_plugin.py
+ src/datahub_airflow_plugin/entities.py
+ src/datahub_airflow_plugin/py.typed
+ src/datahub_airflow_plugin/client/__init__.py
+ src/datahub_airflow_plugin/client/airflow_generator.py
+ src/datahub_airflow_plugin/example_dags/__init__.py
+ src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py
+ src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py
+ src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py
+ src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py
+ src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py
+ src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py
+ src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py
+ src/datahub_airflow_plugin/hooks/__init__.py
+ src/datahub_airflow_plugin/hooks/datahub.py
+ src/datahub_airflow_plugin/operators/__init__.py
+ src/datahub_airflow_plugin/operators/datahub.py
+ src/datahub_airflow_plugin/operators/datahub_assertion_operator.py
+ src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py
+ src/datahub_airflow_plugin/operators/datahub_operation_operator.py
+ src/datahub_airflow_plugin/operators/datahub_operation_sensor.py
src/acryl_datahub_airflow_plugin.egg-info/requires.txt
@@ -1,60 +1,56 @@
+ openlineage-airflow<=1.30.1,>=1.2.0
+ apache-airflow<3,>=2.7.0
  pydantic>=2.4.0
- acryl-datahub[datahub-rest,sql-parser]==1.3.1.5
- acryl-datahub[datahub-rest]==1.3.1.5
- apache-airflow<4.0.0,>=2.5.0
-
- [airflow2]
- openlineage-airflow>=1.2.0
-
- [airflow3]
- apache-airflow-providers-openlineage>=1.0.0
+ acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+ acryl-datahub[datahub-rest]==1.3.1.5rc2

  [datahub-file]
- acryl-datahub[sync-file-emitter]==1.3.1.5
+ acryl-datahub[sync-file-emitter]==1.3.1.5rc2

  [datahub-kafka]
- acryl-datahub[datahub-kafka]==1.3.1.5
+ acryl-datahub[datahub-kafka]==1.3.1.5rc2

  [datahub-rest]
- acryl-datahub[datahub-rest]==1.3.1.5
+ acryl-datahub[datahub-rest]==1.3.1.5rc2

  [dev]
- acryl-datahub[datahub-rest]==1.3.1.5
- types-setuptools
- deepdiff!=8.0.0
- pydantic>=2.4.0
- types-six
- types-toml
- pytest>=6.2.2
+ apache-airflow<3,>=2.7.0
  tenacity
+ types-PyYAML
  pytest-cov>=2.8.1
- sqlalchemy-stubs
- types-cachetools
- ruff==0.11.7
- coverage>=5.1
+ openlineage-airflow<=1.30.1,>=1.2.0
+ pydantic>=2.4.0
+ types-click==0.1.12
  tox
- twine
- tox-uv
- types-PyYAML
- mypy==1.17.1
- packaging
- acryl-datahub[datahub-rest,sql-parser]==1.3.1.5
- types-python-dateutil
+ types-toml
+ deepdiff!=8.0.0
  build
- apache-airflow<4.0.0,>=2.5.0
- types-click==0.1.12
+ types-requests
+ sqlalchemy-stubs
  types-dataclasses
  types-tabulate
- types-requests
+ mypy==1.17.1
+ types-python-dateutil
+ types-setuptools
+ pytest>=6.2.2
+ packaging
+ types-six
+ acryl-datahub[datahub-rest]==1.3.1.5rc2
+ ruff==0.11.7
+ acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc2
+ tox-uv
+ twine
+ types-cachetools
+ coverage>=5.1

  [ignore]

  [integration-tests]
+ acryl-datahub[testing-utils]==1.3.1.5rc2
+ acryl-datahub[sync-file-emitter]==1.3.1.5rc2
  virtualenv
  apache-airflow[amazon,google,snowflake]>=2.0.2
- acryl-datahub[sync-file-emitter]==1.3.1.5
- apache-airflow-providers-sqlite
  snowflake-connector-python>=2.7.10
+ acryl-datahub[datahub-kafka]==1.3.1.5rc2
+ apache-airflow-providers-sqlite
  apache-airflow-providers-teradata
- acryl-datahub[testing-utils]==1.3.1.5
- acryl-datahub[datahub-kafka]==1.3.1.5
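
These requirement strings, including the extra markers, are what pip records once a build of the release candidate is installed. A small sketch for inspecting them locally, assuming this rc is actually installed in the current environment (otherwise `version()` raises `PackageNotFoundError`):

```python
from importlib.metadata import requires, version

dist = "acryl-datahub-airflow-plugin"
print(version(dist))  # expected to print "1.3.1.5rc2" if this build is installed

# The returned strings carry the same markers as requires.txt / PKG-INFO above,
# e.g. 'acryl-datahub[datahub-kafka]==1.3.1.5rc2; extra == "datahub-kafka"'.
for req in requires(dist) or []:
    if "; extra" not in req or 'extra == "datahub-kafka"' in req:
        print(req)
```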
src/datahub_airflow_plugin/_airflow_shims.py
@@ -0,0 +1,42 @@
+ from typing import List
+
+ import airflow.version
+ import packaging.version
+ from airflow.models.operator import Operator
+
+ try:
+     from airflow.sensors.external_task import ExternalTaskSensor
+ except ImportError:
+     from airflow.sensors.external_task_sensor import ExternalTaskSensor  # type: ignore
+
+ # Approach suggested by https://stackoverflow.com/a/11887885/5004662.
+ AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
+ HAS_AIRFLOW_DAG_LISTENER_API = True  # this is in Airflow 2.5+
+ HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse(
+     "2.8.0.dev0"
+ )
+
+
+ def get_task_inlets(operator: "Operator") -> List:
+     # From Airflow 2.4 onwards, _inlets is dropped and inlets is used consistently. Earlier versions were inconsistent, so we have to prefer _inlets.
+     if hasattr(operator, "_inlets"):
+         return operator._inlets  # type: ignore[attr-defined, union-attr]
+     if hasattr(operator, "get_inlet_defs"):
+         return operator.get_inlet_defs()  # type: ignore[attr-defined]
+     return operator.inlets or []
+
+
+ def get_task_outlets(operator: "Operator") -> List:
+     # From Airflow 2.4 onwards, _outlets is dropped and outlets is used consistently. Earlier versions were inconsistent, so we have to prefer _outlets.
+     # We have to use _outlets because outlets is empty in Airflow < 2.4.0.
+     if hasattr(operator, "_outlets"):
+         return operator._outlets  # type: ignore[attr-defined, union-attr]
+     if hasattr(operator, "get_outlet_defs"):
+         return operator.get_outlet_defs()
+     return operator.outlets or []
+
+
+ __all__ = [
+     "AIRFLOW_VERSION",
+     "ExternalTaskSensor",
+ ]
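
A short usage sketch for the shims above. It assumes the plugin's `entities.Dataset(platform, name)` constructor (as used for manual lineage in the DataHub Airflow docs) and uses made-up task and dataset names:

```python
from airflow.operators.empty import EmptyOperator

from datahub_airflow_plugin._airflow_shims import get_task_inlets, get_task_outlets
from datahub_airflow_plugin.entities import Dataset  # assumed (platform, name) signature

task = EmptyOperator(
    task_id="transform",                                          # hypothetical task
    inlets=[Dataset("snowflake", "analytics.raw.events")],        # hypothetical upstream
    outlets=[Dataset("snowflake", "analytics.marts.sessions")],   # hypothetical downstream
)

# The shims paper over Airflow API differences: prefer the private _inlets/_outlets
# attributes when present, fall back to get_inlet_defs()/get_outlet_defs(), and
# finally to the public inlets/outlets fields.
print(get_task_inlets(task))
print(get_task_outlets(task))
```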
src/datahub_airflow_plugin/_config.py
@@ -0,0 +1,142 @@
+ from enum import Enum
+ from typing import TYPE_CHECKING, Dict, List, Optional, Union
+
+ from airflow.configuration import conf
+ from pydantic import root_validator
+ from pydantic.fields import Field
+
+ import datahub.emitter.mce_builder as builder
+ from datahub.configuration.common import AllowDenyPattern, ConfigModel
+
+ if TYPE_CHECKING:
+     from datahub_airflow_plugin.hooks.datahub import (
+         DatahubCompositeHook,
+         DatahubGenericHook,
+     )
+
+
+ class DatajobUrl(Enum):
+     GRID = "grid"
+     TASKINSTANCE = "taskinstance"
+
+
+ class DatahubLineageConfig(ConfigModel):
+     enabled: bool
+
+     # DataHub hook connection ID.
+     datahub_conn_id: str
+
+     _datahub_connection_ids: List[str]
+
+     # Cluster to associate with the pipelines and tasks. Defaults to "prod".
+     cluster: str
+
+     # Platform instance to associate with the pipelines and tasks.
+     platform_instance: Optional[str]
+
+     # If true, the owners field of the DAG will be captured as a DataHub corpuser.
+     capture_ownership_info: bool
+
+     # If true, the owners field of the DAG will instead be captured as a DataHub corpgroup.
+     capture_ownership_as_group: bool
+
+     # If true, the tags field of the DAG will be captured as DataHub tags.
+     capture_tags_info: bool
+
+     # If true (default), we'll materialize and un-soft-delete any urns
+     # referenced by inlets or outlets.
+     materialize_iolets: bool
+
+     capture_executions: bool
+
+     datajob_url_link: DatajobUrl
+
+     enable_extractors: bool
+
+     # If true, ti.render_templates() will be called in the listener.
+     # Makes extraction of jinja-templated fields more accurate.
+     render_templates: bool
+
+     # Only if true, lineage will be emitted for the DataJobs.
+     enable_datajob_lineage: bool
+
+     dag_filter_pattern: AllowDenyPattern = Field(
+         description="regex patterns for DAGs to ingest",
+     )
+
+     log_level: Optional[str]
+     debug_emitter: bool
+
+     disable_openlineage_plugin: bool
+
+     def make_emitter_hook(self) -> Union["DatahubGenericHook", "DatahubCompositeHook"]:
+         # This is necessary to avoid issues with circular imports.
+         from datahub_airflow_plugin.hooks.datahub import (
+             DatahubCompositeHook,
+             DatahubGenericHook,
+         )
+
+         if len(self._datahub_connection_ids) == 1:
+             return DatahubGenericHook(self._datahub_connection_ids[0])
+         else:
+             return DatahubCompositeHook(self._datahub_connection_ids)
+
+     @root_validator(skip_on_failure=True)
+     def split_conn_ids(cls, values: Dict) -> Dict:
+         if not values.get("datahub_conn_id"):
+             raise ValueError("datahub_conn_id is required")
+         conn_ids = values.get("datahub_conn_id", "").split(",")
+         cls._datahub_connection_ids = [conn_id.strip() for conn_id in conn_ids]
+         return values
+
+
+ def get_lineage_config() -> DatahubLineageConfig:
+     """Load the DataHub plugin config from airflow.cfg."""
+
+     enabled = conf.get("datahub", "enabled", fallback=True)
+     datahub_conn_id = conf.get("datahub", "conn_id", fallback="datahub_rest_default")
+     cluster = conf.get("datahub", "cluster", fallback=builder.DEFAULT_FLOW_CLUSTER)
+     platform_instance = conf.get("datahub", "platform_instance", fallback=None)
+     capture_tags_info = conf.get("datahub", "capture_tags_info", fallback=True)
+     capture_ownership_info = conf.get(
+         "datahub", "capture_ownership_info", fallback=True
+     )
+     capture_ownership_as_group = conf.get(
+         "datahub", "capture_ownership_as_group", fallback=False
+     )
+     capture_executions = conf.get("datahub", "capture_executions", fallback=True)
+     materialize_iolets = conf.get("datahub", "materialize_iolets", fallback=True)
+     enable_extractors = conf.get("datahub", "enable_extractors", fallback=True)
+     log_level = conf.get("datahub", "log_level", fallback=None)
+     debug_emitter = conf.get("datahub", "debug_emitter", fallback=False)
+     disable_openlineage_plugin = conf.get(
+         "datahub", "disable_openlineage_plugin", fallback=True
+     )
+     render_templates = conf.get("datahub", "render_templates", fallback=True)
+     datajob_url_link = conf.get(
+         "datahub", "datajob_url_link", fallback=DatajobUrl.TASKINSTANCE.value
+     )
+     dag_filter_pattern = AllowDenyPattern.model_validate_json(
+         conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}')
+     )
+     enable_lineage = conf.get("datahub", "enable_datajob_lineage", fallback=True)
+
+     return DatahubLineageConfig(
+         enabled=enabled,
+         datahub_conn_id=datahub_conn_id,
+         cluster=cluster,
+         platform_instance=platform_instance,
+         capture_ownership_info=capture_ownership_info,
+         capture_ownership_as_group=capture_ownership_as_group,
+         capture_tags_info=capture_tags_info,
+         capture_executions=capture_executions,
+         materialize_iolets=materialize_iolets,
+         enable_extractors=enable_extractors,
+         log_level=log_level,
+         debug_emitter=debug_emitter,
+         disable_openlineage_plugin=disable_openlineage_plugin,
+         datajob_url_link=datajob_url_link,
+         render_templates=render_templates,
+         dag_filter_pattern=dag_filter_pattern,
+         enable_datajob_lineage=enable_lineage,
+     )
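
`get_lineage_config()` reads everything from a `[datahub]` section in airflow.cfg, which can also be supplied via Airflow's standard `AIRFLOW__<SECTION>__<KEY>` environment-variable overrides. A minimal sketch, with illustrative values only (the key names mirror the `conf.get("datahub", ...)` calls above):

```python
import os

# Equivalent to an airflow.cfg [datahub] section; set these in the environment of the
# scheduler/webserver/workers before the plugin loads. Values shown are examples,
# not recommendations.
os.environ["AIRFLOW__DATAHUB__ENABLED"] = "True"
os.environ["AIRFLOW__DATAHUB__CONN_ID"] = "datahub_rest_default"
os.environ["AIRFLOW__DATAHUB__CLUSTER"] = "prod"
os.environ["AIRFLOW__DATAHUB__CAPTURE_EXECUTIONS"] = "True"
os.environ["AIRFLOW__DATAHUB__DAG_FILTER_STR"] = '{"allow": ["etl_.*"]}'  # hypothetical pattern

from datahub_airflow_plugin._config import get_lineage_config

config = get_lineage_config()
print(config.cluster, config.capture_executions, config.dag_filter_pattern)
```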
src/datahub_airflow_plugin/_datahub_ol_adapter.py
@@ -0,0 +1,23 @@
+ import logging
+
+ from openlineage.client.run import Dataset as OpenLineageDataset
+
+ import datahub.emitter.mce_builder as builder
+
+ logger = logging.getLogger(__name__)
+
+
+ OL_SCHEME_TWEAKS = {
+     "sqlserver": "mssql",
+     "awsathena": "athena",
+ }
+
+
+ def translate_ol_to_datahub_urn(ol_uri: OpenLineageDataset) -> str:
+     namespace = ol_uri.namespace
+     name = ol_uri.name
+
+     scheme, *rest = namespace.split("://", maxsplit=1)
+
+     platform = OL_SCHEME_TWEAKS.get(scheme, scheme)
+     return builder.make_dataset_urn(platform=platform, name=name)
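
A usage sketch for the adapter above, with a made-up Athena dataset. The scheme remap follows from `OL_SCHEME_TWEAKS`, and the resulting URN shape from `builder.make_dataset_urn` (which defaults to the PROD environment):

```python
from openlineage.client.run import Dataset as OpenLineageDataset

from datahub_airflow_plugin._datahub_ol_adapter import translate_ol_to_datahub_urn

# "awsathena" is remapped to DataHub's "athena" platform by OL_SCHEME_TWEAKS.
ol_dataset = OpenLineageDataset(
    namespace="awsathena://athena.us-east-1.amazonaws.com",  # hypothetical namespace
    name="my_db.my_table",                                    # hypothetical table
)
print(translate_ol_to_datahub_urn(ol_dataset))
# expected: urn:li:dataset:(urn:li:dataPlatform:athena,my_db.my_table,PROD)
```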