acryl-datahub-airflow-plugin 1.3.1.5rc6__tar.gz → 1.3.1.5rc9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. acryl_datahub_airflow_plugin-1.3.1.5rc9/PKG-INFO +303 -0
  2. acryl_datahub_airflow_plugin-1.3.1.5rc9/README.md +217 -0
  3. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/setup.cfg +18 -0
  4. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/setup.py +14 -6
  5. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +303 -0
  6. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +71 -0
  7. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/acryl_datahub_airflow_plugin.egg-info/requires.txt +60 -0
  8. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_airflow_compat.py +32 -0
  9. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_airflow_shims.py +75 -0
  10. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_airflow_version_specific.py +184 -0
  11. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_config.py +220 -0
  12. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_constants.py +16 -0
  13. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/_datahub_ol_adapter.py +35 -0
  14. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/_version.py +1 -1
  15. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/__init__.py +6 -0
  16. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +402 -0
  17. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_airflow_compat.py +95 -0
  18. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_extractors.py +477 -0
  19. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_legacy_shims.py +20 -0
  20. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_openlineage_compat.py +123 -0
  21. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_provider_shims.py +29 -0
  22. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2/_shims.py +88 -0
  23. {acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin → acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow2}/datahub_listener.py +464 -188
  24. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/__init__.py +6 -0
  25. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +408 -0
  26. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_airflow_compat.py +108 -0
  27. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +153 -0
  28. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +273 -0
  29. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_shims.py +82 -0
  30. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +88 -0
  31. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +308 -0
  32. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/airflow3/datahub_listener.py +1452 -0
  33. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/client/airflow_generator.py +147 -43
  34. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/datahub_listener.py +25 -0
  35. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/__init__.py +32 -0
  36. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/__init__.py +8 -0
  37. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +54 -0
  38. {acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/example_dags → acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2}/graph_usage_sample_dag.py +12 -4
  39. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +69 -0
  40. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +69 -0
  41. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +81 -0
  42. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +68 -0
  43. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +99 -0
  44. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow3/__init__.py +8 -0
  45. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +51 -0
  46. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +51 -0
  47. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +89 -0
  48. acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +43 -0
  49. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/hooks/datahub.py +11 -2
  50. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/operators/datahub.py +20 -3
  51. acryl_datahub_airflow_plugin-1.3.1.5rc6/PKG-INFO +0 -91
  52. acryl_datahub_airflow_plugin-1.3.1.5rc6/README.md +0 -7
  53. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/acryl_datahub_airflow_plugin.egg-info/PKG-INFO +0 -91
  54. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/acryl_datahub_airflow_plugin.egg-info/SOURCES.txt +0 -39
  55. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/acryl_datahub_airflow_plugin.egg-info/requires.txt +0 -56
  56. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/_airflow_shims.py +0 -42
  57. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/_config.py +0 -142
  58. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/_datahub_ol_adapter.py +0 -23
  59. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/_extractors.py +0 -365
  60. acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/operators/__init__.py +0 -0
  61. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/pyproject.toml +0 -0
  62. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/acryl_datahub_airflow_plugin.egg-info/dependency_links.txt +0 -0
  63. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/acryl_datahub_airflow_plugin.egg-info/entry_points.txt +0 -0
  64. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/acryl_datahub_airflow_plugin.egg-info/not-zip-safe +0 -0
  65. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/acryl_datahub_airflow_plugin.egg-info/top_level.txt +0 -0
  66. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/__init__.py +0 -0
  67. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/client/__init__.py +0 -0
  68. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/datahub_plugin.py +0 -0
  69. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/entities.py +0 -0
  70. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +0 -0
  71. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/lineage_backend_demo.py +0 -0
  72. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py +0 -0
  73. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +0 -0
  74. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/mysql_sample_dag.py +0 -0
  75. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/example_dags/snowflake_sample_dag.py +0 -0
  76. {acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/example_dags → acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/hooks}/__init__.py +0 -0
  77. {acryl_datahub_airflow_plugin-1.3.1.5rc6/src/datahub_airflow_plugin/hooks → acryl_datahub_airflow_plugin-1.3.1.5rc9/src/datahub_airflow_plugin/operators}/__init__.py +0 -0
  78. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +0 -0
  79. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +0 -0
  80. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +0 -0
  81. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +0 -0
  82. {acryl_datahub_airflow_plugin-1.3.1.5rc6 → acryl_datahub_airflow_plugin-1.3.1.5rc9}/src/datahub_airflow_plugin/py.typed +0 -0
@@ -0,0 +1,303 @@
1
+ Metadata-Version: 2.4
2
+ Name: acryl-datahub-airflow-plugin
3
+ Version: 1.3.1.5rc9
4
+ Summary: Datahub Airflow plugin to capture executions and send to Datahub
5
+ Home-page: https://docs.datahub.com/
6
+ License: Apache-2.0
7
+ Project-URL: Documentation, https://docs.datahub.com/docs/
8
+ Project-URL: Source, https://github.com/datahub-project/datahub
9
+ Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: Operating System :: Unix
18
+ Classifier: Operating System :: POSIX :: Linux
19
+ Classifier: Environment :: Console
20
+ Classifier: Environment :: MacOS X
21
+ Classifier: Topic :: Software Development
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: pydantic>=2.4.0
25
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9
26
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc9
27
+ Requires-Dist: apache-airflow<4.0.0,>=2.5.0
28
+ Provides-Extra: ignore
29
+ Provides-Extra: datahub-rest
30
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9; extra == "datahub-rest"
31
+ Provides-Extra: datahub-kafka
32
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc9; extra == "datahub-kafka"
33
+ Provides-Extra: datahub-file
34
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc9; extra == "datahub-file"
35
+ Provides-Extra: airflow2
36
+ Requires-Dist: openlineage-airflow>=1.2.0; extra == "airflow2"
37
+ Provides-Extra: airflow3
38
+ Requires-Dist: apache-airflow-providers-openlineage>=1.0.0; extra == "airflow3"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pydantic>=2.4.0; extra == "dev"
41
+ Requires-Dist: mypy==1.17.1; extra == "dev"
42
+ Requires-Dist: coverage>=5.1; extra == "dev"
43
+ Requires-Dist: types-toml; extra == "dev"
44
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc9; extra == "dev"
45
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
46
+ Requires-Dist: tox-uv; extra == "dev"
47
+ Requires-Dist: apache-airflow<4.0.0,>=2.5.0; extra == "dev"
48
+ Requires-Dist: tox; extra == "dev"
49
+ Requires-Dist: sqlalchemy-stubs; extra == "dev"
50
+ Requires-Dist: types-tabulate; extra == "dev"
51
+ Requires-Dist: types-cachetools; extra == "dev"
52
+ Requires-Dist: build; extra == "dev"
53
+ Requires-Dist: types-click==0.1.12; extra == "dev"
54
+ Requires-Dist: packaging; extra == "dev"
55
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
56
+ Requires-Dist: deepdiff!=8.0.0; extra == "dev"
57
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9; extra == "dev"
58
+ Requires-Dist: types-python-dateutil; extra == "dev"
59
+ Requires-Dist: types-PyYAML; extra == "dev"
60
+ Requires-Dist: types-dataclasses; extra == "dev"
61
+ Requires-Dist: types-six; extra == "dev"
62
+ Requires-Dist: twine; extra == "dev"
63
+ Requires-Dist: types-requests; extra == "dev"
64
+ Requires-Dist: types-setuptools; extra == "dev"
65
+ Requires-Dist: tenacity; extra == "dev"
66
+ Requires-Dist: ruff==0.11.7; extra == "dev"
67
+ Provides-Extra: integration-tests
68
+ Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
69
+ Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
70
+ Requires-Dist: virtualenv; extra == "integration-tests"
71
+ Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
72
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc9; extra == "integration-tests"
73
+ Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
74
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc9; extra == "integration-tests"
75
+ Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc9; extra == "integration-tests"
76
+ Dynamic: classifier
77
+ Dynamic: description
78
+ Dynamic: description-content-type
79
+ Dynamic: home-page
80
+ Dynamic: license
81
+ Dynamic: project-url
82
+ Dynamic: provides-extra
83
+ Dynamic: requires-dist
84
+ Dynamic: requires-python
85
+ Dynamic: summary
86
+
87
+ # Datahub Airflow Plugin
88
+
89
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
90
+
91
+ ## Version Compatibility
92
+
93
+ The plugin supports Apache Airflow versions 2.7+ and 3.1+.
94
+
95
+ | Airflow Version | Extra to Install | Status | Notes |
96
+ | --------------- | ---------------- | ---------------------- | -------------------------------- |
97
+ | 2.7-2.10 | `[airflow2]` | ✅ Fully Supported | |
98
+ | 3.0.x | `[airflow3]` | ⚠️ Requires manual fix | Needs `pydantic>=2.11.8` upgrade |
99
+ | 3.1+ | `[airflow3]` | ✅ Fully Supported | |
100
+
101
+ **Note on Airflow 3.0.x**: Airflow 3.0.6 pins pydantic==2.11.7, which contains a bug that prevents the DataHub plugin from importing correctly. This issue is resolved in Airflow 3.1.0+ which uses pydantic>=2.11.8. If you must use Airflow 3.0.6, you can manually upgrade pydantic to >=2.11.8, though this may conflict with Airflow's dependency constraints. We recommend upgrading to Airflow 3.1.0 or later.
102
+
103
+ Related issue: https://github.com/pydantic/pydantic/issues/10963
104
+
105
+ ## Installation
106
+
107
+ The installation command varies depending on your Airflow version due to different OpenLineage dependencies.
108
+
109
+ ### For Airflow 2.x (2.7+)
110
+
111
+ ```bash
112
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
113
+ ```
114
+
115
+ This installs the plugin with Legacy OpenLineage (`openlineage-airflow>=1.2.0`), which the plugin uses for Airflow 2.x lineage extraction unless the native OpenLineage provider is installed instead.
116
+
117
+ #### Alternative: Using Native OpenLineage Provider on Airflow 2.7+
118
+
119
+ If your Airflow 2.7+ environment rejects the Legacy OpenLineage package (e.g., due to dependency conflicts), you can use the native OpenLineage provider instead:
120
+
121
+ ```bash
122
+ # Install the native Airflow provider first
123
+ pip install 'apache-airflow-providers-openlineage>=1.0.0'
124
+
125
+ # Then install the DataHub plugin without OpenLineage extras
126
+ pip install acryl-datahub-airflow-plugin
127
+ ```
128
+
129
+ The plugin will automatically detect and use `apache-airflow-providers-openlineage` when available, providing the same functionality.
130
+
131
+ ### For Airflow 3.x (3.1+)
132
+
133
+ ```bash
134
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
135
+ ```
136
+
137
+ This installs the plugin with `apache-airflow-providers-openlineage>=1.0.0`, which is the native OpenLineage provider for Airflow 3.x.
138
+
139
+ **Note**: If using Airflow 3.0.x (3.0.6 specifically), you'll need to manually upgrade pydantic:
140
+
141
+ ```bash
142
+ pip install 'acryl-datahub-airflow-plugin[airflow3]' 'pydantic>=2.11.8'
143
+ ```
144
+
145
+ We recommend using Airflow 3.1.0+ which resolves this issue. See the Version Compatibility section above for details.
146
+
147
+ ### What Gets Installed
148
+
149
+ #### Base Installation (No Extras)
150
+
151
+ When you install without any extras:
152
+
153
+ ```bash
154
+ pip install acryl-datahub-airflow-plugin
155
+ ```
156
+
157
+ You get:
158
+
159
+ - `acryl-datahub[sql-parser,datahub-rest]` - DataHub SDK with SQL parsing and REST emitter
160
+ - `pydantic>=2.4.0` - Required for data validation
161
+ - `apache-airflow>=2.5.0,<4.0.0` - Airflow itself
162
+ - **No OpenLineage package** - You'll need to provide your own or use one of the extras below
163
+
164
+ #### With `[airflow2]` Extra
165
+
166
+ ```bash
167
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
168
+ ```
169
+
170
+ Adds:
171
+
172
+ - `openlineage-airflow>=1.2.0` - Standalone OpenLineage package for Airflow 2.x
173
+
174
+ #### With `[airflow3]` Extra
175
+
176
+ ```bash
177
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
178
+ ```
179
+
180
+ Adds:
181
+
182
+ - `apache-airflow-providers-openlineage>=1.0.0` - Native OpenLineage provider for Airflow 3.x
183
+
184
+ ### Additional Extras
185
+
186
+ You can combine multiple extras if needed:
187
+
188
+ ```bash
189
+ # For Airflow 3.x with Kafka emitter support
190
+ pip install 'acryl-datahub-airflow-plugin[airflow3,datahub-kafka]'
191
+
192
+ # For Airflow 2.x with file emitter support
193
+ pip install 'acryl-datahub-airflow-plugin[airflow2,datahub-file]'
194
+ ```
195
+
196
+ Available extras:
197
+
198
+ - `airflow2`: OpenLineage support for Airflow 2.x (adds `openlineage-airflow>=1.2.0`)
199
+ - `airflow3`: OpenLineage support for Airflow 3.x (adds `apache-airflow-providers-openlineage>=1.0.0`)
200
+ - `datahub-kafka`: Kafka-based metadata emission (adds `acryl-datahub[datahub-kafka]`)
201
+ - `datahub-file`: File-based metadata emission (adds `acryl-datahub[sync-file-emitter]`) - useful for testing
202
+
203
+ ### Why Different Extras?
204
+
205
+ Airflow 2.x and 3.x have different OpenLineage integrations:
206
+
207
+ - **Airflow 2.x (2.5-2.6)** typically uses Legacy OpenLineage (`openlineage-airflow` package)
208
+ - **Airflow 2.x (2.7+)** can use either Legacy OpenLineage or native OpenLineage Provider (`apache-airflow-providers-openlineage`)
209
+ - **Airflow 3.x** uses native OpenLineage Provider (`apache-airflow-providers-openlineage`)
210
+
211
+ The plugin automatically detects which OpenLineage variant is installed and uses it accordingly. This means:
212
+
213
+ 1. **With extras** (`[airflow2]` or `[airflow3]`): The appropriate OpenLineage dependency is installed automatically
214
+ 2. **Without extras**: You provide your own OpenLineage installation, and the plugin auto-detects it
215
+
216
+ This flexibility allows you to adapt to different Airflow environments and dependency constraints.
217
+
218
+ ## Configuration
219
+
220
+ The plugin can be configured via `airflow.cfg` under the `[datahub]` section. Below are the key configuration options:
221
+
222
+ ### Extractor Patching (OpenLineage Enhancements)
223
+
224
+ When `enable_extractors=True` (default), the DataHub plugin enhances OpenLineage extractors to provide better lineage. You can fine-tune these enhancements:
225
+
226
+ ```ini
227
+ [datahub]
228
+ # Enable/disable all OpenLineage extractors
229
+ enable_extractors = True # Default: True
230
+
231
+ # Fine-grained control over DataHub's OpenLineage enhancements
232
+
233
+ # --- Patches (work with both Legacy OpenLineage and OpenLineage Provider) ---
234
+
235
+ # Patch SqlExtractor to use DataHub's advanced SQL parser (enables column-level lineage)
236
+ patch_sql_parser = True # Default: True
237
+
238
+ # Patch SnowflakeExtractor to fix default schema detection
239
+ patch_snowflake_schema = True # Default: True
240
+
241
+ # --- Custom Extractors (Legacy OpenLineage on Airflow 2; applied as operator patches on Airflow 3) ---
242
+
243
+ # Use DataHub's custom AthenaOperatorExtractor (better Athena lineage)
244
+ extract_athena_operator = True # Default: True
245
+
246
+ # Use DataHub's custom BigQueryInsertJobOperatorExtractor (handles BQ job configuration)
247
+ extract_bigquery_insert_job_operator = True # Default: True
248
+ ```
249
+
250
+ **How it works:**
251
+
252
+ **Patches** (apply to both Legacy OpenLineage and OpenLineage Provider):
253
+
254
+ - Apply **monkey-patching** to OpenLineage extractor/operator classes at runtime
255
+ - Work on **both Airflow 2.x and Airflow 3.x**
256
+ - When `patch_sql_parser=True`:
257
+ - **Airflow 2**: Patches `SqlExtractor.extract()` method
258
+ - **Airflow 3**: Patches `SQLParser.generate_openlineage_metadata_from_sql()` method
259
+ - Provides: More accurate lineage extraction, column-level lineage (CLL), better SQL dialect support
260
+ - When `patch_snowflake_schema=True`:
261
+ - **Airflow 2**: Patches `SnowflakeExtractor.default_schema` property
262
+ - **Airflow 3**: Currently not needed (handled by Airflow's native support)
263
+ - Fixes Snowflake schema detection issues
264
+
265
+ **Custom Extractors/Operator Patches**:
266
+
267
+ - Register DataHub's custom implementations for specific operators
268
+ - Work on **both Airflow 2.x and Airflow 3.x**
269
+ - `extract_athena_operator`:
270
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `AthenaOperatorExtractor`
271
+ - **Airflow 3**: Patches `AthenaOperator.get_openlineage_facets_on_complete()`
272
+ - Uses DataHub's SQL parser for better Athena lineage
273
+ - `extract_bigquery_insert_job_operator`:
274
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `BigQueryInsertJobOperatorExtractor`
275
+ - **Airflow 3**: Patches `BigQueryInsertJobOperator.get_openlineage_facets_on_complete()`
276
+ - Handles BigQuery job configuration and destination tables
277
+
278
+ **Example use cases:**
279
+
280
+ Disable DataHub's SQL parser to use OpenLineage's native parsing:
281
+
282
+ ```ini
283
+ [datahub]
284
+ enable_extractors = True
285
+ patch_sql_parser = False # Use OpenLineage's native SQL parser
286
+ patch_snowflake_schema = True # Still fix Snowflake schema detection
287
+ ```
288
+
289
+ Disable DataHub's custom Athena handling (the custom extractor on Airflow 2 with Legacy OpenLineage, or the `AthenaOperator` patch on Airflow 3):
290
+
291
+ ```ini
292
+ [datahub]
293
+ enable_extractors = True
294
+ extract_athena_operator = False # Use OpenLineage's default Athena extractor
295
+ ```
296
+
297
+ ### Other Configuration Options
298
+
299
+ For a complete list of configuration options, see the [DataHub Airflow documentation](https://docs.datahub.com/docs/lineage/airflow#configuration).
300
+
301
+ ## Developing
302
+
303
+ See the [developing docs](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/developing.md).
@@ -0,0 +1,217 @@
1
+ # Datahub Airflow Plugin
2
+
3
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
4
+
5
+ ## Version Compatibility
6
+
7
+ The plugin supports Apache Airflow versions 2.7+ and 3.1+.
8
+
9
+ | Airflow Version | Extra to Install | Status | Notes |
10
+ | --------------- | ---------------- | ---------------------- | -------------------------------- |
11
+ | 2.7-2.10 | `[airflow2]` | ✅ Fully Supported | |
12
+ | 3.0.x | `[airflow3]` | ⚠️ Requires manual fix | Needs `pydantic>=2.11.8` upgrade |
13
+ | 3.1+ | `[airflow3]` | ✅ Fully Supported | |
14
+
15
+ **Note on Airflow 3.0.x**: Airflow 3.0.6 pins pydantic==2.11.7, which contains a bug that prevents the DataHub plugin from importing correctly. This issue is resolved in Airflow 3.1.0+ which uses pydantic>=2.11.8. If you must use Airflow 3.0.6, you can manually upgrade pydantic to >=2.11.8, though this may conflict with Airflow's dependency constraints. We recommend upgrading to Airflow 3.1.0 or later.
16
+
17
+ Related issue: https://github.com/pydantic/pydantic/issues/10963
18
+
19
+ ## Installation
20
+
21
+ The installation command varies depending on your Airflow version due to different OpenLineage dependencies.
22
+
23
+ ### For Airflow 2.x (2.7+)
24
+
25
+ ```bash
26
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
27
+ ```
28
+
29
+ This installs the plugin with Legacy OpenLineage (`openlineage-airflow>=1.2.0`), which the plugin uses for Airflow 2.x lineage extraction unless the native OpenLineage provider is installed instead.
30
+
31
+ #### Alternative: Using Native OpenLineage Provider on Airflow 2.7+
32
+
33
+ If your Airflow 2.7+ environment rejects the Legacy OpenLineage package (e.g., due to dependency conflicts), you can use the native OpenLineage provider instead:
34
+
35
+ ```bash
36
+ # Install the native Airflow provider first
37
+ pip install 'apache-airflow-providers-openlineage>=1.0.0'
38
+
39
+ # Then install the DataHub plugin without OpenLineage extras
40
+ pip install acryl-datahub-airflow-plugin
41
+ ```
42
+
43
+ The plugin will automatically detect and use `apache-airflow-providers-openlineage` when available, providing the same functionality.
44
+
45
+ ### For Airflow 3.x (3.1+)
46
+
47
+ ```bash
48
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
49
+ ```
50
+
51
+ This installs the plugin with `apache-airflow-providers-openlineage>=1.0.0`, which is the native OpenLineage provider for Airflow 3.x.
52
+
53
+ **Note**: If using Airflow 3.0.x (3.0.6 specifically), you'll need to manually upgrade pydantic:
54
+
55
+ ```bash
56
+ pip install 'acryl-datahub-airflow-plugin[airflow3]' 'pydantic>=2.11.8'
57
+ ```
58
+
59
+ We recommend using Airflow 3.1.0+ which resolves this issue. See the Version Compatibility section above for details.
60
+
61
+ ### What Gets Installed
62
+
63
+ #### Base Installation (No Extras)
64
+
65
+ When you install without any extras:
66
+
67
+ ```bash
68
+ pip install acryl-datahub-airflow-plugin
69
+ ```
70
+
71
+ You get:
72
+
73
+ - `acryl-datahub[sql-parser,datahub-rest]` - DataHub SDK with SQL parsing and REST emitter
74
+ - `pydantic>=2.4.0` - Required for data validation
75
+ - `apache-airflow>=2.5.0,<4.0.0` - Airflow itself
76
+ - **No OpenLineage package** - You'll need to provide your own or use one of the extras below
77
+
78
+ #### With `[airflow2]` Extra
79
+
80
+ ```bash
81
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
82
+ ```
83
+
84
+ Adds:
85
+
86
+ - `openlineage-airflow>=1.2.0` - Standalone OpenLineage package for Airflow 2.x
87
+
88
+ #### With `[airflow3]` Extra
89
+
90
+ ```bash
91
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
92
+ ```
93
+
94
+ Adds:
95
+
96
+ - `apache-airflow-providers-openlineage>=1.0.0` - Native OpenLineage provider for Airflow 3.x
97
+
98
+ ### Additional Extras
99
+
100
+ You can combine multiple extras if needed:
101
+
102
+ ```bash
103
+ # For Airflow 3.x with Kafka emitter support
104
+ pip install 'acryl-datahub-airflow-plugin[airflow3,datahub-kafka]'
105
+
106
+ # For Airflow 2.x with file emitter support
107
+ pip install 'acryl-datahub-airflow-plugin[airflow2,datahub-file]'
108
+ ```
109
+
110
+ Available extras:
111
+
112
+ - `airflow2`: OpenLineage support for Airflow 2.x (adds `openlineage-airflow>=1.2.0`)
113
+ - `airflow3`: OpenLineage support for Airflow 3.x (adds `apache-airflow-providers-openlineage>=1.0.0`)
114
+ - `datahub-kafka`: Kafka-based metadata emission (adds `acryl-datahub[datahub-kafka]`)
115
+ - `datahub-file`: File-based metadata emission (adds `acryl-datahub[sync-file-emitter]`) - useful for testing
116
+
117
+ ### Why Different Extras?
118
+
119
+ Airflow 2.x and 3.x have different OpenLineage integrations:
120
+
121
+ - **Airflow 2.x (2.5-2.6)** typically uses Legacy OpenLineage (`openlineage-airflow` package)
122
+ - **Airflow 2.x (2.7+)** can use either Legacy OpenLineage or native OpenLineage Provider (`apache-airflow-providers-openlineage`)
123
+ - **Airflow 3.x** uses native OpenLineage Provider (`apache-airflow-providers-openlineage`)
124
+
125
+ The plugin automatically detects which OpenLineage variant is installed and uses it accordingly. This means:
126
+
127
+ 1. **With extras** (`[airflow2]` or `[airflow3]`): The appropriate OpenLineage dependency is installed automatically
128
+ 2. **Without extras**: You provide your own OpenLineage installation, and the plugin auto-detects it
129
+
130
+ This flexibility allows you to adapt to different Airflow environments and dependency constraints.
131
+
132
+ ## Configuration
133
+
134
+ The plugin can be configured via `airflow.cfg` under the `[datahub]` section. Below are the key configuration options:
135
+
136
+ ### Extractor Patching (OpenLineage Enhancements)
137
+
138
+ When `enable_extractors=True` (default), the DataHub plugin enhances OpenLineage extractors to provide better lineage. You can fine-tune these enhancements:
139
+
140
+ ```ini
141
+ [datahub]
142
+ # Enable/disable all OpenLineage extractors
143
+ enable_extractors = True # Default: True
144
+
145
+ # Fine-grained control over DataHub's OpenLineage enhancements
146
+
147
+ # --- Patches (work with both Legacy OpenLineage and OpenLineage Provider) ---
148
+
149
+ # Patch SqlExtractor to use DataHub's advanced SQL parser (enables column-level lineage)
150
+ patch_sql_parser = True # Default: True
151
+
152
+ # Patch SnowflakeExtractor to fix default schema detection
153
+ patch_snowflake_schema = True # Default: True
154
+
155
+ # --- Custom Extractors (Legacy OpenLineage on Airflow 2; applied as operator patches on Airflow 3) ---
156
+
157
+ # Use DataHub's custom AthenaOperatorExtractor (better Athena lineage)
158
+ extract_athena_operator = True # Default: True
159
+
160
+ # Use DataHub's custom BigQueryInsertJobOperatorExtractor (handles BQ job configuration)
161
+ extract_bigquery_insert_job_operator = True # Default: True
162
+ ```
163
+
164
+ **How it works:**
165
+
166
+ **Patches** (apply to both Legacy OpenLineage and OpenLineage Provider):
167
+
168
+ - Apply **monkey-patching** to OpenLineage extractor/operator classes at runtime
169
+ - Work on **both Airflow 2.x and Airflow 3.x**
170
+ - When `patch_sql_parser=True`:
171
+ - **Airflow 2**: Patches `SqlExtractor.extract()` method
172
+ - **Airflow 3**: Patches `SQLParser.generate_openlineage_metadata_from_sql()` method
173
+ - Provides: More accurate lineage extraction, column-level lineage (CLL), better SQL dialect support
174
+ - When `patch_snowflake_schema=True`:
175
+ - **Airflow 2**: Patches `SnowflakeExtractor.default_schema` property
176
+ - **Airflow 3**: Currently not needed (handled by Airflow's native support)
177
+ - Fixes Snowflake schema detection issues
178
+
179
+ **Custom Extractors/Operator Patches**:
180
+
181
+ - Register DataHub's custom implementations for specific operators
182
+ - Work on **both Airflow 2.x and Airflow 3.x**
183
+ - `extract_athena_operator`:
184
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `AthenaOperatorExtractor`
185
+ - **Airflow 3**: Patches `AthenaOperator.get_openlineage_facets_on_complete()`
186
+ - Uses DataHub's SQL parser for better Athena lineage
187
+ - `extract_bigquery_insert_job_operator`:
188
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `BigQueryInsertJobOperatorExtractor`
189
+ - **Airflow 3**: Patches `BigQueryInsertJobOperator.get_openlineage_facets_on_complete()`
190
+ - Handles BigQuery job configuration and destination tables
191
+
192
+ **Example use cases:**
193
+
194
+ Disable DataHub's SQL parser to use OpenLineage's native parsing:
195
+
196
+ ```ini
197
+ [datahub]
198
+ enable_extractors = True
199
+ patch_sql_parser = False # Use OpenLineage's native SQL parser
200
+ patch_snowflake_schema = True # Still fix Snowflake schema detection
201
+ ```
202
+
203
+ Disable DataHub's custom Athena handling (the custom extractor on Airflow 2 with Legacy OpenLineage, or the `AthenaOperator` patch on Airflow 3):
204
+
205
+ ```ini
206
+ [datahub]
207
+ enable_extractors = True
208
+ extract_athena_operator = False # Use OpenLineage's default Athena extractor
209
+ ```
210
+
211
+ ### Other Configuration Options
212
+
213
+ For a complete list of configuration options, see the [DataHub Airflow documentation](https://docs.datahub.com/docs/lineage/airflow#configuration).
214
+
215
+ ## Developing
216
+
217
+ See the [developing docs](../../metadata-ingestion/developing.md).
@@ -9,6 +9,24 @@ check_untyped_defs = yes
9
9
  disallow_incomplete_defs = yes
10
10
  disallow_untyped_decorators = yes
11
11
  warn_unused_configs = yes
12
+
13
+ [mypy-datahub_airflow_plugin._extractors]
14
+ ignore_errors = True
15
+
16
+ [mypy-datahub_airflow_plugin.airflow2._extractors]
17
+ ignore_errors = True
18
+
19
+ [mypy-datahub_airflow_plugin.airflow2.datahub_listener]
20
+ ignore_errors = True
21
+
22
+ [mypy-tests.unit.test_teradata_extractor]
23
+ ignore_errors = True
24
+
25
+ [mypy-datahub_airflow_plugin.example_dags.*]
26
+ ignore_errors = True
27
+
28
+ [mypy-tests.integration.dags.*]
29
+ ignore_errors = True
12
30
  disallow_untyped_defs = no
13
31
 
14
32
  [mypy-datahub.*]
@@ -25,12 +25,12 @@ _self_pin = (
25
25
  base_requirements = {
26
26
  f"acryl-datahub[sql-parser,datahub-rest]{_self_pin}",
27
27
  "pydantic>=2.4.0",
28
- # We require Airflow 2.7.x at minimum, to be compatible with the native Airflow Openlineage provider.
29
- "apache-airflow>=2.7.0,<3",
30
- # We remain restrictive on the versions allowed here to prevent
31
- # us from being broken by backwards-incompatible changes in the
32
- # underlying package.
33
- "openlineage-airflow>=1.2.0,<=1.30.1",
28
+ # We require Airflow 2.5.x at minimum, since we need the new DAG listener API.
29
+ # We support both Airflow 2.x and 3.x with full backward compatibility.
30
+ "apache-airflow>=2.5.0,<4.0.0",
31
+ # Note: OpenLineage dependencies are version-specific and provided via extras:
32
+ # - airflow2: for Airflow 2.x (uses standalone openlineage-airflow package)
33
+ # - airflow3: for Airflow 3.x (uses native apache-airflow-providers-openlineage)
34
34
  }
35
35
 
36
36
  plugins: Dict[str, Set[str]] = {
@@ -43,6 +43,14 @@ plugins: Dict[str, Set[str]] = {
43
43
  "datahub-file": {
44
44
  f"acryl-datahub[sync-file-emitter]{_self_pin}",
45
45
  },
46
+ # airflow2: For Airflow 2.x, use standalone openlineage-airflow package
47
+ "airflow2": {
48
+ "openlineage-airflow>=1.2.0",
49
+ },
50
+ # airflow3: For Airflow 3.x, use native OpenLineage provider
51
+ "airflow3": {
52
+ "apache-airflow-providers-openlineage>=1.0.0",
53
+ },
46
54
  }
47
55
 
48
56
  # Require some plugins by default.