acryl-datahub-airflow-plugin 1.3.1.5rc6__py3-none-any.whl → 1.3.1.5rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/METADATA +303 -0
  2. acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/RECORD +65 -0
  3. datahub_airflow_plugin/_airflow_compat.py +32 -0
  4. datahub_airflow_plugin/_airflow_shims.py +64 -31
  5. datahub_airflow_plugin/_airflow_version_specific.py +184 -0
  6. datahub_airflow_plugin/_config.py +97 -19
  7. datahub_airflow_plugin/_constants.py +16 -0
  8. datahub_airflow_plugin/_datahub_ol_adapter.py +14 -2
  9. datahub_airflow_plugin/_version.py +1 -1
  10. datahub_airflow_plugin/airflow2/__init__.py +6 -0
  11. datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +402 -0
  12. datahub_airflow_plugin/airflow2/_airflow_compat.py +95 -0
  13. datahub_airflow_plugin/airflow2/_extractors.py +477 -0
  14. datahub_airflow_plugin/airflow2/_legacy_shims.py +20 -0
  15. datahub_airflow_plugin/airflow2/_openlineage_compat.py +123 -0
  16. datahub_airflow_plugin/airflow2/_provider_shims.py +29 -0
  17. datahub_airflow_plugin/airflow2/_shims.py +88 -0
  18. datahub_airflow_plugin/airflow2/datahub_listener.py +1072 -0
  19. datahub_airflow_plugin/airflow3/__init__.py +6 -0
  20. datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +408 -0
  21. datahub_airflow_plugin/airflow3/_airflow_compat.py +108 -0
  22. datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +153 -0
  23. datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +273 -0
  24. datahub_airflow_plugin/airflow3/_shims.py +82 -0
  25. datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +88 -0
  26. datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +308 -0
  27. datahub_airflow_plugin/airflow3/datahub_listener.py +1452 -0
  28. datahub_airflow_plugin/client/airflow_generator.py +147 -43
  29. datahub_airflow_plugin/datahub_listener.py +19 -790
  30. datahub_airflow_plugin/example_dags/__init__.py +32 -0
  31. datahub_airflow_plugin/example_dags/airflow2/__init__.py +8 -0
  32. datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +54 -0
  33. datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py +43 -0
  34. datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +69 -0
  35. datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +69 -0
  36. datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +81 -0
  37. datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +68 -0
  38. datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +99 -0
  39. datahub_airflow_plugin/example_dags/airflow3/__init__.py +8 -0
  40. datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +51 -0
  41. datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +51 -0
  42. datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +89 -0
  43. datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +12 -4
  44. datahub_airflow_plugin/hooks/datahub.py +11 -2
  45. datahub_airflow_plugin/operators/datahub.py +20 -3
  46. acryl_datahub_airflow_plugin-1.3.1.5rc6.dist-info/METADATA +0 -91
  47. acryl_datahub_airflow_plugin-1.3.1.5rc6.dist-info/RECORD +0 -33
  48. datahub_airflow_plugin/_extractors.py +0 -365
  49. {acryl_datahub_airflow_plugin-1.3.1.5rc6.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info}/WHEEL +0 -0
  50. {acryl_datahub_airflow_plugin-1.3.1.5rc6.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info}/entry_points.txt +0 -0
  51. {acryl_datahub_airflow_plugin-1.3.1.5rc6.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,303 @@
1
+ Metadata-Version: 2.4
2
+ Name: acryl-datahub-airflow-plugin
3
+ Version: 1.3.1.5rc9
4
+ Summary: Datahub Airflow plugin to capture executions and send to Datahub
5
+ Home-page: https://docs.datahub.com/
6
+ License: Apache-2.0
7
+ Project-URL: Documentation, https://docs.datahub.com/docs/
8
+ Project-URL: Source, https://github.com/datahub-project/datahub
9
+ Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Programming Language :: Python
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: Intended Audience :: System Administrators
17
+ Classifier: Operating System :: Unix
18
+ Classifier: Operating System :: POSIX :: Linux
19
+ Classifier: Environment :: Console
20
+ Classifier: Environment :: MacOS X
21
+ Classifier: Topic :: Software Development
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9
25
+ Requires-Dist: pydantic>=2.4.0
26
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc9
27
+ Requires-Dist: apache-airflow<4.0.0,>=2.5.0
28
+ Provides-Extra: ignore
29
+ Provides-Extra: datahub-rest
30
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9; extra == "datahub-rest"
31
+ Provides-Extra: datahub-kafka
32
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc9; extra == "datahub-kafka"
33
+ Provides-Extra: datahub-file
34
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc9; extra == "datahub-file"
35
+ Provides-Extra: airflow2
36
+ Requires-Dist: openlineage-airflow>=1.2.0; extra == "airflow2"
37
+ Provides-Extra: airflow3
38
+ Requires-Dist: apache-airflow-providers-openlineage>=1.0.0; extra == "airflow3"
39
+ Provides-Extra: dev
40
+ Requires-Dist: coverage>=5.1; extra == "dev"
41
+ Requires-Dist: tox; extra == "dev"
42
+ Requires-Dist: apache-airflow<4.0.0,>=2.5.0; extra == "dev"
43
+ Requires-Dist: sqlalchemy-stubs; extra == "dev"
44
+ Requires-Dist: types-tabulate; extra == "dev"
45
+ Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc9; extra == "dev"
46
+ Requires-Dist: types-setuptools; extra == "dev"
47
+ Requires-Dist: packaging; extra == "dev"
48
+ Requires-Dist: tox-uv; extra == "dev"
49
+ Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc9; extra == "dev"
50
+ Requires-Dist: ruff==0.11.7; extra == "dev"
51
+ Requires-Dist: types-six; extra == "dev"
52
+ Requires-Dist: tenacity; extra == "dev"
53
+ Requires-Dist: twine; extra == "dev"
54
+ Requires-Dist: pydantic>=2.4.0; extra == "dev"
55
+ Requires-Dist: types-dataclasses; extra == "dev"
56
+ Requires-Dist: pytest>=6.2.2; extra == "dev"
57
+ Requires-Dist: types-python-dateutil; extra == "dev"
58
+ Requires-Dist: types-toml; extra == "dev"
59
+ Requires-Dist: mypy==1.17.1; extra == "dev"
60
+ Requires-Dist: types-requests; extra == "dev"
61
+ Requires-Dist: types-PyYAML; extra == "dev"
62
+ Requires-Dist: types-cachetools; extra == "dev"
63
+ Requires-Dist: deepdiff!=8.0.0; extra == "dev"
64
+ Requires-Dist: types-click==0.1.12; extra == "dev"
65
+ Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
66
+ Requires-Dist: build; extra == "dev"
67
+ Provides-Extra: integration-tests
68
+ Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc9; extra == "integration-tests"
69
+ Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc9; extra == "integration-tests"
70
+ Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
71
+ Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
72
+ Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc9; extra == "integration-tests"
73
+ Requires-Dist: virtualenv; extra == "integration-tests"
74
+ Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
75
+ Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
76
+ Dynamic: classifier
77
+ Dynamic: description
78
+ Dynamic: description-content-type
79
+ Dynamic: home-page
80
+ Dynamic: license
81
+ Dynamic: project-url
82
+ Dynamic: provides-extra
83
+ Dynamic: requires-dist
84
+ Dynamic: requires-python
85
+ Dynamic: summary
86
+
87
+ # Datahub Airflow Plugin
88
+
89
+ See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
90
+
91
+ ## Version Compatibility
92
+
93
+ The plugin fully supports Apache Airflow versions 2.7-2.10 and 3.1+. Airflow 3.0.x works only after a manual fix (see the table and note below).
94
+
95
+ | Airflow Version | Extra to Install | Status | Notes |
96
+ | --------------- | ---------------- | ---------------------- | -------------------------------- |
97
+ | 2.7-2.10 | `[airflow2]` | ✅ Fully Supported | |
98
+ | 3.0.x | `[airflow3]` | ⚠️ Requires manual fix | Needs `pydantic>=2.11.8` upgrade |
99
+ | 3.1+ | `[airflow3]` | ✅ Fully Supported | |
100
+
101
+ **Note on Airflow 3.0.x**: Airflow 3.0.6 pins `pydantic==2.11.7`, which contains a bug that prevents the DataHub plugin from importing correctly. This issue is resolved in Airflow 3.1.0+, which uses `pydantic>=2.11.8`. If you must use Airflow 3.0.6, you can manually upgrade pydantic to `>=2.11.8`, though this may conflict with Airflow's dependency constraints. We recommend upgrading to Airflow 3.1.0 or later.
102
+
103
+ Related issue: https://github.com/pydantic/pydantic/issues/10963
104
+
105
+ ## Installation
106
+
107
+ The installation command varies depending on your Airflow version due to different OpenLineage dependencies.
108
+
109
+ ### For Airflow 2.x (2.7+)
110
+
111
+ ```bash
112
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
113
+ ```
114
+
115
+ This installs the plugin with Legacy OpenLineage (`openlineage-airflow>=1.2.0`), which is required for Airflow 2.x lineage extraction.
116
+
117
+ #### Alternative: Using Native OpenLineage Provider on Airflow 2.7+
118
+
119
+ If your Airflow 2.7+ environment rejects the Legacy OpenLineage package (e.g., due to dependency conflicts), you can use the native OpenLineage provider instead:
120
+
121
+ ```bash
122
+ # Install the native Airflow provider first
123
+ pip install 'apache-airflow-providers-openlineage>=1.0.0'
124
+
125
+ # Then install the DataHub plugin without OpenLineage extras
126
+ pip install acryl-datahub-airflow-plugin
127
+ ```
128
+
129
+ The plugin will automatically detect and use `apache-airflow-providers-openlineage` when available, providing the same functionality.
130
+
131
+ ### For Airflow 3.x (3.1+)
132
+
133
+ ```bash
134
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
135
+ ```
136
+
137
+ This installs the plugin with `apache-airflow-providers-openlineage>=1.0.0`, which is the native OpenLineage provider for Airflow 3.x.
138
+
139
+ **Note**: If using Airflow 3.0.x (3.0.6 specifically), you'll need to manually upgrade pydantic:
140
+
141
+ ```bash
142
+ pip install 'acryl-datahub-airflow-plugin[airflow3]' 'pydantic>=2.11.8'
143
+ ```
144
+
145
+ We recommend using Airflow 3.1.0+ which resolves this issue. See the Version Compatibility section above for details.
146
+
147
+ ### What Gets Installed
148
+
149
+ #### Base Installation (No Extras)
150
+
151
+ When you install without any extras:
152
+
153
+ ```bash
154
+ pip install acryl-datahub-airflow-plugin
155
+ ```
156
+
157
+ You get:
158
+
159
+ - `acryl-datahub[sql-parser,datahub-rest]` - DataHub SDK with SQL parsing and REST emitter
160
+ - `pydantic>=2.4.0` - Required for data validation
161
+ - `apache-airflow>=2.5.0,<4.0.0` - Airflow itself
162
+ - **No OpenLineage package** - You'll need to provide your own or use one of the extras below
163
+
164
+ #### With `[airflow2]` Extra
165
+
166
+ ```bash
167
+ pip install 'acryl-datahub-airflow-plugin[airflow2]'
168
+ ```
169
+
170
+ Adds:
171
+
172
+ - `openlineage-airflow>=1.2.0` - Standalone OpenLineage package for Airflow 2.x
173
+
174
+ #### With `[airflow3]` Extra
175
+
176
+ ```bash
177
+ pip install 'acryl-datahub-airflow-plugin[airflow3]'
178
+ ```
179
+
180
+ Adds:
181
+
182
+ - `apache-airflow-providers-openlineage>=1.0.0` - Native OpenLineage provider for Airflow 3.x
183
+
184
+ ### Additional Extras
185
+
186
+ You can combine multiple extras if needed:
187
+
188
+ ```bash
189
+ # For Airflow 3.x with Kafka emitter support
190
+ pip install 'acryl-datahub-airflow-plugin[airflow3,datahub-kafka]'
191
+
192
+ # For Airflow 2.x with file emitter support
193
+ pip install 'acryl-datahub-airflow-plugin[airflow2,datahub-file]'
194
+ ```
195
+
196
+ Available extras:
197
+
198
+ - `airflow2`: OpenLineage support for Airflow 2.x (adds `openlineage-airflow>=1.2.0`)
199
+ - `airflow3`: OpenLineage support for Airflow 3.x (adds `apache-airflow-providers-openlineage>=1.0.0`)
200
+ - `datahub-kafka`: Kafka-based metadata emission (adds `acryl-datahub[datahub-kafka]`)
201
+ - `datahub-file`: File-based metadata emission (adds `acryl-datahub[sync-file-emitter]`) - useful for testing
202
+
203
+ ### Why Different Extras?
204
+
205
+ Airflow 2.x and 3.x have different OpenLineage integrations:
206
+
207
+ - **Airflow 2.x (2.5-2.6)** typically uses Legacy OpenLineage (`openlineage-airflow` package)
208
+ - **Airflow 2.x (2.7+)** can use either Legacy OpenLineage or native OpenLineage Provider (`apache-airflow-providers-openlineage`)
209
+ - **Airflow 3.x** uses native OpenLineage Provider (`apache-airflow-providers-openlineage`)
210
+
211
+ The plugin automatically detects which OpenLineage variant is installed and uses it accordingly. This means:
212
+
213
+ 1. **With extras** (`[airflow2]` or `[airflow3]`): The appropriate OpenLineage dependency is installed automatically
214
+ 2. **Without extras**: You provide your own OpenLineage installation, and the plugin auto-detects it
215
+
216
+ This flexibility allows you to adapt to different Airflow environments and dependency constraints.
217
+
218
+ ## Configuration
219
+
220
+ The plugin can be configured via `airflow.cfg` under the `[datahub]` section. Below are the key configuration options:
221
+
222
+ ### Extractor Patching (OpenLineage Enhancements)
223
+
224
+ When `enable_extractors=True` (default), the DataHub plugin enhances OpenLineage extractors to provide better lineage. You can fine-tune these enhancements:
225
+
226
+ ```ini
227
+ [datahub]
228
+ # Enable/disable all OpenLineage extractors
229
+ enable_extractors = True # Default: True
230
+
231
+ # Fine-grained control over DataHub's OpenLineage enhancements
232
+
233
+ # --- Patches (work with both Legacy OpenLineage and OpenLineage Provider) ---
234
+
235
+ # Patch SqlExtractor to use DataHub's advanced SQL parser (enables column-level lineage)
236
+ patch_sql_parser = True # Default: True
237
+
238
+ # Patch SnowflakeExtractor to fix default schema detection
239
+ patch_snowflake_schema = True # Default: True
240
+
241
+ # --- Custom Extractors / Operator Patches (Airflow 2: Legacy OpenLineage only; Airflow 3: operator patches) ---
242
+
243
+ # Use DataHub's custom AthenaOperatorExtractor (better Athena lineage)
244
+ extract_athena_operator = True # Default: True
245
+
246
+ # Use DataHub's custom BigQueryInsertJobOperatorExtractor (handles BQ job configuration)
247
+ extract_bigquery_insert_job_operator = True # Default: True
248
+ ```
249
+
250
+ **How it works:**
251
+
252
+ **Patches** (apply to both Legacy OpenLineage and OpenLineage Provider):
253
+
254
+ - Apply **monkey-patching** to OpenLineage extractor/operator classes at runtime
255
+ - Work on **both Airflow 2.x and Airflow 3.x**
256
+ - When `patch_sql_parser=True`:
257
+ - **Airflow 2**: Patches `SqlExtractor.extract()` method
258
+ - **Airflow 3**: Patches `SQLParser.generate_openlineage_metadata_from_sql()` method
259
+ - Provides: More accurate lineage extraction, column-level lineage (CLL), better SQL dialect support
260
+ - When `patch_snowflake_schema=True`:
261
+ - **Airflow 2**: Patches `SnowflakeExtractor.default_schema` property
262
+ - **Airflow 3**: Currently not needed (handled by Airflow's native support)
263
+ - Fixes Snowflake schema detection issues
264
+
265
+ **Custom Extractors/Operator Patches**:
266
+
267
+ - Register DataHub's custom implementations for specific operators
268
+ - Work on **both Airflow 2.x and Airflow 3.x**
269
+ - `extract_athena_operator`:
270
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `AthenaOperatorExtractor`
271
+ - **Airflow 3**: Patches `AthenaOperator.get_openlineage_facets_on_complete()`
272
+ - Uses DataHub's SQL parser for better Athena lineage
273
+ - `extract_bigquery_insert_job_operator`:
274
+ - **Airflow 2 (Legacy OpenLineage only)**: Registers `BigQueryInsertJobOperatorExtractor`
275
+ - **Airflow 3**: Patches `BigQueryInsertJobOperator.get_openlineage_facets_on_complete()`
276
+ - Handles BigQuery job configuration and destination tables
277
+
278
+ **Example use cases:**
279
+
280
+ Disable DataHub's SQL parser to use OpenLineage's native parsing:
281
+
282
+ ```ini
283
+ [datahub]
284
+ enable_extractors = True
285
+ patch_sql_parser = False # Use OpenLineage's native SQL parser
286
+ patch_snowflake_schema = True # Still fix Snowflake schema detection
287
+ ```
288
+
289
+ Disable DataHub's custom Athena handling (the `AthenaOperatorExtractor` on Airflow 2 with Legacy OpenLineage, or the `AthenaOperator` patch on Airflow 3):
290
+
291
+ ```ini
292
+ [datahub]
293
+ enable_extractors = True
294
+ extract_athena_operator = False # Use OpenLineage's default Athena extractor
295
+ ```
296
+
297
+ ### Other Configuration Options
298
+
299
+ For a complete list of configuration options, see the [DataHub Airflow documentation](https://docs.datahub.com/docs/lineage/airflow#configuration).
300
+
301
+ ## Developing
302
+
303
+ See the [developing docs](../../metadata-ingestion/developing.md).
@@ -0,0 +1,65 @@
1
+ datahub_airflow_plugin/__init__.py,sha256=NScUtA8N-m66Pyg0DO--YbPkrl48PK3UevpdQVW_y6E,1009
2
+ datahub_airflow_plugin/_airflow_compat.py,sha256=VdlTeQ7XGU40uqeAfnJcDGLt8QD2lLpEhQw1ZEMTFvI,1345
3
+ datahub_airflow_plugin/_airflow_shims.py,sha256=fe-BwJxdA56P6j_nD1uMwmNNCE0FnlbHsqQME30uQ_o,2243
4
+ datahub_airflow_plugin/_airflow_version_specific.py,sha256=q4iNyScNQTNZnoC9n_8wYI99mhqiXbxUhjAv2x_sNks,5870
5
+ datahub_airflow_plugin/_config.py,sha256=AAvxIGcG-wQWkFbpGCRurjnNXVCzqHDBmsFsSRHOEi4,9050
6
+ datahub_airflow_plugin/_constants.py,sha256=HBrUztUwspK1mQ3iM9_Pdiu18pn-lPK3xhjmubqn_kI,627
7
+ datahub_airflow_plugin/_datahub_ol_adapter.py,sha256=2YIhJDyLhzZGK3MMra4NGSIfG-az8sKt6ZXcbQGs2Yg,951
8
+ datahub_airflow_plugin/_version.py,sha256=pYQwM4cdD49xNeRp2fSnBRQ33_fIW_4kuPQE_s4bJtQ,148
9
+ datahub_airflow_plugin/datahub_listener.py,sha256=9g-MBf14zFs-cQP0Jr_9gnCbzQVqIe_x6ZjgQs0onzE,881
10
+ datahub_airflow_plugin/datahub_plugin.py,sha256=rbZhs7s5O3_MlkQw5aZToC2W5mMic_EpI3oybHB0ofw,1224
11
+ datahub_airflow_plugin/entities.py,sha256=xDZ-mZH7hjUkZbatWYUwI43_9B40wGiotlyQhiO8rEM,1987
12
+ datahub_airflow_plugin/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ datahub_airflow_plugin/airflow2/__init__.py,sha256=WdCqiDRuXMkPcLPb3D5PQYZJaRyDtuEIJ7ndHSThjkg,200
14
+ datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py,sha256=nkt57jYixcocrfYpQ3bhAUDTeyblt8p99_QdNzqJl_o,16145
15
+ datahub_airflow_plugin/airflow2/_airflow_compat.py,sha256=7GumOc-bOCySp739MEJmksgHsiElSNfm29hB3kHyJPY,4005
16
+ datahub_airflow_plugin/airflow2/_extractors.py,sha256=S4LbR9_xDMl76YErWgS_FsPzFW1DUDfsOZ0A9tKrZJ4,17981
17
+ datahub_airflow_plugin/airflow2/_legacy_shims.py,sha256=rToXtWz0p41_jIsjPk4ALVL0T8NcH_RXRNuW47uDHDU,527
18
+ datahub_airflow_plugin/airflow2/_openlineage_compat.py,sha256=43Xgtl2LdEVsIxJUb-DFGYoP-kaEots599rTgNdCMD4,4517
19
+ datahub_airflow_plugin/airflow2/_provider_shims.py,sha256=GD_r_RojjBWWFfxvakQWr4eTgPplfkRCMZMRDTaApM4,961
20
+ datahub_airflow_plugin/airflow2/_shims.py,sha256=Ha4gLxDU_GVPj2T0dHTwru4Lm-kYb9rRsL7uhiba0Gs,2769
21
+ datahub_airflow_plugin/airflow2/datahub_listener.py,sha256=IdvTYLY5TWbukJ44HmJI0GJnPHqu60wR8RQt10I3wss,42611
22
+ datahub_airflow_plugin/airflow3/__init__.py,sha256=OIFuu2zwlzBMcB2FVlkM2aXsCVPW6Xoa82pEjPZK6g0,200
23
+ datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py,sha256=7LZjG8I0Vl8sI4j7RZsoZt7IkG9RQ-tL8CxZcYIVqQE,16460
24
+ datahub_airflow_plugin/airflow3/_airflow_compat.py,sha256=g5q6XfOZZQhhJwNHwYb0U_wML7_2K0BsGGnGzxEk22M,3857
25
+ datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py,sha256=BoyCFAZM7lB1lJqqu2XuLF_wgSMC4oUwRlu8e1E8ntU,6709
26
+ datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py,sha256=ph9ZePU8eff1ew-j5dvM-vBmp-Or9b1CsnOevAqoluM,10228
27
+ datahub_airflow_plugin/airflow3/_shims.py,sha256=q2Nw5vS21R_y0cYg3aOm7AYylaNAXlN480ve5XItfNU,2553
28
+ datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py,sha256=QRgadtwS6a42k3J4zAGsIRjyExvRX7CvAM_U3DYivic,3552
29
+ datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py,sha256=z-Udbz_7NahufipLOIgUlZGWRsIRF_qkV903rYxR_s4,12563
30
+ datahub_airflow_plugin/airflow3/datahub_listener.py,sha256=Xonjn3SX7q9dmDEyW7uyiryvJ1AVJuQcByBbD6TFWTk,58791
31
+ datahub_airflow_plugin/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ datahub_airflow_plugin/client/airflow_generator.py,sha256=7Cc9khsePHqjRAHZchaMKMKP0wCeT21wCtH8gUZeEEE,26135
33
+ datahub_airflow_plugin/example_dags/__init__.py,sha256=nIAjilNA_uGtFYYmAakZOvJQjAeg8d7aR61MegkEjgU,1321
34
+ datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py,sha256=BbrOErFboKMDFn712RHEKI9T4Vh0q6kYSVet56gPqVk,1319
35
+ datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py,sha256=-rWLI58fErRoTpr38RCJXE0_lSBNZp17U38hg9Ow83A,1295
36
+ datahub_airflow_plugin/example_dags/lineage_backend_demo.py,sha256=Dy6MxwtX7G0mQeALqpLRu4F03IyU9fqIkr-CcKpo2JE,1625
37
+ datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py,sha256=kW2rLFtOnoiMxBJ315GzlmR0Sz1cqQ_wwLbG9UC-u7Y,1499
38
+ datahub_airflow_plugin/example_dags/lineage_emission_dag.py,sha256=LE29DzW51a4ZAl_zrcLrqSyzmy8qElcZagXsIMjaZLU,1946
39
+ datahub_airflow_plugin/example_dags/mysql_sample_dag.py,sha256=Unx9Ger3R9ptEutfV-4NjjEaTIEYJ-tLrZr7OsK608k,1922
40
+ datahub_airflow_plugin/example_dags/snowflake_sample_dag.py,sha256=b9iaE7zChQha9u57F84U6uqavGl7WrUnMNOzXEiZxjE,3234
41
+ datahub_airflow_plugin/example_dags/airflow2/__init__.py,sha256=cAFFodve_HVEokz3ZVo9lWvpYZ9WeLgZ4d8lSypHINQ,230
42
+ datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py,sha256=zmrNm1L9q7hAksSYTcCNEFopy4Qy0sElM30JQZfan1U,1493
43
+ datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py,sha256=-rWLI58fErRoTpr38RCJXE0_lSBNZp17U38hg9Ow83A,1295
44
+ datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py,sha256=1sUWzCU4tCGOfMNFvV6QbP416Ld-Sc78O-9fNFv9zKo,2331
45
+ datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py,sha256=7dB0874wOOVXZwiEcd7uHCK_ngtTW6JOHtFmhoqzaXU,2286
46
+ datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py,sha256=iZ2rN787gdiYvAfMD_ST-ZUijqXtH4UAQzoXgogSG9o,2649
47
+ datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py,sha256=qeB16im8mlQ2U3is55Lxo45dbUXVB29uS_ixSV74dPg,2132
48
+ datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py,sha256=H1PMTRBBZcjGOiIKkMMJ57ujRFYPoxlc1sq7gMl-ahI,3619
49
+ datahub_airflow_plugin/example_dags/airflow3/__init__.py,sha256=XXQzH-43L_WnjfF-ahoC0jUBltS5X-nZyDPZSbZSNfg,233
50
+ datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py,sha256=U-xqMZtA1DN_WkdP6aE044efu6M0VGtBqMghwmBuDds,1635
51
+ datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py,sha256=oTKaIAvoU212ORg8SxfhKtS9ne2oflSyPPEQcu3m--Q,1533
52
+ datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py,sha256=6G8tJFSX9vFCdWn6wN1nvgZthuc3poBSBEeqylvQtzM,3265
53
+ datahub_airflow_plugin/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
+ datahub_airflow_plugin/hooks/datahub.py,sha256=zFYFTm2HbkBXE5AKVDbUjMpBeRAdyZu0GM84eafqKWo,11550
55
+ datahub_airflow_plugin/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ datahub_airflow_plugin/operators/datahub.py,sha256=vU-v2fuJUkm1gMZa6zJehaVJ6QCLwvQd4G_vJ37BBVA,4402
57
+ datahub_airflow_plugin/operators/datahub_assertion_operator.py,sha256=j_P9M1a5qME55pKHAfTqZsVVtIslFBO59r8UQOOBvsk,2914
58
+ datahub_airflow_plugin/operators/datahub_assertion_sensor.py,sha256=QJIZZYQhqscj3bhBN5Sei-ABMRRAl2KiQxXTXcZQ51Q,2917
59
+ datahub_airflow_plugin/operators/datahub_operation_operator.py,sha256=KJ8M8jJ7UWW6kNbiS-rELc-kqCPkZ3ck7z51oAXGPSI,3351
60
+ datahub_airflow_plugin/operators/datahub_operation_sensor.py,sha256=U19fi5DpjBRWm_1ljXcjnspUzfa3mqYfOQZHjLk-ufI,3618
61
+ acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/METADATA,sha256=aofkCQ5pCOBGXUCuNYj7NueCmbgxtfSSR7dQPGKGH7w,12333
62
+ acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
+ acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/entry_points.txt,sha256=HqmajDHtrsz0b5Lswe1-eeuObxdtucd9YoxH77jJBA8,179
64
+ acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/top_level.txt,sha256=VBzisOQfzqL1WRbNyItaruf3kTigXltjzgqzbheaFp0,23
65
+ acryl_datahub_airflow_plugin-1.3.1.5rc9.dist-info/RECORD,,
@@ -0,0 +1,32 @@
1
+ # This module must be imported before any Airflow imports in any of our files.
2
+ # It dispatches to version-specific compatibility modules.
3
+
4
+ from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
5
+
6
+ # Critical safety check: Ensure MarkupSafe compatibility patch is applied
7
+ # This must happen before importing Airflow to prevent MarkupSafe version conflicts
8
+ # Using explicit exception instead of assert to ensure it runs even with python -O
9
+ if not MARKUPSAFE_PATCHED:
10
+ raise RuntimeError(
11
+ "MarkupSafe compatibility patch must be applied before importing Airflow modules. "
12
+ "This is a critical safety check that cannot be disabled. "
13
+ "The patch ensures compatibility between different MarkupSafe versions used by "
14
+ "Airflow and DataHub dependencies."
15
+ )
16
+
17
+ # Detect Airflow version and dispatch to version-specific compat module
18
+ # These imports must come after the MARKUPSAFE_PATCHED check above.
19
+ import airflow
20
+ import packaging.version
21
+
22
+ AIRFLOW_VERSION = packaging.version.parse(airflow.__version__)
23
+ IS_AIRFLOW_3_OR_HIGHER = AIRFLOW_VERSION >= packaging.version.parse("3.0.0")
24
+
25
+ if IS_AIRFLOW_3_OR_HIGHER:
26
+ from datahub_airflow_plugin.airflow3._airflow_compat import AIRFLOW_PATCHED
27
+ else:
28
+ from datahub_airflow_plugin.airflow2._airflow_compat import AIRFLOW_PATCHED
29
+
30
+ __all__ = [
31
+ "AIRFLOW_PATCHED",
32
+ ]
@@ -1,42 +1,75 @@
1
- from typing import List
1
+ """
2
+ Pure dispatcher for version-specific Airflow shims.
2
3
 
3
- import airflow.version
4
- import packaging.version
5
- from airflow.models.operator import Operator
6
-
7
- try:
8
- from airflow.sensors.external_task import ExternalTaskSensor
9
- except ImportError:
10
- from airflow.sensors.external_task_sensor import ExternalTaskSensor # type: ignore
11
-
12
- # Approach suggested by https://stackoverflow.com/a/11887885/5004662.
13
- AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
14
- HAS_AIRFLOW_DAG_LISTENER_API = True # this is in Airflow 2.5+
15
- HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse(
16
- "2.8.0.dev0"
17
- )
4
+ This module automatically imports the correct shims based on the installed
5
+ Airflow version, dispatching to either airflow2 or airflow3 implementations.
18
6
 
7
+ No logic lives here - just clean version detection and re-export.
8
+ """
19
9
 
20
- def get_task_inlets(operator: "Operator") -> List:
21
- # From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets
22
- if hasattr(operator, "_inlets"):
23
- return operator._inlets # type: ignore[attr-defined, union-attr]
24
- if hasattr(operator, "get_inlet_defs"):
25
- return operator.get_inlet_defs() # type: ignore[attr-defined]
26
- return operator.inlets or []
10
+ import packaging.version
27
11
 
12
+ from datahub_airflow_plugin._airflow_version_specific import (
13
+ AIRFLOW_VERSION,
14
+ IS_AIRFLOW_3_OR_HIGHER,
15
+ )
28
16
 
29
- def get_task_outlets(operator: "Operator") -> List:
30
- # From Airflow 2.4 _outlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _outlets
31
- # We have to use _outlets because outlets is empty in Airflow < 2.4.0
32
- if hasattr(operator, "_outlets"):
33
- return operator._outlets # type: ignore[attr-defined, union-attr]
34
- if hasattr(operator, "get_outlet_defs"):
35
- return operator.get_outlet_defs()
36
- return operator.outlets or []
17
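+ # Version feature flags, derived from the detected Airflow version.
18
+ # These were previously in the old _airflow_shims but are better kept simple. NOTE(review): the old shim gated HAS_AIRFLOW_DATASET_LISTENER_API on >= 2.8.0.dev0, not 2.5 - confirm the threshold below.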
19
+ HAS_AIRFLOW_STANDALONE_CMD = AIRFLOW_VERSION >= packaging.version.parse("2.2")
20
+ HAS_AIRFLOW_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.3")
21
+ HAS_AIRFLOW_DAG_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5")
22
+ HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5")
37
23
 
24
+ if IS_AIRFLOW_3_OR_HIGHER:
25
+ # Airflow 3.x - use airflow3 shims
26
+ from datahub_airflow_plugin.airflow3._shims import (
27
+ BaseOperator,
28
+ ExternalTaskSensor,
29
+ MappedOperator,
30
+ OpenLineagePlugin,
31
+ Operator,
32
+ TaskHolder,
33
+ get_operator_class,
34
+ get_task_inlets,
35
+ get_task_outlets,
36
+ redact_with_exclusions,
37
+ try_import_from_string,
38
+ )
39
+ else:
40
+ # Airflow 2.x - use airflow2 shims
41
+ from datahub_airflow_plugin.airflow2._shims import ( # type: ignore[assignment]
42
+ BaseOperator,
43
+ ExternalTaskSensor,
44
+ MappedOperator,
45
+ OpenLineagePlugin,
46
+ Operator,
47
+ TaskHolder,
48
+ get_operator_class,
49
+ get_task_inlets,
50
+ get_task_outlets,
51
+ redact_with_exclusions,
52
+ try_import_from_string,
53
+ )
38
54
 
39
55
  __all__ = [
56
+ # Airflow version and feature flags
40
57
  "AIRFLOW_VERSION",
58
+ "IS_AIRFLOW_3_OR_HIGHER",
59
+ "HAS_AIRFLOW_STANDALONE_CMD",
60
+ "HAS_AIRFLOW_LISTENER_API",
61
+ "HAS_AIRFLOW_DAG_LISTENER_API",
62
+ "HAS_AIRFLOW_DATASET_LISTENER_API",
63
+ # Airflow objects
64
+ "BaseOperator",
65
+ "Operator",
66
+ "MappedOperator",
41
67
  "ExternalTaskSensor",
68
+ "TaskHolder",
69
+ "OpenLineagePlugin",
70
+ "get_operator_class",
71
+ "try_import_from_string",
72
+ "redact_with_exclusions",
73
+ "get_task_inlets",
74
+ "get_task_outlets",
42
75
  ]