acryl-datahub-airflow-plugin 1.3.1.5rc5__py3-none-any.whl → 1.3.1.5rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/METADATA +303 -0
- acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/RECORD +65 -0
- datahub_airflow_plugin/_airflow_compat.py +32 -0
- datahub_airflow_plugin/_airflow_shims.py +64 -31
- datahub_airflow_plugin/_airflow_version_specific.py +184 -0
- datahub_airflow_plugin/_config.py +97 -19
- datahub_airflow_plugin/_constants.py +16 -0
- datahub_airflow_plugin/_datahub_ol_adapter.py +14 -2
- datahub_airflow_plugin/_version.py +1 -1
- datahub_airflow_plugin/airflow2/__init__.py +6 -0
- datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py +402 -0
- datahub_airflow_plugin/airflow2/_airflow_compat.py +95 -0
- datahub_airflow_plugin/airflow2/_extractors.py +477 -0
- datahub_airflow_plugin/airflow2/_legacy_shims.py +20 -0
- datahub_airflow_plugin/airflow2/_openlineage_compat.py +123 -0
- datahub_airflow_plugin/airflow2/_provider_shims.py +29 -0
- datahub_airflow_plugin/airflow2/_shims.py +88 -0
- datahub_airflow_plugin/airflow2/datahub_listener.py +1072 -0
- datahub_airflow_plugin/airflow3/__init__.py +6 -0
- datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py +408 -0
- datahub_airflow_plugin/airflow3/_airflow_compat.py +108 -0
- datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py +153 -0
- datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py +273 -0
- datahub_airflow_plugin/airflow3/_shims.py +82 -0
- datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py +88 -0
- datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py +308 -0
- datahub_airflow_plugin/airflow3/datahub_listener.py +1452 -0
- datahub_airflow_plugin/client/airflow_generator.py +147 -43
- datahub_airflow_plugin/datahub_listener.py +19 -790
- datahub_airflow_plugin/example_dags/__init__.py +32 -0
- datahub_airflow_plugin/example_dags/airflow2/__init__.py +8 -0
- datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py +54 -0
- datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py +43 -0
- datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py +69 -0
- datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py +69 -0
- datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py +81 -0
- datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py +68 -0
- datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py +99 -0
- datahub_airflow_plugin/example_dags/airflow3/__init__.py +8 -0
- datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py +51 -0
- datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py +51 -0
- datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py +89 -0
- datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +12 -4
- datahub_airflow_plugin/hooks/datahub.py +11 -2
- datahub_airflow_plugin/operators/datahub.py +20 -3
- acryl_datahub_airflow_plugin-1.3.1.5rc5.dist-info/METADATA +0 -91
- acryl_datahub_airflow_plugin-1.3.1.5rc5.dist-info/RECORD +0 -33
- datahub_airflow_plugin/_extractors.py +0 -365
- {acryl_datahub_airflow_plugin-1.3.1.5rc5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5rc5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_airflow_plugin-1.3.1.5rc5.dist-info → acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: acryl-datahub-airflow-plugin
|
|
3
|
+
Version: 1.3.1.5rc8
|
|
4
|
+
Summary: Datahub Airflow plugin to capture executions and send to Datahub
|
|
5
|
+
Home-page: https://docs.datahub.com/
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Documentation, https://docs.datahub.com/docs/
|
|
8
|
+
Project-URL: Source, https://github.com/datahub-project/datahub
|
|
9
|
+
Project-URL: Changelog, https://github.com/datahub-project/datahub/releases
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Programming Language :: Python
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Information Technology
|
|
16
|
+
Classifier: Intended Audience :: System Administrators
|
|
17
|
+
Classifier: Operating System :: Unix
|
|
18
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
19
|
+
Classifier: Environment :: Console
|
|
20
|
+
Classifier: Environment :: MacOS X
|
|
21
|
+
Classifier: Topic :: Software Development
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc8
|
|
25
|
+
Requires-Dist: pydantic>=2.4.0
|
|
26
|
+
Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc8
|
|
27
|
+
Requires-Dist: apache-airflow<4.0.0,>=2.5.0
|
|
28
|
+
Provides-Extra: ignore
|
|
29
|
+
Provides-Extra: datahub-rest
|
|
30
|
+
Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc8; extra == "datahub-rest"
|
|
31
|
+
Provides-Extra: datahub-kafka
|
|
32
|
+
Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc8; extra == "datahub-kafka"
|
|
33
|
+
Provides-Extra: datahub-file
|
|
34
|
+
Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc8; extra == "datahub-file"
|
|
35
|
+
Provides-Extra: airflow2
|
|
36
|
+
Requires-Dist: openlineage-airflow>=1.2.0; extra == "airflow2"
|
|
37
|
+
Provides-Extra: airflow3
|
|
38
|
+
Requires-Dist: apache-airflow-providers-openlineage>=1.0.0; extra == "airflow3"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: tox; extra == "dev"
|
|
41
|
+
Requires-Dist: apache-airflow<4.0.0,>=2.5.0; extra == "dev"
|
|
42
|
+
Requires-Dist: types-six; extra == "dev"
|
|
43
|
+
Requires-Dist: sqlalchemy-stubs; extra == "dev"
|
|
44
|
+
Requires-Dist: pydantic>=2.4.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
|
|
46
|
+
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
47
|
+
Requires-Dist: types-dataclasses; extra == "dev"
|
|
48
|
+
Requires-Dist: mypy==1.17.1; extra == "dev"
|
|
49
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
50
|
+
Requires-Dist: ruff==0.11.7; extra == "dev"
|
|
51
|
+
Requires-Dist: types-cachetools; extra == "dev"
|
|
52
|
+
Requires-Dist: build; extra == "dev"
|
|
53
|
+
Requires-Dist: types-toml; extra == "dev"
|
|
54
|
+
Requires-Dist: types-click==0.1.12; extra == "dev"
|
|
55
|
+
Requires-Dist: coverage>=5.1; extra == "dev"
|
|
56
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
57
|
+
Requires-Dist: tox-uv; extra == "dev"
|
|
58
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
59
|
+
Requires-Dist: tenacity; extra == "dev"
|
|
60
|
+
Requires-Dist: twine; extra == "dev"
|
|
61
|
+
Requires-Dist: acryl-datahub[datahub-rest,sql-parser]==1.3.1.5rc8; extra == "dev"
|
|
62
|
+
Requires-Dist: acryl-datahub[datahub-rest]==1.3.1.5rc8; extra == "dev"
|
|
63
|
+
Requires-Dist: pytest>=6.2.2; extra == "dev"
|
|
64
|
+
Requires-Dist: types-setuptools; extra == "dev"
|
|
65
|
+
Requires-Dist: deepdiff!=8.0.0; extra == "dev"
|
|
66
|
+
Requires-Dist: packaging; extra == "dev"
|
|
67
|
+
Provides-Extra: integration-tests
|
|
68
|
+
Requires-Dist: apache-airflow-providers-sqlite; extra == "integration-tests"
|
|
69
|
+
Requires-Dist: apache-airflow[amazon,google,snowflake]>=2.0.2; extra == "integration-tests"
|
|
70
|
+
Requires-Dist: acryl-datahub[testing-utils]==1.3.1.5rc8; extra == "integration-tests"
|
|
71
|
+
Requires-Dist: acryl-datahub[datahub-kafka]==1.3.1.5rc8; extra == "integration-tests"
|
|
72
|
+
Requires-Dist: virtualenv; extra == "integration-tests"
|
|
73
|
+
Requires-Dist: apache-airflow-providers-teradata; extra == "integration-tests"
|
|
74
|
+
Requires-Dist: acryl-datahub[sync-file-emitter]==1.3.1.5rc8; extra == "integration-tests"
|
|
75
|
+
Requires-Dist: snowflake-connector-python>=2.7.10; extra == "integration-tests"
|
|
76
|
+
Dynamic: classifier
|
|
77
|
+
Dynamic: description
|
|
78
|
+
Dynamic: description-content-type
|
|
79
|
+
Dynamic: home-page
|
|
80
|
+
Dynamic: license
|
|
81
|
+
Dynamic: project-url
|
|
82
|
+
Dynamic: provides-extra
|
|
83
|
+
Dynamic: requires-dist
|
|
84
|
+
Dynamic: requires-python
|
|
85
|
+
Dynamic: summary
|
|
86
|
+
|
|
87
|
+
# Datahub Airflow Plugin
|
|
88
|
+
|
|
89
|
+
See [the DataHub Airflow docs](https://docs.datahub.com/docs/lineage/airflow) for details.
|
|
90
|
+
|
|
91
|
+
## Version Compatibility
|
|
92
|
+
|
|
93
|
+
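The plugin fully supports Apache Airflow versions 2.7+ and 3.1+; Airflow 3.0.x works only with the manual pydantic upgrade described in the table and note below.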
|
|
94
|
+
|
|
95
|
+
| Airflow Version | Extra to Install | Status | Notes |
|
|
96
|
+
| --------------- | ---------------- | ---------------------- | -------------------------------- |
|
|
97
|
+
| 2.7-2.10 | `[airflow2]` | ✅ Fully Supported | |
|
|
98
|
+
| 3.0.x | `[airflow3]` | ⚠️ Requires manual fix | Needs `pydantic>=2.11.8` upgrade |
|
|
99
|
+
| 3.1+ | `[airflow3]` | ✅ Fully Supported | |
|
|
100
|
+
|
|
101
|
+
**Note on Airflow 3.0.x**: Airflow 3.0.6 pins pydantic==2.11.7, which contains a bug that prevents the DataHub plugin from importing correctly. This issue is resolved in Airflow 3.1.0+ which uses pydantic>=2.11.8. If you must use Airflow 3.0.6, you can manually upgrade pydantic to >=2.11.8, though this may conflict with Airflow's dependency constraints. We recommend upgrading to Airflow 3.1.0 or later.
|
|
102
|
+
|
|
103
|
+
Related issue: https://github.com/pydantic/pydantic/issues/10963
|
|
104
|
+
|
|
105
|
+
## Installation
|
|
106
|
+
|
|
107
|
+
The installation command varies depending on your Airflow version due to different OpenLineage dependencies.
|
|
108
|
+
|
|
109
|
+
### For Airflow 2.x (2.7+)
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
pip install 'acryl-datahub-airflow-plugin[airflow2]'
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
This installs the plugin with Legacy OpenLineage (`openlineage-airflow>=1.2.0`), which is required for Airflow 2.x lineage extraction.
|
|
116
|
+
|
|
117
|
+
#### Alternative: Using Native OpenLineage Provider on Airflow 2.7+
|
|
118
|
+
|
|
119
|
+
If your Airflow 2.7+ environment rejects the Legacy OpenLineage package (e.g., due to dependency conflicts), you can use the native OpenLineage provider instead:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
# Install the native Airflow provider first
|
|
123
|
+
pip install 'apache-airflow-providers-openlineage>=1.0.0'
|
|
124
|
+
|
|
125
|
+
# Then install the DataHub plugin without OpenLineage extras
|
|
126
|
+
pip install acryl-datahub-airflow-plugin
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The plugin will automatically detect and use `apache-airflow-providers-openlineage` when available, providing the same functionality.
|
|
130
|
+
|
|
131
|
+
### For Airflow 3.x (3.1+)
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
pip install 'acryl-datahub-airflow-plugin[airflow3]'
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This installs the plugin with `apache-airflow-providers-openlineage>=1.0.0`, which is the native OpenLineage provider for Airflow 3.x.
|
|
138
|
+
|
|
139
|
+
**Note**: If using Airflow 3.0.x (3.0.6 specifically), you'll need to manually upgrade pydantic:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
pip install 'acryl-datahub-airflow-plugin[airflow3]' 'pydantic>=2.11.8'
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
We recommend using Airflow 3.1.0+ which resolves this issue. See the Version Compatibility section above for details.
|
|
146
|
+
|
|
147
|
+
### What Gets Installed
|
|
148
|
+
|
|
149
|
+
#### Base Installation (No Extras)
|
|
150
|
+
|
|
151
|
+
When you install without any extras:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
pip install acryl-datahub-airflow-plugin
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
You get:
|
|
158
|
+
|
|
159
|
+
- `acryl-datahub[sql-parser,datahub-rest]` - DataHub SDK with SQL parsing and REST emitter
|
|
160
|
+
- `pydantic>=2.4.0` - Required for data validation
|
|
161
|
+
- `apache-airflow>=2.5.0,<4.0.0` - Airflow itself
|
|
162
|
+
- **No OpenLineage package** - You'll need to provide your own or use one of the extras below
|
|
163
|
+
|
|
164
|
+
#### With `[airflow2]` Extra
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
pip install 'acryl-datahub-airflow-plugin[airflow2]'
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Adds:
|
|
171
|
+
|
|
172
|
+
- `openlineage-airflow>=1.2.0` - Standalone OpenLineage package for Airflow 2.x
|
|
173
|
+
|
|
174
|
+
#### With `[airflow3]` Extra
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
pip install 'acryl-datahub-airflow-plugin[airflow3]'
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Adds:
|
|
181
|
+
|
|
182
|
+
- `apache-airflow-providers-openlineage>=1.0.0` - Native OpenLineage provider for Airflow 3.x
|
|
183
|
+
|
|
184
|
+
### Additional Extras
|
|
185
|
+
|
|
186
|
+
You can combine multiple extras if needed:
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# For Airflow 3.x with Kafka emitter support
|
|
190
|
+
pip install 'acryl-datahub-airflow-plugin[airflow3,datahub-kafka]'
|
|
191
|
+
|
|
192
|
+
# For Airflow 2.x with file emitter support
|
|
193
|
+
pip install 'acryl-datahub-airflow-plugin[airflow2,datahub-file]'
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Available extras:
|
|
197
|
+
|
|
198
|
+
- `airflow2`: OpenLineage support for Airflow 2.x (adds `openlineage-airflow>=1.2.0`)
|
|
199
|
+
- `airflow3`: OpenLineage support for Airflow 3.x (adds `apache-airflow-providers-openlineage>=1.0.0`)
|
|
200
|
+
- `datahub-kafka`: Kafka-based metadata emission (adds `acryl-datahub[datahub-kafka]`)
|
|
201
|
+
- `datahub-file`: File-based metadata emission (adds `acryl-datahub[sync-file-emitter]`) - useful for testing
|
|
202
|
+
|
|
203
|
+
### Why Different Extras?
|
|
204
|
+
|
|
205
|
+
Airflow 2.x and 3.x have different OpenLineage integrations:
|
|
206
|
+
|
|
207
|
+
- **Airflow 2.x (2.5-2.6)** typically uses Legacy OpenLineage (`openlineage-airflow` package)
|
|
208
|
+
- **Airflow 2.x (2.7+)** can use either Legacy OpenLineage or native OpenLineage Provider (`apache-airflow-providers-openlineage`)
|
|
209
|
+
- **Airflow 3.x** uses native OpenLineage Provider (`apache-airflow-providers-openlineage`)
|
|
210
|
+
|
|
211
|
+
The plugin automatically detects which OpenLineage variant is installed and uses it accordingly. This means:
|
|
212
|
+
|
|
213
|
+
1. **With extras** (`[airflow2]` or `[airflow3]`): The appropriate OpenLineage dependency is installed automatically
|
|
214
|
+
2. **Without extras**: You provide your own OpenLineage installation, and the plugin auto-detects it
|
|
215
|
+
|
|
216
|
+
This flexibility allows you to adapt to different Airflow environments and dependency constraints.
|
|
217
|
+
|
|
218
|
+
## Configuration
|
|
219
|
+
|
|
220
|
+
The plugin can be configured via `airflow.cfg` under the `[datahub]` section. Below are the key configuration options:
|
|
221
|
+
|
|
222
|
+
### Extractor Patching (OpenLineage Enhancements)
|
|
223
|
+
|
|
224
|
+
When `enable_extractors=True` (default), the DataHub plugin enhances OpenLineage extractors to provide better lineage. You can fine-tune these enhancements:
|
|
225
|
+
|
|
226
|
+
```ini
|
|
227
|
+
[datahub]
|
|
228
|
+
# Enable/disable all OpenLineage extractors
|
|
229
|
+
enable_extractors = True # Default: True
|
|
230
|
+
|
|
231
|
+
# Fine-grained control over DataHub's OpenLineage enhancements
|
|
232
|
+
|
|
233
|
+
# --- Patches (work with both Legacy OpenLineage and OpenLineage Provider) ---
|
|
234
|
+
|
|
235
|
+
# Patch SqlExtractor to use DataHub's advanced SQL parser (enables column-level lineage)
|
|
236
|
+
patch_sql_parser = True # Default: True
|
|
237
|
+
|
|
238
|
+
# Patch SnowflakeExtractor to fix default schema detection
|
|
239
|
+
patch_snowflake_schema = True # Default: True
|
|
240
|
+
|
|
241
|
+
# --- Custom Extractors / Operator Patches (Legacy OpenLineage extractors on Airflow 2, operator patches on Airflow 3) ---
|
|
242
|
+
|
|
243
|
+
# Use DataHub's custom AthenaOperatorExtractor (better Athena lineage)
|
|
244
|
+
extract_athena_operator = True # Default: True
|
|
245
|
+
|
|
246
|
+
# Use DataHub's custom BigQueryInsertJobOperatorExtractor (handles BQ job configuration)
|
|
247
|
+
extract_bigquery_insert_job_operator = True # Default: True
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
**How it works:**
|
|
251
|
+
|
|
252
|
+
**Patches** (apply to both Legacy OpenLineage and OpenLineage Provider):
|
|
253
|
+
|
|
254
|
+
- Apply **monkey-patching** to OpenLineage extractor/operator classes at runtime
|
|
255
|
+
- Work on **both Airflow 2.x and Airflow 3.x**
|
|
256
|
+
- When `patch_sql_parser=True`:
|
|
257
|
+
- **Airflow 2**: Patches `SqlExtractor.extract()` method
|
|
258
|
+
- **Airflow 3**: Patches `SQLParser.generate_openlineage_metadata_from_sql()` method
|
|
259
|
+
- Provides: More accurate lineage extraction, column-level lineage (CLL), better SQL dialect support
|
|
260
|
+
- When `patch_snowflake_schema=True`:
|
|
261
|
+
- **Airflow 2**: Patches `SnowflakeExtractor.default_schema` property
|
|
262
|
+
- **Airflow 3**: Currently not needed (handled by Airflow's native support)
|
|
263
|
+
- Fixes Snowflake schema detection issues
|
|
264
|
+
|
|
265
|
+
**Custom Extractors/Operator Patches**:
|
|
266
|
+
|
|
267
|
+
- Register DataHub's custom implementations for specific operators
|
|
268
|
+
- Work on **both Airflow 2.x and Airflow 3.x**
|
|
269
|
+
- `extract_athena_operator`:
|
|
270
|
+
- **Airflow 2 (Legacy OpenLineage only)**: Registers `AthenaOperatorExtractor`
|
|
271
|
+
- **Airflow 3**: Patches `AthenaOperator.get_openlineage_facets_on_complete()`
|
|
272
|
+
- Uses DataHub's SQL parser for better Athena lineage
|
|
273
|
+
- `extract_bigquery_insert_job_operator`:
|
|
274
|
+
- **Airflow 2 (Legacy OpenLineage only)**: Registers `BigQueryInsertJobOperatorExtractor`
|
|
275
|
+
- **Airflow 3**: Patches `BigQueryInsertJobOperator.get_openlineage_facets_on_complete()`
|
|
276
|
+
- Handles BigQuery job configuration and destination tables
|
|
277
|
+
|
|
278
|
+
**Example use cases:**
|
|
279
|
+
|
|
280
|
+
Disable DataHub's SQL parser to use OpenLineage's native parsing:
|
|
281
|
+
|
|
282
|
+
```ini
|
|
283
|
+
[datahub]
|
|
284
|
+
enable_extractors = True
|
|
285
|
+
patch_sql_parser = False # Use OpenLineage's native SQL parser
|
|
286
|
+
patch_snowflake_schema = True # Still fix Snowflake schema detection
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
Disable DataHub's custom Athena handling (the Legacy OpenLineage extractor on Airflow 2, the `AthenaOperator` patch on Airflow 3):
|
|
290
|
+
|
|
291
|
+
```ini
|
|
292
|
+
[datahub]
|
|
293
|
+
enable_extractors = True
|
|
294
|
+
extract_athena_operator = False # Use OpenLineage's default Athena extractor
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Other Configuration Options
|
|
298
|
+
|
|
299
|
+
For a complete list of configuration options, see the [DataHub Airflow documentation](https://docs.datahub.com/docs/lineage/airflow#configuration).
|
|
300
|
+
|
|
301
|
+
## Developing
|
|
302
|
+
|
|
303
|
+
See the [developing docs](../../metadata-ingestion/developing.md).
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
datahub_airflow_plugin/__init__.py,sha256=NScUtA8N-m66Pyg0DO--YbPkrl48PK3UevpdQVW_y6E,1009
|
|
2
|
+
datahub_airflow_plugin/_airflow_compat.py,sha256=VdlTeQ7XGU40uqeAfnJcDGLt8QD2lLpEhQw1ZEMTFvI,1345
|
|
3
|
+
datahub_airflow_plugin/_airflow_shims.py,sha256=fe-BwJxdA56P6j_nD1uMwmNNCE0FnlbHsqQME30uQ_o,2243
|
|
4
|
+
datahub_airflow_plugin/_airflow_version_specific.py,sha256=q4iNyScNQTNZnoC9n_8wYI99mhqiXbxUhjAv2x_sNks,5870
|
|
5
|
+
datahub_airflow_plugin/_config.py,sha256=AAvxIGcG-wQWkFbpGCRurjnNXVCzqHDBmsFsSRHOEi4,9050
|
|
6
|
+
datahub_airflow_plugin/_constants.py,sha256=HBrUztUwspK1mQ3iM9_Pdiu18pn-lPK3xhjmubqn_kI,627
|
|
7
|
+
datahub_airflow_plugin/_datahub_ol_adapter.py,sha256=2YIhJDyLhzZGK3MMra4NGSIfG-az8sKt6ZXcbQGs2Yg,951
|
|
8
|
+
datahub_airflow_plugin/_version.py,sha256=a5tPkINCI2HTMmKBJqZtwVQ33U-KzL8xp07XrZ2LIBk,148
|
|
9
|
+
datahub_airflow_plugin/datahub_listener.py,sha256=9g-MBf14zFs-cQP0Jr_9gnCbzQVqIe_x6ZjgQs0onzE,881
|
|
10
|
+
datahub_airflow_plugin/datahub_plugin.py,sha256=rbZhs7s5O3_MlkQw5aZToC2W5mMic_EpI3oybHB0ofw,1224
|
|
11
|
+
datahub_airflow_plugin/entities.py,sha256=xDZ-mZH7hjUkZbatWYUwI43_9B40wGiotlyQhiO8rEM,1987
|
|
12
|
+
datahub_airflow_plugin/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
datahub_airflow_plugin/airflow2/__init__.py,sha256=WdCqiDRuXMkPcLPb3D5PQYZJaRyDtuEIJ7ndHSThjkg,200
|
|
14
|
+
datahub_airflow_plugin/airflow2/_airflow2_sql_parser_patch.py,sha256=nkt57jYixcocrfYpQ3bhAUDTeyblt8p99_QdNzqJl_o,16145
|
|
15
|
+
datahub_airflow_plugin/airflow2/_airflow_compat.py,sha256=7GumOc-bOCySp739MEJmksgHsiElSNfm29hB3kHyJPY,4005
|
|
16
|
+
datahub_airflow_plugin/airflow2/_extractors.py,sha256=S4LbR9_xDMl76YErWgS_FsPzFW1DUDfsOZ0A9tKrZJ4,17981
|
|
17
|
+
datahub_airflow_plugin/airflow2/_legacy_shims.py,sha256=rToXtWz0p41_jIsjPk4ALVL0T8NcH_RXRNuW47uDHDU,527
|
|
18
|
+
datahub_airflow_plugin/airflow2/_openlineage_compat.py,sha256=43Xgtl2LdEVsIxJUb-DFGYoP-kaEots599rTgNdCMD4,4517
|
|
19
|
+
datahub_airflow_plugin/airflow2/_provider_shims.py,sha256=GD_r_RojjBWWFfxvakQWr4eTgPplfkRCMZMRDTaApM4,961
|
|
20
|
+
datahub_airflow_plugin/airflow2/_shims.py,sha256=Ha4gLxDU_GVPj2T0dHTwru4Lm-kYb9rRsL7uhiba0Gs,2769
|
|
21
|
+
datahub_airflow_plugin/airflow2/datahub_listener.py,sha256=IdvTYLY5TWbukJ44HmJI0GJnPHqu60wR8RQt10I3wss,42611
|
|
22
|
+
datahub_airflow_plugin/airflow3/__init__.py,sha256=OIFuu2zwlzBMcB2FVlkM2aXsCVPW6Xoa82pEjPZK6g0,200
|
|
23
|
+
datahub_airflow_plugin/airflow3/_airflow3_sql_parser_patch.py,sha256=7LZjG8I0Vl8sI4j7RZsoZt7IkG9RQ-tL8CxZcYIVqQE,16460
|
|
24
|
+
datahub_airflow_plugin/airflow3/_airflow_compat.py,sha256=g5q6XfOZZQhhJwNHwYb0U_wML7_2K0BsGGnGzxEk22M,3857
|
|
25
|
+
datahub_airflow_plugin/airflow3/_athena_openlineage_patch.py,sha256=BoyCFAZM7lB1lJqqu2XuLF_wgSMC4oUwRlu8e1E8ntU,6709
|
|
26
|
+
datahub_airflow_plugin/airflow3/_bigquery_openlineage_patch.py,sha256=ph9ZePU8eff1ew-j5dvM-vBmp-Or9b1CsnOevAqoluM,10228
|
|
27
|
+
datahub_airflow_plugin/airflow3/_shims.py,sha256=q2Nw5vS21R_y0cYg3aOm7AYylaNAXlN480ve5XItfNU,2553
|
|
28
|
+
datahub_airflow_plugin/airflow3/_sqlite_openlineage_patch.py,sha256=QRgadtwS6a42k3J4zAGsIRjyExvRX7CvAM_U3DYivic,3552
|
|
29
|
+
datahub_airflow_plugin/airflow3/_teradata_openlineage_patch.py,sha256=z-Udbz_7NahufipLOIgUlZGWRsIRF_qkV903rYxR_s4,12563
|
|
30
|
+
datahub_airflow_plugin/airflow3/datahub_listener.py,sha256=Xonjn3SX7q9dmDEyW7uyiryvJ1AVJuQcByBbD6TFWTk,58791
|
|
31
|
+
datahub_airflow_plugin/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
+
datahub_airflow_plugin/client/airflow_generator.py,sha256=7Cc9khsePHqjRAHZchaMKMKP0wCeT21wCtH8gUZeEEE,26135
|
|
33
|
+
datahub_airflow_plugin/example_dags/__init__.py,sha256=nIAjilNA_uGtFYYmAakZOvJQjAeg8d7aR61MegkEjgU,1321
|
|
34
|
+
datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py,sha256=BbrOErFboKMDFn712RHEKI9T4Vh0q6kYSVet56gPqVk,1319
|
|
35
|
+
datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py,sha256=-rWLI58fErRoTpr38RCJXE0_lSBNZp17U38hg9Ow83A,1295
|
|
36
|
+
datahub_airflow_plugin/example_dags/lineage_backend_demo.py,sha256=Dy6MxwtX7G0mQeALqpLRu4F03IyU9fqIkr-CcKpo2JE,1625
|
|
37
|
+
datahub_airflow_plugin/example_dags/lineage_backend_taskflow_demo.py,sha256=kW2rLFtOnoiMxBJ315GzlmR0Sz1cqQ_wwLbG9UC-u7Y,1499
|
|
38
|
+
datahub_airflow_plugin/example_dags/lineage_emission_dag.py,sha256=LE29DzW51a4ZAl_zrcLrqSyzmy8qElcZagXsIMjaZLU,1946
|
|
39
|
+
datahub_airflow_plugin/example_dags/mysql_sample_dag.py,sha256=Unx9Ger3R9ptEutfV-4NjjEaTIEYJ-tLrZr7OsK608k,1922
|
|
40
|
+
datahub_airflow_plugin/example_dags/snowflake_sample_dag.py,sha256=b9iaE7zChQha9u57F84U6uqavGl7WrUnMNOzXEiZxjE,3234
|
|
41
|
+
datahub_airflow_plugin/example_dags/airflow2/__init__.py,sha256=cAFFodve_HVEokz3ZVo9lWvpYZ9WeLgZ4d8lSypHINQ,230
|
|
42
|
+
datahub_airflow_plugin/example_dags/airflow2/generic_recipe_sample_dag.py,sha256=zmrNm1L9q7hAksSYTcCNEFopy4Qy0sElM30JQZfan1U,1493
|
|
43
|
+
datahub_airflow_plugin/example_dags/airflow2/graph_usage_sample_dag.py,sha256=-rWLI58fErRoTpr38RCJXE0_lSBNZp17U38hg9Ow83A,1295
|
|
44
|
+
datahub_airflow_plugin/example_dags/airflow2/lineage_backend_demo.py,sha256=1sUWzCU4tCGOfMNFvV6QbP416Ld-Sc78O-9fNFv9zKo,2331
|
|
45
|
+
datahub_airflow_plugin/example_dags/airflow2/lineage_backend_taskflow_demo.py,sha256=7dB0874wOOVXZwiEcd7uHCK_ngtTW6JOHtFmhoqzaXU,2286
|
|
46
|
+
datahub_airflow_plugin/example_dags/airflow2/lineage_emission_dag.py,sha256=iZ2rN787gdiYvAfMD_ST-ZUijqXtH4UAQzoXgogSG9o,2649
|
|
47
|
+
datahub_airflow_plugin/example_dags/airflow2/mysql_sample_dag.py,sha256=qeB16im8mlQ2U3is55Lxo45dbUXVB29uS_ixSV74dPg,2132
|
|
48
|
+
datahub_airflow_plugin/example_dags/airflow2/snowflake_sample_dag.py,sha256=H1PMTRBBZcjGOiIKkMMJ57ujRFYPoxlc1sq7gMl-ahI,3619
|
|
49
|
+
datahub_airflow_plugin/example_dags/airflow3/__init__.py,sha256=XXQzH-43L_WnjfF-ahoC0jUBltS5X-nZyDPZSbZSNfg,233
|
|
50
|
+
datahub_airflow_plugin/example_dags/airflow3/lineage_backend_demo.py,sha256=U-xqMZtA1DN_WkdP6aE044efu6M0VGtBqMghwmBuDds,1635
|
|
51
|
+
datahub_airflow_plugin/example_dags/airflow3/lineage_backend_taskflow_demo.py,sha256=oTKaIAvoU212ORg8SxfhKtS9ne2oflSyPPEQcu3m--Q,1533
|
|
52
|
+
datahub_airflow_plugin/example_dags/airflow3/snowflake_sample_dag.py,sha256=6G8tJFSX9vFCdWn6wN1nvgZthuc3poBSBEeqylvQtzM,3265
|
|
53
|
+
datahub_airflow_plugin/hooks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
datahub_airflow_plugin/hooks/datahub.py,sha256=zFYFTm2HbkBXE5AKVDbUjMpBeRAdyZu0GM84eafqKWo,11550
|
|
55
|
+
datahub_airflow_plugin/operators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
+
datahub_airflow_plugin/operators/datahub.py,sha256=vU-v2fuJUkm1gMZa6zJehaVJ6QCLwvQd4G_vJ37BBVA,4402
|
|
57
|
+
datahub_airflow_plugin/operators/datahub_assertion_operator.py,sha256=j_P9M1a5qME55pKHAfTqZsVVtIslFBO59r8UQOOBvsk,2914
|
|
58
|
+
datahub_airflow_plugin/operators/datahub_assertion_sensor.py,sha256=QJIZZYQhqscj3bhBN5Sei-ABMRRAl2KiQxXTXcZQ51Q,2917
|
|
59
|
+
datahub_airflow_plugin/operators/datahub_operation_operator.py,sha256=KJ8M8jJ7UWW6kNbiS-rELc-kqCPkZ3ck7z51oAXGPSI,3351
|
|
60
|
+
datahub_airflow_plugin/operators/datahub_operation_sensor.py,sha256=U19fi5DpjBRWm_1ljXcjnspUzfa3mqYfOQZHjLk-ufI,3618
|
|
61
|
+
acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/METADATA,sha256=--lNPJHjgxLt4R1k0wZMNKBeBC4ylenTTbmEDYUe5Qg,12333
|
|
62
|
+
acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
63
|
+
acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/entry_points.txt,sha256=HqmajDHtrsz0b5Lswe1-eeuObxdtucd9YoxH77jJBA8,179
|
|
64
|
+
acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/top_level.txt,sha256=VBzisOQfzqL1WRbNyItaruf3kTigXltjzgqzbheaFp0,23
|
|
65
|
+
acryl_datahub_airflow_plugin-1.3.1.5rc8.dist-info/RECORD,,
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# This module must be imported before any Airflow imports in any of our files.
|
|
2
|
+
# It dispatches to version-specific compatibility modules.
|
|
3
|
+
|
|
4
|
+
from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
|
|
5
|
+
|
|
6
|
+
# Critical safety check: Ensure MarkupSafe compatibility patch is applied
|
|
7
|
+
# This must happen before importing Airflow to prevent MarkupSafe version conflicts
|
|
8
|
+
# Using explicit exception instead of assert to ensure it runs even with python -O
|
|
9
|
+
if not MARKUPSAFE_PATCHED:
|
|
10
|
+
raise RuntimeError(
|
|
11
|
+
"MarkupSafe compatibility patch must be applied before importing Airflow modules. "
|
|
12
|
+
"This is a critical safety check that cannot be disabled. "
|
|
13
|
+
"The patch ensures compatibility between different MarkupSafe versions used by "
|
|
14
|
+
"Airflow and DataHub dependencies."
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Detect Airflow version and dispatch to version-specific compat module
|
|
18
|
+
# These imports must come after the MARKUPSAFE_PATCHED check above.
|
|
19
|
+
import airflow
|
|
20
|
+
import packaging.version
|
|
21
|
+
|
|
22
|
+
AIRFLOW_VERSION = packaging.version.parse(airflow.__version__)
|
|
23
|
+
IS_AIRFLOW_3_OR_HIGHER = AIRFLOW_VERSION >= packaging.version.parse("3.0.0")
|
|
24
|
+
|
|
25
|
+
if IS_AIRFLOW_3_OR_HIGHER:
|
|
26
|
+
from datahub_airflow_plugin.airflow3._airflow_compat import AIRFLOW_PATCHED
|
|
27
|
+
else:
|
|
28
|
+
from datahub_airflow_plugin.airflow2._airflow_compat import AIRFLOW_PATCHED
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"AIRFLOW_PATCHED",
|
|
32
|
+
]
|
|
@@ -1,42 +1,75 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Pure dispatcher for version-specific Airflow shims.
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
from airflow.models.operator import Operator
|
|
6
|
-
|
|
7
|
-
try:
|
|
8
|
-
from airflow.sensors.external_task import ExternalTaskSensor
|
|
9
|
-
except ImportError:
|
|
10
|
-
from airflow.sensors.external_task_sensor import ExternalTaskSensor # type: ignore
|
|
11
|
-
|
|
12
|
-
# Approach suggested by https://stackoverflow.com/a/11887885/5004662.
|
|
13
|
-
AIRFLOW_VERSION = packaging.version.parse(airflow.version.version)
|
|
14
|
-
HAS_AIRFLOW_DAG_LISTENER_API = True # this is in Airflow 2.5+
|
|
15
|
-
HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse(
|
|
16
|
-
"2.8.0.dev0"
|
|
17
|
-
)
|
|
4
|
+
This module automatically imports the correct shims based on the installed
|
|
5
|
+
Airflow version, dispatching to either airflow2 or airflow3 implementations.
|
|
18
6
|
|
|
7
|
+
No logic lives here - just clean version detection and re-export.
|
|
8
|
+
"""
|
|
19
9
|
|
|
20
|
-
|
|
21
|
-
# From Airflow 2.4 _inlets is dropped and inlets used consistently. Earlier it was not the case, so we have to stick there to _inlets
|
|
22
|
-
if hasattr(operator, "_inlets"):
|
|
23
|
-
return operator._inlets # type: ignore[attr-defined, union-attr]
|
|
24
|
-
if hasattr(operator, "get_inlet_defs"):
|
|
25
|
-
return operator.get_inlet_defs() # type: ignore[attr-defined]
|
|
26
|
-
return operator.inlets or []
|
|
10
|
+
import packaging.version
|
|
27
11
|
|
|
12
|
+
from datahub_airflow_plugin._airflow_version_specific import (
|
|
13
|
+
AIRFLOW_VERSION,
|
|
14
|
+
IS_AIRFLOW_3_OR_HIGHER,
|
|
15
|
+
)
|
|
28
16
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return operator.get_outlet_defs()
|
|
36
|
-
return operator.outlets or []
|
|
17
|
+
# Version feature flags - hardcode based on Airflow version
|
|
18
|
+
# These were previously in the old _airflow_shims but are better kept simple
|
|
19
|
+
HAS_AIRFLOW_STANDALONE_CMD = AIRFLOW_VERSION >= packaging.version.parse("2.2")
|
|
20
|
+
HAS_AIRFLOW_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.3")
|
|
21
|
+
HAS_AIRFLOW_DAG_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5")
|
|
22
|
+
HAS_AIRFLOW_DATASET_LISTENER_API = AIRFLOW_VERSION >= packaging.version.parse("2.5")
|
|
37
23
|
|
|
24
|
+
if IS_AIRFLOW_3_OR_HIGHER:
|
|
25
|
+
# Airflow 3.x - use airflow3 shims
|
|
26
|
+
from datahub_airflow_plugin.airflow3._shims import (
|
|
27
|
+
BaseOperator,
|
|
28
|
+
ExternalTaskSensor,
|
|
29
|
+
MappedOperator,
|
|
30
|
+
OpenLineagePlugin,
|
|
31
|
+
Operator,
|
|
32
|
+
TaskHolder,
|
|
33
|
+
get_operator_class,
|
|
34
|
+
get_task_inlets,
|
|
35
|
+
get_task_outlets,
|
|
36
|
+
redact_with_exclusions,
|
|
37
|
+
try_import_from_string,
|
|
38
|
+
)
|
|
39
|
+
else:
|
|
40
|
+
# Airflow 2.x - use airflow2 shims
|
|
41
|
+
from datahub_airflow_plugin.airflow2._shims import ( # type: ignore[assignment]
|
|
42
|
+
BaseOperator,
|
|
43
|
+
ExternalTaskSensor,
|
|
44
|
+
MappedOperator,
|
|
45
|
+
OpenLineagePlugin,
|
|
46
|
+
Operator,
|
|
47
|
+
TaskHolder,
|
|
48
|
+
get_operator_class,
|
|
49
|
+
get_task_inlets,
|
|
50
|
+
get_task_outlets,
|
|
51
|
+
redact_with_exclusions,
|
|
52
|
+
try_import_from_string,
|
|
53
|
+
)
|
|
38
54
|
|
|
39
55
|
__all__ = [
|
|
56
|
+
# Airflow version and feature flags
|
|
40
57
|
"AIRFLOW_VERSION",
|
|
58
|
+
"IS_AIRFLOW_3_OR_HIGHER",
|
|
59
|
+
"HAS_AIRFLOW_STANDALONE_CMD",
|
|
60
|
+
"HAS_AIRFLOW_LISTENER_API",
|
|
61
|
+
"HAS_AIRFLOW_DAG_LISTENER_API",
|
|
62
|
+
"HAS_AIRFLOW_DATASET_LISTENER_API",
|
|
63
|
+
# Airflow objects
|
|
64
|
+
"BaseOperator",
|
|
65
|
+
"Operator",
|
|
66
|
+
"MappedOperator",
|
|
41
67
|
"ExternalTaskSensor",
|
|
68
|
+
"TaskHolder",
|
|
69
|
+
"OpenLineagePlugin",
|
|
70
|
+
"get_operator_class",
|
|
71
|
+
"try_import_from_string",
|
|
72
|
+
"redact_with_exclusions",
|
|
73
|
+
"get_task_inlets",
|
|
74
|
+
"get_task_outlets",
|
|
42
75
|
]
|