apache-airflow-providers-openlineage 1.6.0rc1__tar.gz → 1.7.0rc1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.

Files changed (24)
  1. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/PKG-INFO +10 -9
  2. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/README.rst +4 -4
  3. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/__init__.py +1 -1
  4. apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/conf.py +103 -0
  5. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/base.py +3 -34
  6. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/bash.py +6 -22
  7. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/manager.py +14 -33
  8. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/python.py +6 -22
  9. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/get_provider_info.py +9 -1
  10. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/adapter.py +9 -19
  11. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/facets.py +11 -0
  12. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/listener.py +49 -4
  13. apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/plugins/macros.py +87 -0
  14. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/openlineage.py +9 -19
  15. apache_airflow_providers_openlineage-1.7.0rc1/airflow/providers/openlineage/utils/selective_enable.py +87 -0
  16. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/utils.py +79 -125
  17. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/pyproject.toml +6 -5
  18. apache_airflow_providers_openlineage-1.6.0rc1/airflow/providers/openlineage/plugins/macros.py +0 -66
  19. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/LICENSE +0 -0
  20. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/extractors/__init__.py +0 -0
  21. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/plugins/__init__.py +0 -0
  22. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/sqlparser.py +0 -0
  23. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/__init__.py +0 -0
  24. {apache_airflow_providers_openlineage-1.6.0rc1 → apache_airflow_providers_openlineage-1.7.0rc1}/airflow/providers/openlineage/utils/sql.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: apache-airflow-providers-openlineage
3
- Version: 1.6.0rc1
3
+ Version: 1.7.0rc1
4
4
  Summary: Provider package apache-airflow-providers-openlineage for Apache Airflow
5
5
  Keywords: airflow-provider,openlineage,airflow,integration
6
6
  Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -19,16 +19,17 @@ Classifier: Programming Language :: Python :: 3.8
19
19
  Classifier: Programming Language :: Python :: 3.9
20
20
  Classifier: Programming Language :: Python :: 3.10
21
21
  Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
22
23
  Classifier: Topic :: System :: Monitoring
23
- Requires-Dist: apache-airflow-providers-common-sql>=1.6.0.dev0
24
- Requires-Dist: apache-airflow>=2.7.0.dev0
24
+ Requires-Dist: apache-airflow-providers-common-sql>=1.6.0rc0
25
+ Requires-Dist: apache-airflow>=2.7.0rc0
25
26
  Requires-Dist: attrs>=22.2
26
27
  Requires-Dist: openlineage-integration-common>=0.28.0
27
28
  Requires-Dist: openlineage-python>=0.28.0
28
29
  Requires-Dist: apache-airflow-providers-common-sql ; extra == "common.sql"
29
30
  Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
30
- Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html
31
- Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0
31
+ Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html
32
+ Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0
32
33
  Project-URL: Slack Chat, https://s.apache.org/airflow-slack
33
34
  Project-URL: Source Code, https://github.com/apache/airflow
34
35
  Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -79,7 +80,7 @@ Provides-Extra: common.sql
79
80
 
80
81
  Package ``apache-airflow-providers-openlineage``
81
82
 
82
- Release: ``1.6.0.rc1``
83
+ Release: ``1.7.0.rc1``
83
84
 
84
85
 
85
86
  `OpenLineage <https://openlineage.io/>`__
@@ -92,7 +93,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
92
93
  are in ``airflow.providers.openlineage`` python package.
93
94
 
94
95
  You can find package information and changelog for the provider
95
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
96
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
96
97
 
97
98
  Installation
98
99
  ------------
@@ -101,7 +102,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
101
102
  for the minimum Airflow version supported) via
102
103
  ``pip install apache-airflow-providers-openlineage``
103
104
 
104
- The package supports the following python versions: 3.8,3.9,3.10,3.11
105
+ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
105
106
 
106
107
  Requirements
107
108
  ------------
@@ -136,4 +137,4 @@ Dependent package
136
137
  ============================================================================================================ ==============
137
138
 
138
139
  The changelog for the provider package can be found in the
139
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
140
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
@@ -42,7 +42,7 @@
42
42
 
43
43
  Package ``apache-airflow-providers-openlineage``
44
44
 
45
- Release: ``1.6.0.rc1``
45
+ Release: ``1.7.0.rc1``
46
46
 
47
47
 
48
48
  `OpenLineage <https://openlineage.io/>`__
@@ -55,7 +55,7 @@ This is a provider package for ``openlineage`` provider. All classes for this pr
55
55
  are in ``airflow.providers.openlineage`` python package.
56
56
 
57
57
  You can find package information and changelog for the provider
58
- in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/>`_.
58
+ in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/>`_.
59
59
 
60
60
  Installation
61
61
  ------------
@@ -64,7 +64,7 @@ You can install this package on top of an existing Airflow 2 installation (see `
64
64
  for the minimum Airflow version supported) via
65
65
  ``pip install apache-airflow-providers-openlineage``
66
66
 
67
- The package supports the following python versions: 3.8,3.9,3.10,3.11
67
+ The package supports the following python versions: 3.8,3.9,3.10,3.11,3.12
68
68
 
69
69
  Requirements
70
70
  ------------
@@ -99,4 +99,4 @@ Dependent package
99
99
  ============================================================================================================ ==============
100
100
 
101
101
  The changelog for the provider package can be found in the
102
- `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.6.0/changelog.html>`_.
102
+ `changelog <https://airflow.apache.org/docs/apache-airflow-providers-openlineage/1.7.0/changelog.html>`_.
@@ -27,7 +27,7 @@ import packaging.version
27
27
 
28
28
  __all__ = ["__version__"]
29
29
 
30
- __version__ = "1.6.0"
30
+ __version__ = "1.7.0"
31
31
 
32
32
  try:
33
33
  from airflow import __version__ as airflow_version
@@ -0,0 +1,103 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ from typing import Any
22
+
23
+ from airflow.compat.functools import cache
24
+ from airflow.configuration import conf
25
+
26
+ _CONFIG_SECTION = "openlineage"
27
+
28
+
29
+ @cache
30
+ def config_path(check_legacy_env_var: bool = True) -> str:
31
+ """[openlineage] config_path."""
32
+ option = conf.get(_CONFIG_SECTION, "config_path", fallback="")
33
+ if check_legacy_env_var and not option:
34
+ option = os.getenv("OPENLINEAGE_CONFIG", "")
35
+ return option
36
+
37
+
38
+ @cache
39
+ def is_source_enabled() -> bool:
40
+ """[openlineage] disable_source_code."""
41
+ option = conf.get(_CONFIG_SECTION, "disable_source_code", fallback="")
42
+ if not option:
43
+ option = os.getenv("OPENLINEAGE_AIRFLOW_DISABLE_SOURCE_CODE", "")
44
+ return option.lower() not in ("true", "1", "t")
45
+
46
+
47
+ @cache
48
+ def disabled_operators() -> set[str]:
49
+ """[openlineage] disabled_for_operators."""
50
+ option = conf.get(_CONFIG_SECTION, "disabled_for_operators", fallback="")
51
+ return set(operator.strip() for operator in option.split(";") if operator.strip())
52
+
53
+
54
+ @cache
55
+ def selective_enable() -> bool:
56
+ return conf.getboolean(_CONFIG_SECTION, "selective_enable", fallback=False)
57
+
58
+
59
+ @cache
60
+ def custom_extractors() -> set[str]:
61
+ """[openlineage] extractors."""
62
+ option = conf.get(_CONFIG_SECTION, "extractors", fallback="")
63
+ if not option:
64
+ option = os.getenv("OPENLINEAGE_EXTRACTORS", "")
65
+ return set(extractor.strip() for extractor in option.split(";") if extractor.strip())
66
+
67
+
68
+ @cache
69
+ def namespace() -> str:
70
+ """[openlineage] namespace."""
71
+ option = conf.get(_CONFIG_SECTION, "namespace", fallback="")
72
+ if not option:
73
+ option = os.getenv("OPENLINEAGE_NAMESPACE", "default")
74
+ return option
75
+
76
+
77
+ @cache
78
+ def transport() -> dict[str, Any]:
79
+ """[openlineage] transport."""
80
+ option = conf.getjson(_CONFIG_SECTION, "transport", fallback={})
81
+ if not isinstance(option, dict):
82
+ raise ValueError(f"OpenLineage transport `{option}` is not a dict")
83
+ return option
84
+
85
+
86
+ @cache
87
+ def is_disabled() -> bool:
88
+ """[openlineage] disabled + some extra checks."""
89
+
90
+ def _is_true(val):
91
+ return str(val).lower().strip() in ("true", "1", "t")
92
+
93
+ option = conf.get(_CONFIG_SECTION, "disabled", fallback="")
94
+ if _is_true(option):
95
+ return True
96
+
97
+ option = os.getenv("OPENLINEAGE_DISABLED", "")
98
+ if _is_true(option):
99
+ return True
100
+
101
+ # Check if both 'transport' and 'config_path' are not present and also
102
+ # if legacy 'OPENLINEAGE_URL' environment variables is not set
103
+ return transport() == {} and config_path(True) == "" and os.getenv("OPENLINEAGE_URL", "") == ""
@@ -18,12 +18,10 @@
18
18
  from __future__ import annotations
19
19
 
20
20
  from abc import ABC, abstractmethod
21
- from functools import cached_property
22
21
  from typing import TYPE_CHECKING
23
22
 
24
23
  from attrs import Factory, define
25
24
 
26
- from airflow.configuration import conf
27
25
  from airflow.utils.log.logging_mixin import LoggingMixin
28
26
  from airflow.utils.state import TaskInstanceState
29
27
 
@@ -64,33 +62,10 @@ class BaseExtractor(ABC, LoggingMixin):
64
62
  """
65
63
  raise NotImplementedError()
66
64
 
67
- @cached_property
68
- def disabled_operators(self) -> set[str]:
69
- return set(
70
- operator.strip() for operator in conf.get("openlineage", "disabled_for_operators").split(";")
71
- )
72
-
73
- @cached_property
74
- def _is_operator_disabled(self) -> bool:
75
- fully_qualified_class_name = (
76
- self.operator.__class__.__module__ + "." + self.operator.__class__.__name__
77
- )
78
- return fully_qualified_class_name in self.disabled_operators
79
-
80
- def validate(self):
81
- assert self.operator.task_type in self.get_operator_classnames()
82
-
83
65
  @abstractmethod
84
- def _execute_extraction(self) -> OperatorLineage | None:
85
- ...
66
+ def _execute_extraction(self) -> OperatorLineage | None: ...
86
67
 
87
68
  def extract(self) -> OperatorLineage | None:
88
- if self._is_operator_disabled:
89
- self.log.debug(
90
- f"Skipping extraction for operator {self.operator.task_type} "
91
- "due to its presence in [openlineage] openlineage_disabled_for_operators."
92
- )
93
- return None
94
69
  return self._execute_extraction()
95
70
 
96
71
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
@@ -121,18 +96,12 @@ class DefaultExtractor(BaseExtractor):
121
96
  return None
122
97
  except AttributeError:
123
98
  self.log.debug(
124
- f"Operator {self.operator.task_type} does not have the "
125
- "get_openlineage_facets_on_start method."
99
+ "Operator %s does not have the get_openlineage_facets_on_start method.",
100
+ self.operator.task_type,
126
101
  )
127
102
  return None
128
103
 
129
104
  def extract_on_complete(self, task_instance) -> OperatorLineage | None:
130
- if self._is_operator_disabled:
131
- self.log.debug(
132
- f"Skipping extraction for operator {self.operator.task_type} "
133
- "due to its presence in [openlineage] openlineage_disabled_for_operators."
134
- )
135
- return None
136
105
  if task_instance.state == TaskInstanceState.FAILED:
137
106
  on_failed = getattr(self.operator, "get_openlineage_facets_on_failure", None)
138
107
  if on_failed and callable(on_failed):
@@ -19,15 +19,9 @@ from __future__ import annotations
19
19
 
20
20
  from openlineage.client.facet import SourceCodeJobFacet
21
21
 
22
+ from airflow.providers.openlineage import conf
22
23
  from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
23
- from airflow.providers.openlineage.plugins.facets import (
24
- UnknownOperatorAttributeRunFacet,
25
- UnknownOperatorInstance,
26
- )
27
- from airflow.providers.openlineage.utils.utils import (
28
- get_filtered_unknown_operator_keys,
29
- is_source_enabled,
30
- )
24
+ from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
31
25
 
32
26
  """
33
27
  :meta private:
@@ -51,7 +45,7 @@ class BashExtractor(BaseExtractor):
51
45
 
52
46
  def _execute_extraction(self) -> OperatorLineage | None:
53
47
  job_facets: dict = {}
54
- if is_source_enabled():
48
+ if conf.is_source_enabled():
55
49
  job_facets = {
56
50
  "sourceCode": SourceCodeJobFacet(
57
51
  language="bash",
@@ -62,19 +56,9 @@ class BashExtractor(BaseExtractor):
62
56
 
63
57
  return OperatorLineage(
64
58
  job_facets=job_facets,
65
- run_facets={
66
- # The BashOperator is recorded as an "unknownSource" even though we have an
67
- # extractor, as the <i>data lineage</i> cannot be determined from the operator
68
- # directly.
69
- "unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
70
- unknownItems=[
71
- UnknownOperatorInstance(
72
- name="BashOperator",
73
- properties=get_filtered_unknown_operator_keys(self.operator),
74
- )
75
- ]
76
- )
77
- },
59
+ # The BashOperator is recorded as an "unknownSource" even though we have an extractor,
60
+ # as the <i>data lineage</i> cannot be determined from the operator directly.
61
+ run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="BashOperator"),
78
62
  )
79
63
 
80
64
  def extract(self) -> OperatorLineage | None:
@@ -16,20 +16,15 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
- import os
20
19
  from contextlib import suppress
21
20
  from typing import TYPE_CHECKING, Iterator
22
21
 
23
- from airflow.configuration import conf
22
+ from airflow.providers.openlineage import conf
24
23
  from airflow.providers.openlineage.extractors import BaseExtractor, OperatorLineage
25
24
  from airflow.providers.openlineage.extractors.base import DefaultExtractor
26
25
  from airflow.providers.openlineage.extractors.bash import BashExtractor
27
26
  from airflow.providers.openlineage.extractors.python import PythonExtractor
28
- from airflow.providers.openlineage.plugins.facets import (
29
- UnknownOperatorAttributeRunFacet,
30
- UnknownOperatorInstance,
31
- )
32
- from airflow.providers.openlineage.utils.utils import get_filtered_unknown_operator_keys
27
+ from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
33
28
  from airflow.utils.log.logging_mixin import LoggingMixin
34
29
  from airflow.utils.module_loading import import_string
35
30
 
@@ -65,22 +60,17 @@ class ExtractorManager(LoggingMixin):
65
60
  for operator_class in extractor.get_operator_classnames():
66
61
  self.extractors[operator_class] = extractor
67
62
 
68
- # Semicolon-separated extractors in Airflow configuration or OPENLINEAGE_EXTRACTORS variable.
69
- # Extractors should implement BaseExtractor
70
- env_extractors = conf.get("openlineage", "extractors", fallback=os.getenv("OPENLINEAGE_EXTRACTORS"))
71
- # skip either when it's empty string or None
72
- if env_extractors:
73
- for extractor in env_extractors.split(";"):
74
- extractor: type[BaseExtractor] = try_import_from_string(extractor.strip())
75
- for operator_class in extractor.get_operator_classnames():
76
- if operator_class in self.extractors:
77
- self.log.debug(
78
- "Duplicate extractor found for `%s`. `%s` will be used instead of `%s`",
79
- operator_class,
80
- extractor,
81
- self.extractors[operator_class],
82
- )
83
- self.extractors[operator_class] = extractor
63
+ for extractor_path in conf.custom_extractors():
64
+ extractor: type[BaseExtractor] = try_import_from_string(extractor_path)
65
+ for operator_class in extractor.get_operator_classnames():
66
+ if operator_class in self.extractors:
67
+ self.log.debug(
68
+ "Duplicate extractor found for `%s`. `%s` will be used instead of `%s`",
69
+ operator_class,
70
+ extractor_path,
71
+ self.extractors[operator_class],
72
+ )
73
+ self.extractors[operator_class] = extractor
84
74
 
85
75
  def add_extractor(self, operator_class: str, extractor: type[BaseExtractor]):
86
76
  self.extractors[operator_class] = extractor
@@ -121,16 +111,7 @@ class ExtractorManager(LoggingMixin):
121
111
 
122
112
  # Only include the unkonwnSourceAttribute facet if there is no extractor
123
113
  task_metadata = OperatorLineage(
124
- run_facets={
125
- "unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
126
- unknownItems=[
127
- UnknownOperatorInstance(
128
- name=task.task_type,
129
- properties=get_filtered_unknown_operator_keys(task),
130
- )
131
- ]
132
- )
133
- },
114
+ run_facets=get_unknown_source_attribute_run_facet(task=task),
134
115
  )
135
116
  inlets = task.get_inlet_defs()
136
117
  outlets = task.get_outlet_defs()
@@ -22,15 +22,9 @@ from typing import Callable
22
22
 
23
23
  from openlineage.client.facet import SourceCodeJobFacet
24
24
 
25
+ from airflow.providers.openlineage import conf
25
26
  from airflow.providers.openlineage.extractors.base import BaseExtractor, OperatorLineage
26
- from airflow.providers.openlineage.plugins.facets import (
27
- UnknownOperatorAttributeRunFacet,
28
- UnknownOperatorInstance,
29
- )
30
- from airflow.providers.openlineage.utils.utils import (
31
- get_filtered_unknown_operator_keys,
32
- is_source_enabled,
33
- )
27
+ from airflow.providers.openlineage.utils.utils import get_unknown_source_attribute_run_facet
34
28
 
35
29
  """
36
30
  :meta private:
@@ -55,7 +49,7 @@ class PythonExtractor(BaseExtractor):
55
49
  def _execute_extraction(self) -> OperatorLineage | None:
56
50
  source_code = self.get_source_code(self.operator.python_callable)
57
51
  job_facet: dict = {}
58
- if is_source_enabled() and source_code:
52
+ if conf.is_source_enabled() and source_code:
59
53
  job_facet = {
60
54
  "sourceCode": SourceCodeJobFacet(
61
55
  language="python",
@@ -65,19 +59,9 @@ class PythonExtractor(BaseExtractor):
65
59
  }
66
60
  return OperatorLineage(
67
61
  job_facets=job_facet,
68
- run_facets={
69
- # The PythonOperator is recorded as an "unknownSource" even though we have an
70
- # extractor, as the data lineage cannot be determined from the operator
71
- # directly.
72
- "unknownSourceAttribute": UnknownOperatorAttributeRunFacet(
73
- unknownItems=[
74
- UnknownOperatorInstance(
75
- name="PythonOperator",
76
- properties=get_filtered_unknown_operator_keys(self.operator),
77
- )
78
- ]
79
- )
80
- },
62
+ # The PythonOperator is recorded as an "unknownSource" even though we have an extractor,
63
+ # as the <i>data lineage</i> cannot be determined from the operator directly.
64
+ run_facets=get_unknown_source_attribute_run_facet(task=self.operator, name="PythonOperator"),
81
65
  )
82
66
 
83
67
  def get_source_code(self, callable: Callable) -> str | None:
@@ -28,8 +28,9 @@ def get_provider_info():
28
28
  "name": "OpenLineage Airflow",
29
29
  "description": "`OpenLineage <https://openlineage.io/>`__\n",
30
30
  "state": "ready",
31
- "source-date-epoch": 1709555960,
31
+ "source-date-epoch": 1712666247,
32
32
  "versions": [
33
+ "1.7.0",
33
34
  "1.6.0",
34
35
  "1.5.0",
35
36
  "1.4.0",
@@ -82,6 +83,13 @@ def get_provider_info():
82
83
  "default": "",
83
84
  "version_added": "1.1.0",
84
85
  },
86
+ "selective_enable": {
87
+ "description": "If this setting is enabled, OpenLineage integration won't collect and emit metadata,\nunless you explicitly enable it per `DAG` or `Task` using `enable_lineage` method.\n",
88
+ "type": "boolean",
89
+ "default": "False",
90
+ "example": None,
91
+ "version_added": "1.7.0",
92
+ },
85
93
  "namespace": {
86
94
  "description": "Set namespace that the lineage data belongs to, so that if you use multiple OpenLineage producers,\nevents coming from them will be logically separated.\n",
87
95
  "version_added": None,
@@ -16,7 +16,6 @@
16
16
  # under the License.
17
17
  from __future__ import annotations
18
18
 
19
- import os
20
19
  import uuid
21
20
  from contextlib import ExitStack
22
21
  from typing import TYPE_CHECKING
@@ -37,8 +36,7 @@ from openlineage.client.facet import (
37
36
  )
38
37
  from openlineage.client.run import Job, Run, RunEvent, RunState
39
38
 
40
- from airflow.configuration import conf
41
- from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION
39
+ from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
42
40
  from airflow.providers.openlineage.utils.utils import OpenLineageRedactor
43
41
  from airflow.stats import Stats
44
42
  from airflow.utils.log.logging_mixin import LoggingMixin
@@ -48,12 +46,6 @@ if TYPE_CHECKING:
48
46
  from airflow.providers.openlineage.extractors import OperatorLineage
49
47
  from airflow.utils.log.secrets_masker import SecretsMasker
50
48
 
51
- _DAG_DEFAULT_NAMESPACE = "default"
52
-
53
- _DAG_NAMESPACE = conf.get(
54
- "openlineage", "namespace", fallback=os.getenv("OPENLINEAGE_NAMESPACE", _DAG_DEFAULT_NAMESPACE)
55
- )
56
-
57
49
  _PRODUCER = f"https://github.com/apache/airflow/tree/providers-openlineage/{OPENLINEAGE_PROVIDER_VERSION}"
58
50
 
59
51
  set_producer(_PRODUCER)
@@ -88,18 +80,16 @@ class OpenLineageAdapter(LoggingMixin):
88
80
 
89
81
  def get_openlineage_config(self) -> dict | None:
90
82
  # First, try to read from YAML file
91
- openlineage_config_path = conf.get("openlineage", "config_path")
83
+ openlineage_config_path = conf.config_path(check_legacy_env_var=False)
92
84
  if openlineage_config_path:
93
85
  config = self._read_yaml_config(openlineage_config_path)
94
86
  if config:
95
87
  return config.get("transport", None)
96
88
  # Second, try to get transport config
97
- transport = conf.getjson("openlineage", "transport")
98
- if not transport:
89
+ transport_config = conf.transport()
90
+ if not transport_config:
99
91
  return None
100
- elif not isinstance(transport, dict):
101
- raise ValueError(f"{transport} is not a dict")
102
- return transport
92
+ return transport_config
103
93
 
104
94
  def _read_yaml_config(self, path: str) -> dict | None:
105
95
  with open(path) as config_file:
@@ -107,14 +97,14 @@ class OpenLineageAdapter(LoggingMixin):
107
97
 
108
98
  @staticmethod
109
99
  def build_dag_run_id(dag_id, dag_run_id):
110
- return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{_DAG_NAMESPACE}.{dag_id}.{dag_run_id}"))
100
+ return str(uuid.uuid3(uuid.NAMESPACE_URL, f"{conf.namespace()}.{dag_id}.{dag_run_id}"))
111
101
 
112
102
  @staticmethod
113
103
  def build_task_instance_run_id(dag_id, task_id, execution_date, try_number):
114
104
  return str(
115
105
  uuid.uuid3(
116
106
  uuid.NAMESPACE_URL,
117
- f"{_DAG_NAMESPACE}.{dag_id}.{task_id}.{execution_date}.{try_number}",
107
+ f"{conf.namespace()}.{dag_id}.{task_id}.{execution_date}.{try_number}",
118
108
  )
119
109
  )
120
110
 
@@ -353,7 +343,7 @@ class OpenLineageAdapter(LoggingMixin):
353
343
  if parent_run_id:
354
344
  parent_run_facet = ParentRunFacet.create(
355
345
  runId=parent_run_id,
356
- namespace=_DAG_NAMESPACE,
346
+ namespace=conf.namespace(),
357
347
  name=parent_job_name or job_name,
358
348
  )
359
349
  facets.update(
@@ -396,4 +386,4 @@ class OpenLineageAdapter(LoggingMixin):
396
386
 
397
387
  facets.update({"jobType": job_type})
398
388
 
399
- return Job(_DAG_NAMESPACE, job_name, facets)
389
+ return Job(conf.namespace(), job_name, facets)
@@ -17,10 +17,17 @@
17
17
  from __future__ import annotations
18
18
 
19
19
  from attrs import define
20
+ from deprecated import deprecated
20
21
  from openlineage.client.facet import BaseFacet
21
22
  from openlineage.client.utils import RedactMixin
22
23
 
24
+ from airflow.exceptions import AirflowProviderDeprecationWarning
23
25
 
26
+
27
+ @deprecated(
28
+ reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
29
+ category=AirflowProviderDeprecationWarning,
30
+ )
24
31
  @define(slots=False)
25
32
  class AirflowMappedTaskRunFacet(BaseFacet):
26
33
  """Run facet containing information about mapped tasks."""
@@ -66,6 +73,10 @@ class UnknownOperatorInstance(RedactMixin):
66
73
  _skip_redact = ["name", "type"]
67
74
 
68
75
 
76
+ @deprecated(
77
+ reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
78
+ category=AirflowProviderDeprecationWarning,
79
+ )
69
80
  @define(slots=False)
70
81
  class UnknownOperatorAttributeRunFacet(BaseFacet):
71
82
  """RunFacet that describes unknown operators in an Airflow DAG."""