apache-airflow-providers-openlineage 1.9.1rc1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-openlineage might be problematic. (The original page linked to further details here; the link target was not preserved in this text.)

@@ -22,24 +22,25 @@ from typing import TYPE_CHECKING
22
22
 
23
23
  import yaml
24
24
  from openlineage.client import OpenLineageClient, set_producer
25
- from openlineage.client.facet import (
26
- BaseFacet,
27
- DocumentationJobFacet,
28
- ErrorMessageRunFacet,
29
- JobTypeJobFacet,
30
- NominalTimeRunFacet,
31
- OwnershipJobFacet,
32
- OwnershipJobFacetOwners,
33
- ParentRunFacet,
34
- ProcessingEngineRunFacet,
35
- SourceCodeLocationJobFacet,
25
+ from openlineage.client.event_v2 import Job, Run, RunEvent, RunState
26
+ from openlineage.client.facet_v2 import (
27
+ JobFacet,
28
+ RunFacet,
29
+ documentation_job,
30
+ error_message_run,
31
+ job_type_job,
32
+ nominal_time_run,
33
+ ownership_job,
34
+ parent_run,
35
+ processing_engine_run,
36
+ source_code_location_job,
36
37
  )
37
- from openlineage.client.run import Job, Run, RunEvent, RunState
38
38
  from openlineage.client.uuid import generate_static_uuid
39
39
 
40
40
  from airflow.providers.openlineage import __version__ as OPENLINEAGE_PROVIDER_VERSION, conf
41
41
  from airflow.providers.openlineage.utils.utils import (
42
42
  OpenLineageRedactor,
43
+ get_airflow_dag_run_facet,
43
44
  get_airflow_state_run_facet,
44
45
  )
45
46
  from airflow.stats import Stats
@@ -59,8 +60,8 @@ set_producer(_PRODUCER)
59
60
  # https://openlineage.io/docs/spec/facets/job-facets/job-type
60
61
  # They must be set after the `set_producer(_PRODUCER)`
61
62
  # otherwise the `JobTypeJobFacet._producer` will be set with the default value
62
- _JOB_TYPE_DAG = JobTypeJobFacet(jobType="DAG", integration="AIRFLOW", processingType="BATCH")
63
- _JOB_TYPE_TASK = JobTypeJobFacet(jobType="TASK", integration="AIRFLOW", processingType="BATCH")
63
+ _JOB_TYPE_DAG = job_type_job.JobTypeJobFacet(jobType="DAG", integration="AIRFLOW", processingType="BATCH")
64
+ _JOB_TYPE_TASK = job_type_job.JobTypeJobFacet(jobType="TASK", integration="AIRFLOW", processingType="BATCH")
64
65
 
65
66
 
66
67
  class OpenLineageAdapter(LoggingMixin):
@@ -148,7 +149,7 @@ class OpenLineageAdapter(LoggingMixin):
148
149
  if not self._client:
149
150
  self._client = self.get_or_create_openlineage_client()
150
151
  redacted_event: RunEvent = self._redacter.redact(event, max_depth=20) # type: ignore[assignment]
151
- event_type = event.eventType.value.lower()
152
+ event_type = event.eventType.value.lower() if event.eventType else ""
152
153
  transport_type = f"{self._client.transport.kind}".lower()
153
154
 
154
155
  try:
@@ -177,7 +178,7 @@ class OpenLineageAdapter(LoggingMixin):
177
178
  nominal_end_time: str | None,
178
179
  owners: list[str],
179
180
  task: OperatorLineage | None,
180
- run_facets: dict[str, BaseFacet] | None = None, # Custom run facets
181
+ run_facets: dict[str, RunFacet] | None = None,
181
182
  ) -> RunEvent:
182
183
  """
183
184
  Emit openlineage event of type START.
@@ -198,14 +199,13 @@ class OpenLineageAdapter(LoggingMixin):
198
199
  """
199
200
  from airflow.version import version as AIRFLOW_VERSION
200
201
 
201
- processing_engine_version_facet = ProcessingEngineRunFacet(
202
+ processing_engine_version_facet = processing_engine_run.ProcessingEngineRunFacet(
202
203
  version=AIRFLOW_VERSION,
203
204
  name="Airflow",
204
205
  openlineageAdapterVersion=OPENLINEAGE_PROVIDER_VERSION,
205
206
  )
206
207
 
207
- if not run_facets:
208
- run_facets = {}
208
+ run_facets = run_facets or {}
209
209
  if task:
210
210
  run_facets = {**task.run_facets, **run_facets}
211
211
  run_facets["processing_engine"] = processing_engine_version_facet # type: ignore
@@ -243,6 +243,7 @@ class OpenLineageAdapter(LoggingMixin):
243
243
  parent_run_id: str | None,
244
244
  end_time: str,
245
245
  task: OperatorLineage,
246
+ run_facets: dict[str, RunFacet] | None = None,
246
247
  ) -> RunEvent:
247
248
  """
248
249
  Emit openlineage event of type COMPLETE.
@@ -254,7 +255,11 @@ class OpenLineageAdapter(LoggingMixin):
254
255
  :param parent_run_id: identifier of job spawning this task
255
256
  :param end_time: time of task completion
256
257
  :param task: metadata container with information extracted from operator
258
+ :param run_facets: additional run facets
257
259
  """
260
+ run_facets = run_facets or {}
261
+ if task:
262
+ run_facets = {**task.run_facets, **run_facets}
258
263
  event = RunEvent(
259
264
  eventType=RunState.COMPLETE,
260
265
  eventTime=end_time,
@@ -263,7 +268,7 @@ class OpenLineageAdapter(LoggingMixin):
263
268
  job_name=job_name,
264
269
  parent_job_name=parent_job_name,
265
270
  parent_run_id=parent_run_id,
266
- run_facets=task.run_facets,
271
+ run_facets=run_facets,
267
272
  ),
268
273
  job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
269
274
  inputs=task.inputs,
@@ -281,6 +286,7 @@ class OpenLineageAdapter(LoggingMixin):
281
286
  end_time: str,
282
287
  task: OperatorLineage,
283
288
  error: str | BaseException | None = None,
289
+ run_facets: dict[str, RunFacet] | None = None,
284
290
  ) -> RunEvent:
285
291
  """
286
292
  Emit openlineage event of type FAIL.
@@ -292,21 +298,23 @@ class OpenLineageAdapter(LoggingMixin):
292
298
  :param parent_run_id: identifier of job spawning this task
293
299
  :param end_time: time of task completion
294
300
  :param task: metadata container with information extracted from operator
301
+ :param run_facets: custom run facets
295
302
  :param error: error
303
+ :param run_facets: additional run facets
296
304
  """
297
- error_facet = {}
305
+ run_facets = run_facets or {}
306
+ if task:
307
+ run_facets = {**task.run_facets, **run_facets}
308
+
298
309
  if error:
299
310
  stack_trace = None
300
311
  if isinstance(error, BaseException) and error.__traceback__:
301
312
  import traceback
302
313
 
303
314
  stack_trace = "\\n".join(traceback.format_exception(type(error), error, error.__traceback__))
304
-
305
- error_facet = {
306
- "errorMessage": ErrorMessageRunFacet(
307
- message=str(error), programmingLanguage="python", stackTrace=stack_trace
308
- )
309
- }
315
+ run_facets["errorMessage"] = error_message_run.ErrorMessageRunFacet(
316
+ message=str(error), programmingLanguage="python", stackTrace=stack_trace
317
+ )
310
318
 
311
319
  event = RunEvent(
312
320
  eventType=RunState.FAIL,
@@ -316,7 +324,7 @@ class OpenLineageAdapter(LoggingMixin):
316
324
  job_name=job_name,
317
325
  parent_job_name=parent_job_name,
318
326
  parent_run_id=parent_run_id,
319
- run_facets={**task.run_facets, **error_facet},
327
+ run_facets=run_facets,
320
328
  ),
321
329
  job=self._build_job(job_name, job_type=_JOB_TYPE_TASK, job_facets=task.job_facets),
322
330
  inputs=task.inputs,
@@ -331,9 +339,10 @@ class OpenLineageAdapter(LoggingMixin):
331
339
  msg: str,
332
340
  nominal_start_time: str,
333
341
  nominal_end_time: str,
334
- job_facets: dict[str, BaseFacet] | None = None, # Custom job facets
342
+ job_facets: dict[str, JobFacet] | None = None, # Custom job facets
335
343
  ):
336
344
  try:
345
+ owner = [x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None
337
346
  event = RunEvent(
338
347
  eventType=RunState.START,
339
348
  eventTime=dag_run.start_date.isoformat(),
@@ -341,7 +350,7 @@ class OpenLineageAdapter(LoggingMixin):
341
350
  job_name=dag_run.dag_id,
342
351
  job_type=_JOB_TYPE_DAG,
343
352
  job_description=dag_run.dag.description if dag_run.dag else None,
344
- owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None,
353
+ owners=owner,
345
354
  job_facets=job_facets,
346
355
  ),
347
356
  run=self._build_run(
@@ -352,6 +361,7 @@ class OpenLineageAdapter(LoggingMixin):
352
361
  job_name=dag_run.dag_id,
353
362
  nominal_start_time=nominal_start_time,
354
363
  nominal_end_time=nominal_end_time,
364
+ run_facets=get_airflow_dag_run_facet(dag_run),
355
365
  ),
356
366
  inputs=[],
357
367
  outputs=[],
@@ -400,7 +410,9 @@ class OpenLineageAdapter(LoggingMixin):
400
410
  execution_date=dag_run.execution_date,
401
411
  ),
402
412
  facets={
403
- "errorMessage": ErrorMessageRunFacet(message=msg, programmingLanguage="python"),
413
+ "errorMessage": error_message_run.ErrorMessageRunFacet(
414
+ message=msg, programmingLanguage="python"
415
+ ),
404
416
  **get_airflow_state_run_facet(dag_run),
405
417
  },
406
418
  ),
@@ -423,23 +435,19 @@ class OpenLineageAdapter(LoggingMixin):
423
435
  parent_run_id: str | None = None,
424
436
  nominal_start_time: str | None = None,
425
437
  nominal_end_time: str | None = None,
426
- run_facets: dict[str, BaseFacet] | None = None,
438
+ run_facets: dict[str, RunFacet] | None = None,
427
439
  ) -> Run:
428
- facets: dict[str, BaseFacet] = {}
440
+ facets: dict[str, RunFacet] = {}
429
441
  if nominal_start_time:
430
- facets.update({"nominalTime": NominalTimeRunFacet(nominal_start_time, nominal_end_time)})
431
- if parent_run_id:
432
- parent_run_facet = ParentRunFacet.create(
433
- runId=parent_run_id,
434
- namespace=conf.namespace(),
435
- name=parent_job_name or job_name,
436
- )
437
442
  facets.update(
438
- {
439
- "parent": parent_run_facet,
440
- "parentRun": parent_run_facet, # Keep sending this for the backward compatibility
441
- }
443
+ {"nominalTime": nominal_time_run.NominalTimeRunFacet(nominal_start_time, nominal_end_time)}
442
444
  )
445
+ if parent_run_id:
446
+ parent_run_facet = parent_run.ParentRunFacet(
447
+ run=parent_run.Run(runId=parent_run_id),
448
+ job=parent_run.Job(namespace=conf.namespace(), name=parent_job_name or job_name),
449
+ )
450
+ facets.update({"parent": parent_run_facet})
443
451
 
444
452
  if run_facets:
445
453
  facets.update(run_facets)
@@ -449,23 +457,31 @@ class OpenLineageAdapter(LoggingMixin):
449
457
  @staticmethod
450
458
  def _build_job(
451
459
  job_name: str,
452
- job_type: JobTypeJobFacet,
460
+ job_type: job_type_job.JobTypeJobFacet,
453
461
  job_description: str | None = None,
454
462
  code_location: str | None = None,
455
463
  owners: list[str] | None = None,
456
- job_facets: dict[str, BaseFacet] | None = None,
464
+ job_facets: dict[str, JobFacet] | None = None,
457
465
  ):
458
- facets: dict[str, BaseFacet] = {}
466
+ facets: dict[str, JobFacet] = {}
459
467
 
460
468
  if job_description:
461
- facets.update({"documentation": DocumentationJobFacet(description=job_description)})
469
+ facets.update(
470
+ {"documentation": documentation_job.DocumentationJobFacet(description=job_description)}
471
+ )
462
472
  if code_location:
463
- facets.update({"sourceCodeLocation": SourceCodeLocationJobFacet("", url=code_location)})
473
+ facets.update(
474
+ {
475
+ "sourceCodeLocation": source_code_location_job.SourceCodeLocationJobFacet(
476
+ "", url=code_location
477
+ )
478
+ }
479
+ )
464
480
  if owners:
465
481
  facets.update(
466
482
  {
467
- "ownership": OwnershipJobFacet(
468
- owners=[OwnershipJobFacetOwners(name=owner) for owner in owners]
483
+ "ownership": ownership_job.OwnershipJobFacet(
484
+ owners=[ownership_job.Owner(name=owner) for owner in owners]
469
485
  )
470
486
  }
471
487
  )
@@ -18,7 +18,7 @@ from __future__ import annotations
18
18
 
19
19
  from attrs import define
20
20
  from deprecated import deprecated
21
- from openlineage.client.facet import BaseFacet
21
+ from openlineage.client.facet_v2 import JobFacet, RunFacet
22
22
  from openlineage.client.utils import RedactMixin
23
23
 
24
24
  from airflow.exceptions import AirflowProviderDeprecationWarning
@@ -28,8 +28,8 @@ from airflow.exceptions import AirflowProviderDeprecationWarning
28
28
  reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
29
29
  category=AirflowProviderDeprecationWarning,
30
30
  )
31
- @define(slots=False)
32
- class AirflowMappedTaskRunFacet(BaseFacet):
31
+ @define
32
+ class AirflowMappedTaskRunFacet(RunFacet):
33
33
  """Run facet containing information about mapped tasks."""
34
34
 
35
35
  mapIndex: int
@@ -47,8 +47,8 @@ class AirflowMappedTaskRunFacet(BaseFacet):
47
47
  )
48
48
 
49
49
 
50
- @define(slots=False)
51
- class AirflowJobFacet(BaseFacet):
50
+ @define
51
+ class AirflowJobFacet(JobFacet):
52
52
  """
53
53
  Composite Airflow job facet.
54
54
 
@@ -70,8 +70,8 @@ class AirflowJobFacet(BaseFacet):
70
70
  tasks: dict
71
71
 
72
72
 
73
- @define(slots=False)
74
- class AirflowStateRunFacet(BaseFacet):
73
+ @define
74
+ class AirflowStateRunFacet(RunFacet):
75
75
  """
76
76
  Airflow facet providing state information.
77
77
 
@@ -89,8 +89,8 @@ class AirflowStateRunFacet(BaseFacet):
89
89
  tasksState: dict[str, str]
90
90
 
91
91
 
92
- @define(slots=False)
93
- class AirflowRunFacet(BaseFacet):
92
+ @define
93
+ class AirflowRunFacet(RunFacet):
94
94
  """Composite Airflow run facet."""
95
95
 
96
96
  dag: dict
@@ -100,7 +100,15 @@ class AirflowRunFacet(BaseFacet):
100
100
  taskUuid: str
101
101
 
102
102
 
103
- @define(slots=False)
103
+ @define
104
+ class AirflowDagRunFacet(RunFacet):
105
+ """Composite Airflow DAG run facet."""
106
+
107
+ dag: dict
108
+ dagRun: dict
109
+
110
+
111
+ @define
104
112
  class UnknownOperatorInstance(RedactMixin):
105
113
  """
106
114
  Describes an unknown operator.
@@ -119,8 +127,8 @@ class UnknownOperatorInstance(RedactMixin):
119
127
  reason="To be removed in the next release. Make sure to use information from AirflowRunFacet instead.",
120
128
  category=AirflowProviderDeprecationWarning,
121
129
  )
122
- @define(slots=False)
123
- class UnknownOperatorAttributeRunFacet(BaseFacet):
130
+ @define
131
+ class UnknownOperatorAttributeRunFacet(RunFacet):
124
132
  """RunFacet that describes unknown operators in an Airflow DAG."""
125
133
 
126
134
  unknownItems: list[UnknownOperatorInstance]
@@ -19,45 +19,45 @@ from __future__ import annotations
19
19
  import logging
20
20
  import os
21
21
  from concurrent.futures import ProcessPoolExecutor
22
- from datetime import datetime
23
22
  from typing import TYPE_CHECKING
24
23
 
25
24
  import psutil
26
25
  from openlineage.client.serde import Serde
27
- from packaging.version import Version
28
26
  from setproctitle import getproctitle, setproctitle
29
27
 
30
- from airflow import __version__ as AIRFLOW_VERSION, settings
28
+ from airflow import settings
31
29
  from airflow.listeners import hookimpl
32
30
  from airflow.providers.openlineage import conf
33
31
  from airflow.providers.openlineage.extractors import ExtractorManager
34
32
  from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
35
33
  from airflow.providers.openlineage.utils.utils import (
34
+ IS_AIRFLOW_2_10_OR_HIGHER,
36
35
  get_airflow_job_facet,
36
+ get_airflow_mapped_task_facet,
37
37
  get_airflow_run_facet,
38
- get_custom_facets,
39
38
  get_job_name,
39
+ get_user_provided_run_facets,
40
40
  is_operator_disabled,
41
41
  is_selective_lineage_enabled,
42
42
  print_warning,
43
43
  )
44
44
  from airflow.settings import configure_orm
45
45
  from airflow.stats import Stats
46
+ from airflow.utils import timezone
47
+ from airflow.utils.state import TaskInstanceState
46
48
  from airflow.utils.timeout import timeout
47
49
 
48
50
  if TYPE_CHECKING:
49
51
  from sqlalchemy.orm import Session
50
52
 
51
53
  from airflow.models import DagRun, TaskInstance
52
- from airflow.utils.state import TaskInstanceState
53
54
 
54
55
  _openlineage_listener: OpenLineageListener | None = None
55
- _IS_AIRFLOW_2_10_OR_HIGHER = Version(Version(AIRFLOW_VERSION).base_version) >= Version("2.10.0")
56
56
 
57
57
 
58
58
  def _get_try_number_success(val):
59
59
  # todo: remove when min airflow version >= 2.10.0
60
- if _IS_AIRFLOW_2_10_OR_HIGHER:
60
+ if IS_AIRFLOW_2_10_OR_HIGHER:
61
61
  return val.try_number
62
62
  return val.try_number - 1
63
63
 
@@ -145,7 +145,7 @@ class OpenLineageListener:
145
145
  with Stats.timer(f"ol.extract.{event_type}.{operator_name}"):
146
146
  task_metadata = self.extractor_manager.extract_metadata(dagrun, task)
147
147
 
148
- start_date = task_instance.start_date if task_instance.start_date else datetime.now()
148
+ start_date = task_instance.start_date if task_instance.start_date else timezone.utcnow()
149
149
  data_interval_start = (
150
150
  dagrun.data_interval_start.isoformat() if dagrun.data_interval_start else None
151
151
  )
@@ -163,7 +163,8 @@ class OpenLineageListener:
163
163
  owners=dag.owner.split(", "),
164
164
  task=task_metadata,
165
165
  run_facets={
166
- **get_custom_facets(task_instance),
166
+ **get_user_provided_run_facets(task_instance, TaskInstanceState.RUNNING),
167
+ **get_airflow_mapped_task_facet(task_instance),
167
168
  **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
168
169
  },
169
170
  )
@@ -224,7 +225,7 @@ class OpenLineageListener:
224
225
  dagrun, task, complete=True, task_instance=task_instance
225
226
  )
226
227
 
227
- end_date = task_instance.end_date if task_instance.end_date else datetime.now()
228
+ end_date = task_instance.end_date if task_instance.end_date else timezone.utcnow()
228
229
 
229
230
  redacted_event = self.adapter.complete_task(
230
231
  run_id=task_uuid,
@@ -233,6 +234,10 @@ class OpenLineageListener:
233
234
  parent_run_id=parent_run_id,
234
235
  end_time=end_date.isoformat(),
235
236
  task=task_metadata,
237
+ run_facets={
238
+ **get_user_provided_run_facets(task_instance, TaskInstanceState.SUCCESS),
239
+ **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
240
+ },
236
241
  )
237
242
  Stats.gauge(
238
243
  f"ol.event.size.{event_type}.{operator_name}",
@@ -241,7 +246,7 @@ class OpenLineageListener:
241
246
 
242
247
  self._execute(on_success, "on_success", use_fork=True)
243
248
 
244
- if _IS_AIRFLOW_2_10_OR_HIGHER:
249
+ if IS_AIRFLOW_2_10_OR_HIGHER:
245
250
 
246
251
  @hookimpl
247
252
  def on_task_instance_failed(
@@ -318,7 +323,7 @@ class OpenLineageListener:
318
323
  dagrun, task, complete=True, task_instance=task_instance
319
324
  )
320
325
 
321
- end_date = task_instance.end_date if task_instance.end_date else datetime.now()
326
+ end_date = task_instance.end_date if task_instance.end_date else timezone.utcnow()
322
327
 
323
328
  redacted_event = self.adapter.fail_task(
324
329
  run_id=task_uuid,
@@ -328,6 +333,10 @@ class OpenLineageListener:
328
333
  end_time=end_date.isoformat(),
329
334
  task=task_metadata,
330
335
  error=error,
336
+ run_facets={
337
+ **get_user_provided_run_facets(task_instance, TaskInstanceState.FAILED),
338
+ **get_airflow_run_facet(dagrun, dag, task_instance, task, task_uuid),
339
+ },
331
340
  )
332
341
  Stats.gauge(
333
342
  f"ol.event.size.{event_type}.{operator_name}",
@@ -420,7 +429,7 @@ class OpenLineageListener:
420
429
  nominal_end_time=data_interval_end,
421
430
  # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
422
431
  # as it causes lack of some TaskGroup attributes and crashes event emission.
423
- job_facets={**get_airflow_job_facet(dag_run=dag_run)},
432
+ job_facets=get_airflow_job_facet(dag_run=dag_run),
424
433
  )
425
434
 
426
435
  @hookimpl
@@ -20,16 +20,8 @@ from typing import TYPE_CHECKING, Callable
20
20
 
21
21
  import sqlparse
22
22
  from attrs import define
23
- from openlineage.client.facet import (
24
- BaseFacet,
25
- ColumnLineageDatasetFacet,
26
- ColumnLineageDatasetFacetFieldsAdditional,
27
- ColumnLineageDatasetFacetFieldsAdditionalInputFields,
28
- ExtractionError,
29
- ExtractionErrorRunFacet,
30
- SqlJobFacet,
31
- )
32
- from openlineage.client.run import Dataset
23
+ from openlineage.client.event_v2 import Dataset
24
+ from openlineage.client.facet_v2 import column_lineage_dataset, extraction_error_run, sql_job
33
25
  from openlineage.common.sql import DbTableMeta, SqlMeta, parse
34
26
 
35
27
  from airflow.providers.openlineage.extractors.base import OperatorLineage
@@ -42,6 +34,7 @@ from airflow.typing_compat import TypedDict
42
34
  from airflow.utils.log.logging_mixin import LoggingMixin
43
35
 
44
36
  if TYPE_CHECKING:
37
+ from openlineage.client.facet_v2 import JobFacet, RunFacet
45
38
  from sqlalchemy.engine import Engine
46
39
 
47
40
  from airflow.hooks.base import BaseHook
@@ -160,7 +153,6 @@ class SQLParser(LoggingMixin):
160
153
  "database": database or database_info.database,
161
154
  "use_flat_cross_db_query": database_info.use_flat_cross_db_query,
162
155
  }
163
- self.log.info("PRE getting schemas for input and output tables")
164
156
  return get_table_schemas(
165
157
  hook,
166
158
  namespace,
@@ -207,11 +199,12 @@ class SQLParser(LoggingMixin):
207
199
  if not len(parse_result.column_lineage):
208
200
  return
209
201
  for dataset in datasets:
210
- dataset.facets["columnLineage"] = ColumnLineageDatasetFacet(
202
+ dataset.facets = dataset.facets or {}
203
+ dataset.facets["columnLineage"] = column_lineage_dataset.ColumnLineageDatasetFacet(
211
204
  fields={
212
- column_lineage.descendant.name: ColumnLineageDatasetFacetFieldsAdditional(
205
+ column_lineage.descendant.name: column_lineage_dataset.Fields(
213
206
  inputFields=[
214
- ColumnLineageDatasetFacetFieldsAdditionalInputFields(
207
+ column_lineage_dataset.InputField(
215
208
  namespace=dataset.namespace,
216
209
  name=".".join(
217
210
  filter(
@@ -261,18 +254,18 @@ class SQLParser(LoggingMixin):
261
254
  :param database: when passed it takes precedence over parsed database name
262
255
  :param sqlalchemy_engine: when passed, engine's dialect is used to compile SQL queries
263
256
  """
264
- job_facets: dict[str, BaseFacet] = {"sql": SqlJobFacet(query=self.normalize_sql(sql))}
265
- parse_result = self.parse(self.split_sql_string(sql))
257
+ job_facets: dict[str, JobFacet] = {"sql": sql_job.SQLJobFacet(query=self.normalize_sql(sql))}
258
+ parse_result = self.parse(sql=self.split_sql_string(sql))
266
259
  if not parse_result:
267
260
  return OperatorLineage(job_facets=job_facets)
268
261
 
269
- run_facets: dict[str, BaseFacet] = {}
262
+ run_facets: dict[str, RunFacet] = {}
270
263
  if parse_result.errors:
271
- run_facets["extractionError"] = ExtractionErrorRunFacet(
264
+ run_facets["extractionError"] = extraction_error_run.ExtractionErrorRunFacet(
272
265
  totalTasks=len(sql) if isinstance(sql, list) else 1,
273
266
  failedTasks=len(parse_result.errors),
274
267
  errors=[
275
- ExtractionError(
268
+ extraction_error_run.Error(
276
269
  errorMessage=error.message,
277
270
  stackTrace=None,
278
271
  task=error.origin_statement,
@@ -23,8 +23,8 @@ from enum import IntEnum
23
23
  from typing import TYPE_CHECKING, Dict, List, Optional
24
24
 
25
25
  from attrs import define
26
- from openlineage.client.facet import SchemaDatasetFacet, SchemaField
27
- from openlineage.client.run import Dataset
26
+ from openlineage.client.event_v2 import Dataset
27
+ from openlineage.client.facet_v2 import schema_dataset
28
28
  from sqlalchemy import Column, MetaData, Table, and_, or_, union_all
29
29
 
30
30
  if TYPE_CHECKING:
@@ -60,7 +60,7 @@ class TableSchema:
60
60
  table: str
61
61
  schema: str | None
62
62
  database: str | None
63
- fields: list[SchemaField]
63
+ fields: list[schema_dataset.SchemaDatasetFacetFields]
64
64
 
65
65
  def to_dataset(self, namespace: str, database: str | None = None, schema: str | None = None) -> Dataset:
66
66
  # Prefix the table name with database and schema name using
@@ -73,7 +73,7 @@ class TableSchema:
73
73
  return Dataset(
74
74
  namespace=namespace,
75
75
  name=name,
76
- facets={"schema": SchemaDatasetFacet(fields=self.fields)} if self.fields else {},
76
+ facets={"schema": schema_dataset.SchemaDatasetFacet(fields=self.fields)} if self.fields else {},
77
77
  )
78
78
 
79
79
 
@@ -122,7 +122,7 @@ def parse_query_result(cursor) -> list[TableSchema]:
122
122
  for row in cursor.fetchall():
123
123
  table_schema_name: str = row[ColumnIndex.SCHEMA]
124
124
  table_name: str = row[ColumnIndex.TABLE_NAME]
125
- table_column: SchemaField = SchemaField(
125
+ table_column = schema_dataset.SchemaDatasetFacetFields(
126
126
  name=row[ColumnIndex.COLUMN_NAME],
127
127
  type=row[ColumnIndex.UDT_NAME],
128
128
  description=None,