apache-airflow-providers-openlineage 1.8.0rc1__py3-none-any.whl → 1.9.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-openlineage might be problematic. Click here for more details.
- airflow/providers/openlineage/LICENSE +4 -4
- airflow/providers/openlineage/__init__.py +1 -1
- airflow/providers/openlineage/conf.py +16 -1
- airflow/providers/openlineage/facets/AirflowJobFacet.json +40 -0
- airflow/providers/openlineage/facets/AirflowRunFacet.json +261 -0
- airflow/providers/openlineage/facets/AirflowStateRunFacet.json +34 -0
- airflow/providers/openlineage/facets/__init__.py +16 -0
- airflow/providers/openlineage/get_provider_info.py +11 -3
- airflow/providers/openlineage/plugins/adapter.py +61 -14
- airflow/providers/openlineage/plugins/facets.py +44 -3
- airflow/providers/openlineage/plugins/listener.py +128 -33
- airflow/providers/openlineage/plugins/macros.py +1 -1
- airflow/providers/openlineage/sqlparser.py +12 -4
- airflow/providers/openlineage/utils/sql.py +7 -1
- airflow/providers/openlineage/utils/utils.py +179 -21
- {apache_airflow_providers_openlineage-1.8.0rc1.dist-info → apache_airflow_providers_openlineage-1.9.0rc1.dist-info}/METADATA +10 -10
- apache_airflow_providers_openlineage-1.9.0rc1.dist-info/RECORD +28 -0
- apache_airflow_providers_openlineage-1.8.0rc1.dist-info/RECORD +0 -24
- {apache_airflow_providers_openlineage-1.8.0rc1.dist-info → apache_airflow_providers_openlineage-1.9.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_openlineage-1.8.0rc1.dist-info → apache_airflow_providers_openlineage-1.9.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -17,18 +17,23 @@
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
19
|
import logging
|
|
20
|
+
import os
|
|
20
21
|
from concurrent.futures import ProcessPoolExecutor
|
|
21
22
|
from datetime import datetime
|
|
22
23
|
from typing import TYPE_CHECKING
|
|
23
24
|
|
|
25
|
+
import psutil
|
|
24
26
|
from openlineage.client.serde import Serde
|
|
27
|
+
from packaging.version import Version
|
|
28
|
+
from setproctitle import getproctitle, setproctitle
|
|
25
29
|
|
|
26
|
-
from airflow import __version__ as
|
|
30
|
+
from airflow import __version__ as AIRFLOW_VERSION, settings
|
|
27
31
|
from airflow.listeners import hookimpl
|
|
28
32
|
from airflow.providers.openlineage import conf
|
|
29
33
|
from airflow.providers.openlineage.extractors import ExtractorManager
|
|
30
34
|
from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter, RunState
|
|
31
35
|
from airflow.providers.openlineage.utils.utils import (
|
|
36
|
+
get_airflow_job_facet,
|
|
32
37
|
get_airflow_run_facet,
|
|
33
38
|
get_custom_facets,
|
|
34
39
|
get_job_name,
|
|
@@ -36,6 +41,7 @@ from airflow.providers.openlineage.utils.utils import (
|
|
|
36
41
|
is_selective_lineage_enabled,
|
|
37
42
|
print_warning,
|
|
38
43
|
)
|
|
44
|
+
from airflow.settings import configure_orm
|
|
39
45
|
from airflow.stats import Stats
|
|
40
46
|
from airflow.utils.timeout import timeout
|
|
41
47
|
|
|
@@ -43,18 +49,29 @@ if TYPE_CHECKING:
|
|
|
43
49
|
from sqlalchemy.orm import Session
|
|
44
50
|
|
|
45
51
|
from airflow.models import DagRun, TaskInstance
|
|
52
|
+
from airflow.utils.state import TaskInstanceState
|
|
46
53
|
|
|
47
54
|
_openlineage_listener: OpenLineageListener | None = None
|
|
55
|
+
_IS_AIRFLOW_2_10_OR_HIGHER = Version(Version(AIRFLOW_VERSION).base_version) >= Version("2.10.0")
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
def _get_try_number_success(val):
|
|
51
59
|
# todo: remove when min airflow version >= 2.10.0
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if parse(parse(airflow_version).base_version) < parse("2.10.0"):
|
|
55
|
-
return val.try_number - 1
|
|
56
|
-
else:
|
|
60
|
+
if _IS_AIRFLOW_2_10_OR_HIGHER:
|
|
57
61
|
return val.try_number
|
|
62
|
+
return val.try_number - 1
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _executor_initializer():
|
|
66
|
+
"""
|
|
67
|
+
Initialize worker processes for the executor used for DagRun listener.
|
|
68
|
+
|
|
69
|
+
This function must be picklable, so it cannot be defined as an inner method or local function.
|
|
70
|
+
|
|
71
|
+
Reconfigures the ORM engine to prevent issues that arise when multiple processes interact with
|
|
72
|
+
the Airflow database.
|
|
73
|
+
"""
|
|
74
|
+
settings.configure_orm()
|
|
58
75
|
|
|
59
76
|
|
|
60
77
|
class OpenLineageListener:
|
|
@@ -69,10 +86,10 @@ class OpenLineageListener:
|
|
|
69
86
|
@hookimpl
|
|
70
87
|
def on_task_instance_running(
|
|
71
88
|
self,
|
|
72
|
-
previous_state,
|
|
89
|
+
previous_state: TaskInstanceState,
|
|
73
90
|
task_instance: TaskInstance,
|
|
74
91
|
session: Session, # This will always be QUEUED
|
|
75
|
-
):
|
|
92
|
+
) -> None:
|
|
76
93
|
if not getattr(task_instance, "task", None) is not None:
|
|
77
94
|
self.log.warning(
|
|
78
95
|
"No task set for TI object task_id: %s - dag_id: %s - run_id %s",
|
|
@@ -111,13 +128,16 @@ class OpenLineageListener:
|
|
|
111
128
|
# we return here because Airflow 2.3 needs task from deferred state
|
|
112
129
|
if task_instance.next_method is not None:
|
|
113
130
|
return
|
|
114
|
-
parent_run_id = self.adapter.build_dag_run_id(
|
|
131
|
+
parent_run_id = self.adapter.build_dag_run_id(
|
|
132
|
+
dag_id=dag.dag_id,
|
|
133
|
+
execution_date=dagrun.execution_date,
|
|
134
|
+
)
|
|
115
135
|
|
|
116
136
|
task_uuid = self.adapter.build_task_instance_run_id(
|
|
117
137
|
dag_id=dag.dag_id,
|
|
118
138
|
task_id=task.task_id,
|
|
119
|
-
execution_date=task_instance.execution_date,
|
|
120
139
|
try_number=task_instance.try_number,
|
|
140
|
+
execution_date=task_instance.execution_date,
|
|
121
141
|
)
|
|
122
142
|
event_type = RunState.RUNNING.value.lower()
|
|
123
143
|
operator_name = task.task_type.lower()
|
|
@@ -130,7 +150,6 @@ class OpenLineageListener:
|
|
|
130
150
|
dagrun.data_interval_start.isoformat() if dagrun.data_interval_start else None
|
|
131
151
|
)
|
|
132
152
|
data_interval_end = dagrun.data_interval_end.isoformat() if dagrun.data_interval_end else None
|
|
133
|
-
|
|
134
153
|
redacted_event = self.adapter.start_task(
|
|
135
154
|
run_id=task_uuid,
|
|
136
155
|
job_name=get_job_name(task),
|
|
@@ -153,10 +172,12 @@ class OpenLineageListener:
|
|
|
153
172
|
len(Serde.to_json(redacted_event).encode("utf-8")),
|
|
154
173
|
)
|
|
155
174
|
|
|
156
|
-
on_running
|
|
175
|
+
self._execute(on_running, "on_running", use_fork=True)
|
|
157
176
|
|
|
158
177
|
@hookimpl
|
|
159
|
-
def on_task_instance_success(
|
|
178
|
+
def on_task_instance_success(
|
|
179
|
+
self, previous_state: TaskInstanceState, task_instance: TaskInstance, session: Session
|
|
180
|
+
) -> None:
|
|
160
181
|
self.log.debug("OpenLineage listener got notification about task instance success")
|
|
161
182
|
|
|
162
183
|
dagrun = task_instance.dag_run
|
|
@@ -184,13 +205,16 @@ class OpenLineageListener:
|
|
|
184
205
|
|
|
185
206
|
@print_warning(self.log)
|
|
186
207
|
def on_success():
|
|
187
|
-
parent_run_id = OpenLineageAdapter.build_dag_run_id(
|
|
208
|
+
parent_run_id = OpenLineageAdapter.build_dag_run_id(
|
|
209
|
+
dag_id=dag.dag_id,
|
|
210
|
+
execution_date=dagrun.execution_date,
|
|
211
|
+
)
|
|
188
212
|
|
|
189
213
|
task_uuid = OpenLineageAdapter.build_task_instance_run_id(
|
|
190
214
|
dag_id=dag.dag_id,
|
|
191
215
|
task_id=task.task_id,
|
|
192
|
-
execution_date=task_instance.execution_date,
|
|
193
216
|
try_number=_get_try_number_success(task_instance),
|
|
217
|
+
execution_date=task_instance.execution_date,
|
|
194
218
|
)
|
|
195
219
|
event_type = RunState.COMPLETE.value.lower()
|
|
196
220
|
operator_name = task.task_type.lower()
|
|
@@ -215,10 +239,39 @@ class OpenLineageListener:
|
|
|
215
239
|
len(Serde.to_json(redacted_event).encode("utf-8")),
|
|
216
240
|
)
|
|
217
241
|
|
|
218
|
-
on_success
|
|
242
|
+
self._execute(on_success, "on_success", use_fork=True)
|
|
219
243
|
|
|
220
|
-
|
|
221
|
-
|
|
244
|
+
if _IS_AIRFLOW_2_10_OR_HIGHER:
|
|
245
|
+
|
|
246
|
+
@hookimpl
|
|
247
|
+
def on_task_instance_failed(
|
|
248
|
+
self,
|
|
249
|
+
previous_state: TaskInstanceState,
|
|
250
|
+
task_instance: TaskInstance,
|
|
251
|
+
error: None | str | BaseException,
|
|
252
|
+
session: Session,
|
|
253
|
+
) -> None:
|
|
254
|
+
self._on_task_instance_failed(
|
|
255
|
+
previous_state=previous_state, task_instance=task_instance, error=error, session=session
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
else:
|
|
259
|
+
|
|
260
|
+
@hookimpl
|
|
261
|
+
def on_task_instance_failed(
|
|
262
|
+
self, previous_state: TaskInstanceState, task_instance: TaskInstance, session: Session
|
|
263
|
+
) -> None:
|
|
264
|
+
self._on_task_instance_failed(
|
|
265
|
+
previous_state=previous_state, task_instance=task_instance, error=None, session=session
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
def _on_task_instance_failed(
|
|
269
|
+
self,
|
|
270
|
+
previous_state: TaskInstanceState,
|
|
271
|
+
task_instance: TaskInstance,
|
|
272
|
+
session: Session,
|
|
273
|
+
error: None | str | BaseException = None,
|
|
274
|
+
) -> None:
|
|
222
275
|
self.log.debug("OpenLineage listener got notification about task instance failure")
|
|
223
276
|
|
|
224
277
|
dagrun = task_instance.dag_run
|
|
@@ -246,13 +299,16 @@ class OpenLineageListener:
|
|
|
246
299
|
|
|
247
300
|
@print_warning(self.log)
|
|
248
301
|
def on_failure():
|
|
249
|
-
parent_run_id = OpenLineageAdapter.build_dag_run_id(
|
|
302
|
+
parent_run_id = OpenLineageAdapter.build_dag_run_id(
|
|
303
|
+
dag_id=dag.dag_id,
|
|
304
|
+
execution_date=dagrun.execution_date,
|
|
305
|
+
)
|
|
250
306
|
|
|
251
307
|
task_uuid = OpenLineageAdapter.build_task_instance_run_id(
|
|
252
308
|
dag_id=dag.dag_id,
|
|
253
309
|
task_id=task.task_id,
|
|
254
|
-
execution_date=task_instance.execution_date,
|
|
255
310
|
try_number=task_instance.try_number,
|
|
311
|
+
execution_date=task_instance.execution_date,
|
|
256
312
|
)
|
|
257
313
|
event_type = RunState.FAIL.value.lower()
|
|
258
314
|
operator_name = task.task_type.lower()
|
|
@@ -271,40 +327,76 @@ class OpenLineageListener:
|
|
|
271
327
|
parent_run_id=parent_run_id,
|
|
272
328
|
end_time=end_date.isoformat(),
|
|
273
329
|
task=task_metadata,
|
|
330
|
+
error=error,
|
|
274
331
|
)
|
|
275
332
|
Stats.gauge(
|
|
276
333
|
f"ol.event.size.{event_type}.{operator_name}",
|
|
277
334
|
len(Serde.to_json(redacted_event).encode("utf-8")),
|
|
278
335
|
)
|
|
279
336
|
|
|
280
|
-
on_failure
|
|
337
|
+
self._execute(on_failure, "on_failure", use_fork=True)
|
|
338
|
+
|
|
339
|
+
def _execute(self, callable, callable_name: str, use_fork: bool = False):
|
|
340
|
+
if use_fork:
|
|
341
|
+
self._fork_execute(callable, callable_name)
|
|
342
|
+
else:
|
|
343
|
+
callable()
|
|
344
|
+
|
|
345
|
+
def _terminate_with_wait(self, process: psutil.Process):
|
|
346
|
+
process.terminate()
|
|
347
|
+
try:
|
|
348
|
+
# Waiting for max 3 seconds to make sure process can clean up before being killed.
|
|
349
|
+
process.wait(timeout=3)
|
|
350
|
+
except psutil.TimeoutExpired:
|
|
351
|
+
# If it's not dead by then, then force kill.
|
|
352
|
+
process.kill()
|
|
353
|
+
|
|
354
|
+
def _fork_execute(self, callable, callable_name: str):
|
|
355
|
+
self.log.debug("Will fork to execute OpenLineage process.")
|
|
356
|
+
pid = os.fork()
|
|
357
|
+
if pid:
|
|
358
|
+
process = psutil.Process(pid)
|
|
359
|
+
try:
|
|
360
|
+
self.log.debug("Waiting for process %s", pid)
|
|
361
|
+
process.wait(conf.execution_timeout())
|
|
362
|
+
except psutil.TimeoutExpired:
|
|
363
|
+
self.log.warning(
|
|
364
|
+
"OpenLineage process %s expired. This should not affect process execution.", pid
|
|
365
|
+
)
|
|
366
|
+
self._terminate_with_wait(process)
|
|
367
|
+
except BaseException:
|
|
368
|
+
# Kill the process directly.
|
|
369
|
+
self._terminate_with_wait(process)
|
|
370
|
+
self.log.warning("Process with pid %s finished - parent", pid)
|
|
371
|
+
else:
|
|
372
|
+
setproctitle(getproctitle() + " - OpenLineage - " + callable_name)
|
|
373
|
+
configure_orm(disable_connection_pool=True)
|
|
374
|
+
self.log.debug("Executing OpenLineage process - %s - pid %s", callable_name, os.getpid())
|
|
375
|
+
callable()
|
|
376
|
+
self.log.debug("Process with current pid finishes after %s", callable_name)
|
|
377
|
+
os._exit(0)
|
|
281
378
|
|
|
282
379
|
@property
|
|
283
|
-
def executor(self):
|
|
284
|
-
def initializer():
|
|
285
|
-
# Re-configure the ORM engine as there are issues with multiple processes
|
|
286
|
-
# if process calls Airflow DB.
|
|
287
|
-
settings.configure_orm()
|
|
288
|
-
|
|
380
|
+
def executor(self) -> ProcessPoolExecutor:
|
|
289
381
|
if not self._executor:
|
|
290
382
|
self._executor = ProcessPoolExecutor(
|
|
291
383
|
max_workers=conf.dag_state_change_process_pool_size(),
|
|
292
|
-
initializer=
|
|
384
|
+
initializer=_executor_initializer,  # pass the callable itself (not its result) so the pool can invoke it in worker processes
|
|
293
385
|
)
|
|
294
386
|
return self._executor
|
|
295
387
|
|
|
296
388
|
@hookimpl
|
|
297
|
-
def on_starting(self, component):
|
|
389
|
+
def on_starting(self, component) -> None:
|
|
298
390
|
self.log.debug("on_starting: %s", component.__class__.__name__)
|
|
299
391
|
|
|
300
392
|
@hookimpl
|
|
301
|
-
def before_stopping(self, component):
|
|
393
|
+
def before_stopping(self, component) -> None:
|
|
302
394
|
self.log.debug("before_stopping: %s", component.__class__.__name__)
|
|
303
395
|
with timeout(30):
|
|
304
396
|
self.executor.shutdown(wait=True)
|
|
305
397
|
|
|
306
398
|
@hookimpl
|
|
307
|
-
def on_dag_run_running(self, dag_run: DagRun, msg: str):
|
|
399
|
+
def on_dag_run_running(self, dag_run: DagRun, msg: str) -> None:
|
|
308
400
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
309
401
|
self.log.debug(
|
|
310
402
|
"Skipping OpenLineage event emission for DAG `%s` "
|
|
@@ -326,10 +418,13 @@ class OpenLineageListener:
|
|
|
326
418
|
msg=msg,
|
|
327
419
|
nominal_start_time=data_interval_start,
|
|
328
420
|
nominal_end_time=data_interval_end,
|
|
421
|
+
# AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects,
|
|
422
|
+
# as it causes lack of some TaskGroup attributes and crashes event emission.
|
|
423
|
+
job_facets={**get_airflow_job_facet(dag_run=dag_run)},
|
|
329
424
|
)
|
|
330
425
|
|
|
331
426
|
@hookimpl
|
|
332
|
-
def on_dag_run_success(self, dag_run: DagRun, msg: str):
|
|
427
|
+
def on_dag_run_success(self, dag_run: DagRun, msg: str) -> None:
|
|
333
428
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
334
429
|
self.log.debug(
|
|
335
430
|
"Skipping OpenLineage event emission for DAG `%s` "
|
|
@@ -346,7 +441,7 @@ class OpenLineageListener:
|
|
|
346
441
|
self.executor.submit(self.adapter.dag_success, dag_run=dag_run, msg=msg)
|
|
347
442
|
|
|
348
443
|
@hookimpl
|
|
349
|
-
def on_dag_run_failed(self, dag_run: DagRun, msg: str):
|
|
444
|
+
def on_dag_run_failed(self, dag_run: DagRun, msg: str) -> None:
|
|
350
445
|
if dag_run.dag and not is_selective_lineage_enabled(dag_run.dag):
|
|
351
446
|
self.log.debug(
|
|
352
447
|
"Skipping OpenLineage event emission for DAG `%s` "
|
|
@@ -61,8 +61,8 @@ def lineage_run_id(task_instance: TaskInstance):
|
|
|
61
61
|
return OpenLineageAdapter.build_task_instance_run_id(
|
|
62
62
|
dag_id=task_instance.dag_id,
|
|
63
63
|
task_id=task_instance.task_id,
|
|
64
|
-
execution_date=task_instance.execution_date,
|
|
65
64
|
try_number=task_instance.try_number,
|
|
65
|
+
execution_date=task_instance.execution_date,
|
|
66
66
|
)
|
|
67
67
|
|
|
68
68
|
|
|
@@ -39,6 +39,7 @@ from airflow.providers.openlineage.utils.sql import (
|
|
|
39
39
|
get_table_schemas,
|
|
40
40
|
)
|
|
41
41
|
from airflow.typing_compat import TypedDict
|
|
42
|
+
from airflow.utils.log.logging_mixin import LoggingMixin
|
|
42
43
|
|
|
43
44
|
if TYPE_CHECKING:
|
|
44
45
|
from sqlalchemy.engine import Engine
|
|
@@ -116,7 +117,7 @@ def from_table_meta(
|
|
|
116
117
|
return Dataset(namespace=namespace, name=name if not is_uppercase else name.upper())
|
|
117
118
|
|
|
118
119
|
|
|
119
|
-
class SQLParser:
|
|
120
|
+
class SQLParser(LoggingMixin):
|
|
120
121
|
"""Interface for openlineage-sql.
|
|
121
122
|
|
|
122
123
|
:param dialect: dialect specific to the database
|
|
@@ -124,11 +125,18 @@ class SQLParser:
|
|
|
124
125
|
"""
|
|
125
126
|
|
|
126
127
|
def __init__(self, dialect: str | None = None, default_schema: str | None = None) -> None:
|
|
128
|
+
super().__init__()
|
|
127
129
|
self.dialect = dialect
|
|
128
130
|
self.default_schema = default_schema
|
|
129
131
|
|
|
130
132
|
def parse(self, sql: list[str] | str) -> SqlMeta | None:
|
|
131
133
|
"""Parse a single or a list of SQL statements."""
|
|
134
|
+
self.log.debug(
|
|
135
|
+
"OpenLineage calling SQL parser with SQL %s dialect %s schema %s",
|
|
136
|
+
sql,
|
|
137
|
+
self.dialect,
|
|
138
|
+
self.default_schema,
|
|
139
|
+
)
|
|
132
140
|
return parse(sql=sql, dialect=self.dialect, default_schema=self.default_schema)
|
|
133
141
|
|
|
134
142
|
def parse_table_schemas(
|
|
@@ -151,6 +159,7 @@ class SQLParser:
|
|
|
151
159
|
"database": database or database_info.database,
|
|
152
160
|
"use_flat_cross_db_query": database_info.use_flat_cross_db_query,
|
|
153
161
|
}
|
|
162
|
+
self.log.info("PRE getting schemas for input and output tables")
|
|
154
163
|
return get_table_schemas(
|
|
155
164
|
hook,
|
|
156
165
|
namespace,
|
|
@@ -335,9 +344,8 @@ class SQLParser:
|
|
|
335
344
|
return split_statement(sql)
|
|
336
345
|
return [obj for stmt in sql for obj in cls.split_sql_string(stmt) if obj != ""]
|
|
337
346
|
|
|
338
|
-
@classmethod
|
|
339
347
|
def create_information_schema_query(
|
|
340
|
-
|
|
348
|
+
self,
|
|
341
349
|
tables: list[DbTableMeta],
|
|
342
350
|
normalize_name: Callable[[str], str],
|
|
343
351
|
is_cross_db: bool,
|
|
@@ -349,7 +357,7 @@ class SQLParser:
|
|
|
349
357
|
sqlalchemy_engine: Engine | None = None,
|
|
350
358
|
) -> str:
|
|
351
359
|
"""Create SELECT statement to query information schema table."""
|
|
352
|
-
tables_hierarchy =
|
|
360
|
+
tables_hierarchy = self._get_tables_hierarchy(
|
|
353
361
|
tables,
|
|
354
362
|
normalize_name=normalize_name,
|
|
355
363
|
database=database,
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
# under the License.
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
+
import logging
|
|
19
20
|
from collections import defaultdict
|
|
20
21
|
from contextlib import closing
|
|
21
22
|
from enum import IntEnum
|
|
@@ -33,6 +34,9 @@ if TYPE_CHECKING:
|
|
|
33
34
|
from airflow.hooks.base import BaseHook
|
|
34
35
|
|
|
35
36
|
|
|
37
|
+
log = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
|
|
36
40
|
class ColumnIndex(IntEnum):
|
|
37
41
|
"""Enumerates the indices of columns in information schema view."""
|
|
38
42
|
|
|
@@ -90,6 +94,7 @@ def get_table_schemas(
|
|
|
90
94
|
if not in_query and not out_query:
|
|
91
95
|
return [], []
|
|
92
96
|
|
|
97
|
+
log.debug("Starting to query database for table schemas")
|
|
93
98
|
with closing(hook.get_conn()) as conn, closing(conn.cursor()) as cursor:
|
|
94
99
|
if in_query:
|
|
95
100
|
cursor.execute(in_query)
|
|
@@ -101,6 +106,7 @@ def get_table_schemas(
|
|
|
101
106
|
out_datasets = [x.to_dataset(namespace, database, schema) for x in parse_query_result(cursor)]
|
|
102
107
|
else:
|
|
103
108
|
out_datasets = []
|
|
109
|
+
log.debug("Got table schema query result from database.")
|
|
104
110
|
return in_datasets, out_datasets
|
|
105
111
|
|
|
106
112
|
|
|
@@ -149,7 +155,7 @@ def create_information_schema_query(
|
|
|
149
155
|
sqlalchemy_engine: Engine | None = None,
|
|
150
156
|
) -> str:
|
|
151
157
|
"""Create query for getting table schemas from information schema."""
|
|
152
|
-
metadata = MetaData(
|
|
158
|
+
metadata = MetaData()
|
|
153
159
|
select_statements = []
|
|
154
160
|
# Don't iterate over tables hierarchy, just pass it to query single information schema table
|
|
155
161
|
if use_flat_cross_db_query:
|