acryl-datahub 0.15.0rc14__py3-none-any.whl → 0.15.0rc16__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/METADATA +2414 -2430
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/RECORD +24 -26
- datahub/__init__.py +1 -1
- datahub/cli/cli_utils.py +2 -0
- datahub/ingestion/api/incremental_properties_helper.py +69 -0
- datahub/ingestion/api/source_helpers.py +3 -1
- datahub/ingestion/sink/datahub_rest.py +3 -3
- datahub/ingestion/source/abs/source.py +4 -0
- datahub/ingestion/source/gc/datahub_gc.py +5 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/mode.py +0 -23
- datahub/ingestion/source/redash.py +13 -63
- datahub/ingestion/source/redshift/config.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +2 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +6 -0
- datahub/ingestion/source/tableau/tableau.py +107 -30
- datahub/ingestion/source/unity/source.py +2 -0
- datahub/ingestion/source/unity/usage.py +20 -11
- datahub/specific/datajob.py +4 -10
- datahub/utilities/partition_executor.py +1 -1
- datahub/utilities/sql_lineage_parser_impl.py +0 -160
- datahub/utilities/sql_parser.py +0 -94
- datahub/utilities/sql_parser_base.py +0 -21
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/top_level.txt +0 -0
datahub/utilities/sql_parser.py
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import multiprocessing
|
|
3
|
-
import traceback
|
|
4
|
-
from multiprocessing import Process, Queue
|
|
5
|
-
from typing import Any, List, Optional, Tuple
|
|
6
|
-
|
|
7
|
-
from datahub.utilities.sql_lineage_parser_impl import SqlLineageSQLParserImpl
|
|
8
|
-
from datahub.utilities.sql_parser_base import SQLParser
|
|
9
|
-
|
|
10
|
-
logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def sql_lineage_parser_impl_func_wrapper(
|
|
14
|
-
queue: Optional[multiprocessing.Queue], sql_query: str, use_raw_names: bool = False
|
|
15
|
-
) -> Optional[Tuple[List[str], List[str], Any]]:
|
|
16
|
-
"""
|
|
17
|
-
The wrapper function that computes the tables and columns using the SqlLineageSQLParserImpl
|
|
18
|
-
and puts the results on the shared IPC queue. This is used to isolate SqlLineageSQLParserImpl
|
|
19
|
-
functionality in a separate process, and hence protect our sources from memory leaks originating in
|
|
20
|
-
the sqllineage module.
|
|
21
|
-
:param queue: The shared IPC queue on to which the results will be put.
|
|
22
|
-
:param sql_query: The SQL query to extract the tables & columns from.
|
|
23
|
-
:param use_raw_names: Parameter used to ignore sqllineage's default lowercasing.
|
|
24
|
-
:return: None.
|
|
25
|
-
"""
|
|
26
|
-
exception_details: Optional[Tuple[BaseException, str]] = None
|
|
27
|
-
tables: List[str] = []
|
|
28
|
-
columns: List[str] = []
|
|
29
|
-
try:
|
|
30
|
-
parser = SqlLineageSQLParserImpl(sql_query, use_raw_names)
|
|
31
|
-
tables = parser.get_tables()
|
|
32
|
-
columns = parser.get_columns()
|
|
33
|
-
except BaseException as e:
|
|
34
|
-
exc_msg = traceback.format_exc()
|
|
35
|
-
exception_details = (e, exc_msg)
|
|
36
|
-
logger.debug(exc_msg)
|
|
37
|
-
|
|
38
|
-
if queue is not None:
|
|
39
|
-
queue.put((tables, columns, exception_details))
|
|
40
|
-
return None
|
|
41
|
-
else:
|
|
42
|
-
return (tables, columns, exception_details)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class SqlLineageSQLParser(SQLParser):
|
|
46
|
-
def __init__(
|
|
47
|
-
self,
|
|
48
|
-
sql_query: str,
|
|
49
|
-
use_external_process: bool = False,
|
|
50
|
-
use_raw_names: bool = False,
|
|
51
|
-
) -> None:
|
|
52
|
-
super().__init__(sql_query, use_external_process)
|
|
53
|
-
if use_external_process:
|
|
54
|
-
self.tables, self.columns = self._get_tables_columns_process_wrapped(
|
|
55
|
-
sql_query, use_raw_names
|
|
56
|
-
)
|
|
57
|
-
else:
|
|
58
|
-
return_tuple = sql_lineage_parser_impl_func_wrapper(
|
|
59
|
-
None, sql_query, use_raw_names
|
|
60
|
-
)
|
|
61
|
-
if return_tuple is not None:
|
|
62
|
-
(
|
|
63
|
-
self.tables,
|
|
64
|
-
self.columns,
|
|
65
|
-
some_exception,
|
|
66
|
-
) = return_tuple
|
|
67
|
-
|
|
68
|
-
@staticmethod
|
|
69
|
-
def _get_tables_columns_process_wrapped(
|
|
70
|
-
sql_query: str, use_raw_names: bool = False
|
|
71
|
-
) -> Tuple[List[str], List[str]]:
|
|
72
|
-
# Invoke sql_lineage_parser_impl_func_wrapper in a separate process to avoid
|
|
73
|
-
# memory leaks from sqllineage module used by SqlLineageSQLParserImpl. This will help
|
|
74
|
-
# shield our sources like lookml & redash, that need to parse a large number of SQL statements,
|
|
75
|
-
# from causing significant memory leaks in the datahub cli during ingestion.
|
|
76
|
-
queue: multiprocessing.Queue = Queue()
|
|
77
|
-
process: multiprocessing.Process = Process(
|
|
78
|
-
target=sql_lineage_parser_impl_func_wrapper,
|
|
79
|
-
args=(queue, sql_query, use_raw_names),
|
|
80
|
-
)
|
|
81
|
-
process.start()
|
|
82
|
-
tables, columns, exception_details = queue.get(block=True)
|
|
83
|
-
if exception_details is not None:
|
|
84
|
-
raise exception_details[0](f"Sub-process exception: {exception_details[1]}")
|
|
85
|
-
return tables, columns
|
|
86
|
-
|
|
87
|
-
def get_tables(self) -> List[str]:
|
|
88
|
-
return self.tables
|
|
89
|
-
|
|
90
|
-
def get_columns(self) -> List[str]:
|
|
91
|
-
return self.columns
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
DefaultSQLParser = SqlLineageSQLParser
|
|
datahub/utilities/sql_parser_base.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from abc import ABCMeta, abstractmethod
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class SqlParserException(Exception):
|
|
6
|
-
"""Raised when sql parser fails"""
|
|
7
|
-
|
|
8
|
-
pass
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class SQLParser(metaclass=ABCMeta):
|
|
12
|
-
def __init__(self, sql_query: str, use_external_process: bool = True) -> None:
|
|
13
|
-
self._sql_query = sql_query
|
|
14
|
-
|
|
15
|
-
@abstractmethod
|
|
16
|
-
def get_tables(self) -> List[str]:
|
|
17
|
-
pass
|
|
18
|
-
|
|
19
|
-
@abstractmethod
|
|
20
|
-
def get_columns(self) -> List[str]:
|
|
21
|
-
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|