acryl-datahub 0.14.1.13rc8__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/METADATA +2506 -2456
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/RECORD +136 -131
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/entry_points.txt +2 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +123 -146
- datahub/cli/cli_utils.py +2 -0
- datahub/cli/delete_cli.py +103 -24
- datahub/cli/ingest_cli.py +110 -0
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +2 -1
- datahub/configuration/common.py +3 -3
- datahub/configuration/git.py +7 -1
- datahub/configuration/kafka_consumer_config.py +31 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/emitter/rest_emitter.py +17 -4
- datahub/ingestion/api/incremental_properties_helper.py +69 -0
- datahub/ingestion/api/source.py +6 -1
- datahub/ingestion/api/source_helpers.py +4 -2
- datahub/ingestion/graph/client.py +2 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +2 -2
- datahub/ingestion/run/pipeline.py +6 -5
- datahub/ingestion/run/pipeline_config.py +6 -0
- datahub/ingestion/sink/datahub_rest.py +15 -4
- datahub/ingestion/source/abs/source.py +4 -0
- datahub/ingestion/source/aws/aws_common.py +13 -1
- datahub/ingestion/source/aws/sagemaker.py +8 -0
- datahub/ingestion/source/aws/sagemaker_processors/common.py +6 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/jobs.py +12 -1
- datahub/ingestion/source/aws/sagemaker_processors/lineage.py +11 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +0 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +0 -21
- datahub/ingestion/source/bigquery_v2/profiler.py +0 -6
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_common.py +7 -61
- datahub/ingestion/source/dremio/dremio_api.py +204 -86
- datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
- datahub/ingestion/source/dremio/dremio_config.py +5 -0
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +2 -0
- datahub/ingestion/source/dremio/dremio_entities.py +4 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +15 -0
- datahub/ingestion/source/dremio/dremio_source.py +7 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/feast.py +97 -6
- datahub/ingestion/source/gc/datahub_gc.py +46 -35
- datahub/ingestion/source/gc/dataprocess_cleanup.py +110 -50
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +7 -2
- datahub/ingestion/source/ge_data_profiler.py +46 -9
- datahub/ingestion/source/ge_profiling_config.py +5 -0
- datahub/ingestion/source/iceberg/iceberg.py +12 -5
- datahub/ingestion/source/kafka/kafka.py +39 -19
- datahub/ingestion/source/kafka/kafka_connect.py +81 -51
- datahub/ingestion/source/looker/looker_liquid_tag.py +8 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -2
- datahub/ingestion/source/looker/view_upstream.py +65 -30
- datahub/ingestion/source/metadata/business_glossary.py +35 -18
- datahub/ingestion/source/mode.py +0 -23
- datahub/ingestion/source/neo4j/__init__.py +0 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
- datahub/ingestion/source/powerbi/__init__.py +0 -1
- datahub/ingestion/source/powerbi/config.py +3 -3
- datahub/ingestion/source/powerbi/m_query/data_classes.py +36 -15
- datahub/ingestion/source/powerbi/m_query/parser.py +6 -3
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +912 -0
- datahub/ingestion/source/powerbi/m_query/resolver.py +23 -947
- datahub/ingestion/source/powerbi/m_query/tree_function.py +3 -3
- datahub/ingestion/source/powerbi/m_query/validator.py +9 -3
- datahub/ingestion/source/powerbi/powerbi.py +12 -6
- datahub/ingestion/source/preset.py +1 -0
- datahub/ingestion/source/pulsar.py +21 -2
- datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
- datahub/ingestion/source/redash.py +13 -63
- datahub/ingestion/source/redshift/config.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +3 -0
- datahub/ingestion/source/s3/source.py +2 -3
- datahub/ingestion/source/sigma/data_classes.py +1 -0
- datahub/ingestion/source/sigma/sigma.py +101 -43
- datahub/ingestion/source/snowflake/snowflake_config.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_connection.py +28 -0
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +6 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +21 -4
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +28 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +41 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +46 -6
- datahub/ingestion/source/snowflake/snowflake_v2.py +6 -0
- datahub/ingestion/source/sql/athena.py +46 -22
- datahub/ingestion/source/sql/mssql/source.py +18 -6
- datahub/ingestion/source/sql/sql_common.py +34 -21
- datahub/ingestion/source/sql/sql_report.py +1 -0
- datahub/ingestion/source/sql/sql_types.py +85 -8
- datahub/ingestion/source/state/redundant_run_skip_handler.py +1 -1
- datahub/ingestion/source/superset.py +215 -65
- datahub/ingestion/source/tableau/tableau.py +237 -76
- datahub/ingestion/source/tableau/tableau_common.py +12 -6
- datahub/ingestion/source/tableau/tableau_constant.py +2 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
- datahub/ingestion/source/tableau/tableau_validation.py +48 -0
- datahub/ingestion/source/unity/proxy_types.py +1 -0
- datahub/ingestion/source/unity/source.py +4 -0
- datahub/ingestion/source/unity/usage.py +20 -11
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/metadata/_schema_classes.py +512 -392
- datahub/metadata/_urns/urn_defs.py +1355 -1355
- datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py +2 -0
- datahub/metadata/schema.avsc +17222 -17499
- datahub/metadata/schemas/FormInfo.avsc +4 -0
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyKey.avsc +1 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +114 -0
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +7 -57
- datahub/sql_parsing/schema_resolver.py +23 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -2
- datahub/sql_parsing/sqlglot_lineage.py +55 -14
- datahub/sql_parsing/sqlglot_utils.py +8 -2
- datahub/telemetry/telemetry.py +23 -9
- datahub/testing/compare_metadata_json.py +1 -1
- datahub/testing/doctest.py +12 -0
- datahub/utilities/file_backed_collections.py +35 -2
- datahub/utilities/partition_executor.py +1 -1
- datahub/utilities/urn_encoder.py +2 -1
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- datahub/utilities/sql_lineage_parser_impl.py +0 -160
- datahub/utilities/sql_parser.py +0 -94
- datahub/utilities/sql_parser_base.py +0 -21
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/top_level.txt +0 -0
datahub/testing/doctest.py
ADDED
@@ -0,0 +1,12 @@
+import doctest
+from types import ModuleType
+
+
+def assert_doctest(module: ModuleType) -> None:
+    result = doctest.testmod(
+        module,
+        raise_on_error=True,
+        verbose=True,
+    )
+    if result.attempted == 0:
+        raise ValueError(f"No doctests found in {module.__name__}")
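For context, a minimal sketch of how this new helper might be wired into a test suite; `my_project.utils` is a hypothetical module whose docstrings contain doctest examples:

import my_project.utils  # hypothetical module with doctests in its docstrings

from datahub.testing.doctest import assert_doctest


def test_utils_doctests() -> None:
    # Fails fast on the first broken example (raise_on_error=True) and
    # also raises if the module defines no doctests at all.
    assert_doctest(my_project.utils)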
datahub/utilities/file_backed_collections.py
CHANGED
@@ -1,6 +1,7 @@
 import collections
 import gzip
 import logging
+import os
 import pathlib
 import pickle
 import shutil
@@ -33,6 +34,14 @@ from datahub.ingestion.api.closeable import Closeable
 
 logger: logging.Logger = logging.getLogger(__name__)
 
+OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR = (
+    os.environ.get("OVERRIDE_SQLITE_VERSION_REQ") or ""
+)
+OVERRIDE_SQLITE_VERSION_REQUIREMENT = (
+    OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR
+    and OVERRIDE_SQLITE_VERSION_REQUIREMENT_STR.lower() != "false"
+)
+
 _DEFAULT_FILE_NAME = "sqlite.db"
 _DEFAULT_TABLE_NAME = "data"
 
@@ -212,6 +221,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
     _active_object_cache: OrderedDict[str, Tuple[_VT, bool]] = field(
         init=False, repr=False
     )
+    _use_sqlite_on_conflict: bool = field(repr=False, default=True)
 
     def __post_init__(self) -> None:
         assert (
@@ -232,7 +242,10 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             # We use the ON CONFLICT clause to implement UPSERTs with sqlite.
            # This was added in 3.24.0 from 2018-06-04.
            # See https://www.sqlite.org/lang_conflict.html
-            raise RuntimeError("SQLite version 3.24.0 or later is required")
+            if OVERRIDE_SQLITE_VERSION_REQUIREMENT:
+                self.use_sqlite_on_conflict = False
+            else:
+                raise RuntimeError("SQLite version 3.24.0 or later is required")
 
         # We keep a small cache in memory to avoid having to serialize/deserialize
         # data from the database too often. We use an OrderedDict to build
@@ -295,7 +308,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                     values.append(column_serializer(value))
                 items_to_write.append(tuple(values))
 
-        if items_to_write:
+        if items_to_write and self._use_sqlite_on_conflict:
            # Tricky: By using a INSERT INTO ... ON CONFLICT (key) structure, we can
            # ensure that the rowid remains the same if a value is updated but is
            # autoincremented when rows are inserted.
@@ -312,6 +325,26 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 """,
                 items_to_write,
             )
+        else:
+            for item in items_to_write:
+                try:
+                    self._conn.execute(
+                        f"""INSERT INTO {self.tablename} (
+                            key,
+                            value
+                            {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+                        )
+                        VALUES ({', '.join(['?'] * (2 + len(self.extra_columns)))})""",
+                        item,
+                    )
+                except sqlite3.IntegrityError:
+                    self._conn.execute(
+                        f"""UPDATE {self.tablename} SET
+                            value = ?
+                            {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())}
+                        WHERE key = ?""",
+                        (*item[1:], item[0]),
+                    )
 
     def flush(self) -> None:
         self._prune_cache(len(self._active_object_cache))
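The new `_use_sqlite_on_conflict` flag lets the dictionary fall back from the single-statement `ON CONFLICT` upsert (SQLite >= 3.24.0) to a try-INSERT-then-UPDATE loop, gated by the `OVERRIDE_SQLITE_VERSION_REQ` environment variable. A self-contained sketch of the same two strategies against plain `sqlite3`; the table and column names here are illustrative, not the module's internals:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE data (key TEXT PRIMARY KEY, value TEXT)")


def upsert(key: str, value: str) -> None:
    if sqlite3.sqlite_version_info >= (3, 24, 0):
        # Single-statement upsert; keeps the existing rowid on update.
        conn.execute(
            "INSERT INTO data (key, value) VALUES (?, ?) "
            "ON CONFLICT (key) DO UPDATE SET value = excluded.value",
            (key, value),
        )
    else:
        # Fallback mirroring the new else-branch above: try an INSERT and
        # switch to an UPDATE when the key already exists.
        try:
            conn.execute("INSERT INTO data (key, value) VALUES (?, ?)", (key, value))
        except sqlite3.IntegrityError:
            conn.execute("UPDATE data SET value = ? WHERE key = ?", (value, key))


upsert("a", "1")
upsert("a", "2")
assert conn.execute("SELECT value FROM data WHERE key = 'a'").fetchone() == ("2",)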
datahub/utilities/partition_executor.py
CHANGED
@@ -268,7 +268,7 @@ class BatchPartitionExecutor(Closeable):
         self.process_batch = process_batch
         self.min_process_interval = min_process_interval
         self.read_from_pending_interval = read_from_pending_interval
-        assert self.max_workers
+        assert self.max_workers >= 1
 
         self._state_lock = threading.Lock()
         self._executor = ThreadPoolExecutor(
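The tightened assertion is more than cosmetic: `assert self.max_workers` rejects `0` and `None` but silently accepts negative values, since any non-zero integer is truthy. A quick illustration:

max_workers = -1
assert max_workers  # passes: any non-zero integer is truthy
try:
    assert max_workers >= 1
except AssertionError:
    print("rejected: max_workers must be at least 1")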
datahub/utilities/urn_encoder.py
CHANGED
@@ -4,7 +4,8 @@ from typing import List
 # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
 # If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
 # We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
-RESERVED_CHARS = {",", "(", ")"}
+# Also see https://datahubproject.io/docs/what/urn/#restrictions
+RESERVED_CHARS = {",", "(", ")", "␟"}
 RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})
 
 
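As a rough sketch of what encoding the reserved set means in practice; this uses `urllib.parse.quote` for illustration and is not the module's actual `UrnEncoder` implementation:

from urllib.parse import quote

RESERVED_CHARS = {",", "(", ")", "␟"}  # the set from the diff above


def encode_reserved(value: str) -> str:
    # Percent-encode only the reserved characters, leaving everything else intact.
    return "".join(quote(c) if c in RESERVED_CHARS else c for c in value)


print(encode_reserved("col(a,b)"))  # col%28a%2Cb%29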
datahub/utilities/urns/structured_properties_urn.py
CHANGED
@@ -4,4 +4,4 @@ __all__ = ["StructuredPropertyUrn", "make_structured_property_urn"]
 
 
 def make_structured_property_urn(structured_property_id: str) -> str:
-    return str(StructuredPropertyUrn.
+    return str(StructuredPropertyUrn.from_string(structured_property_id))
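A minimal usage sketch of the fixed helper, assuming a fully qualified structured property urn as input; the property id `io.acryl.privacy.retentionTime` is only an example:

from datahub.utilities.urns.structured_properties_urn import (
    make_structured_property_urn,
)

urn = make_structured_property_urn(
    "urn:li:structuredProperty:io.acryl.privacy.retentionTime"
)
print(urn)  # urn:li:structuredProperty:io.acryl.privacy.retentionTime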
datahub/utilities/sql_lineage_parser_impl.py
DELETED
@@ -1,160 +0,0 @@
-import contextlib
-import logging
-import re
-import unittest
-import unittest.mock
-from typing import Dict, List, Optional, Set
-
-from sqllineage.core.holders import Column, SQLLineageHolder
-from sqllineage.exceptions import SQLLineageException
-
-from datahub.utilities.sql_parser_base import SQLParser, SqlParserException
-
-with contextlib.suppress(ImportError):
-    import sqlparse
-    from networkx import DiGraph
-    from sqllineage.core import LineageAnalyzer
-
-    import datahub.utilities.sqllineage_patch
-logger = logging.getLogger(__name__)
-
-
-class SqlLineageSQLParserImpl(SQLParser):
-    _DATE_SWAP_TOKEN = "__d_a_t_e"
-    _HOUR_SWAP_TOKEN = "__h_o_u_r"
-    _TIMESTAMP_SWAP_TOKEN = "__t_i_m_e_s_t_a_m_p"
-    _DATA_SWAP_TOKEN = "__d_a_t_a"
-    _ADMIN_SWAP_TOKEN = "__a_d_m_i_n"
-    _MYVIEW_SQL_TABLE_NAME_TOKEN = "__my_view__.__sql_table_name__"
-    _MYVIEW_LOOKER_TOKEN = "my_view.SQL_TABLE_NAME"
-
-    def __init__(self, sql_query: str, use_raw_names: bool = False) -> None:
-        super().__init__(sql_query)
-        original_sql_query = sql_query
-        self._use_raw_names = use_raw_names
-
-        # SqlLineageParser makes mistakes on lateral flatten queries, use the prefix
-        if "lateral flatten" in sql_query:
-            sql_query = sql_query[: sql_query.find("lateral flatten")]
-
-        # Replace reserved words that break SqlLineageParser
-        self.token_to_original: Dict[str, str] = {
-            self._DATE_SWAP_TOKEN: "date",
-            self._HOUR_SWAP_TOKEN: "hour",
-            self._TIMESTAMP_SWAP_TOKEN: "timestamp",
-            self._DATA_SWAP_TOKEN: "data",
-            self._ADMIN_SWAP_TOKEN: "admin",
-        }
-        for replacement, original in self.token_to_original.items():
-            # Replace original tokens with replacement. Since table and column name can contain a hyphen('-'),
-            # also prevent original tokens appearing as part of these names with a hyphen from getting substituted.
-            sql_query = re.sub(
-                rf"((?<!-)\b{original}\b)(?!-)",
-                rf"{replacement}",
-                sql_query,
-                flags=re.IGNORECASE,
-            )
-
-        # SqlLineageParser lowercarese tablenames and we need to replace Looker specific token which should be uppercased
-        sql_query = re.sub(
-            rf"(\${{{self._MYVIEW_LOOKER_TOKEN}}})",
-            rf"{self._MYVIEW_SQL_TABLE_NAME_TOKEN}",
-            sql_query,
-        )
-
-        # SqlLineageParser does not handle "encode" directives well. Remove them
-        sql_query = re.sub(r"\sencode [a-zA-Z]*", "", sql_query, flags=re.IGNORECASE)
-
-        # Replace lookml templates with the variable otherwise sqlparse can't parse ${
-        sql_query = re.sub(r"(\${)(.+)(})", r"\2", sql_query)
-        if sql_query != original_sql_query:
-            logger.debug(f"Rewrote original query {original_sql_query} as {sql_query}")
-
-        self._sql = sql_query
-        self._stmt_holders: Optional[List[LineageAnalyzer]] = None
-        self._sql_holder: Optional[SQLLineageHolder] = None
-        try:
-            self._stmt = [
-                s
-                for s in sqlparse.parse(
-                    # first apply sqlparser formatting just to get rid of comments, which cause
-                    # inconsistencies in parsing output
-                    sqlparse.format(
-                        self._sql.strip(),
-                        strip_comments=True,
-                        use_space_around_operators=True,
-                    ),
-                )
-                if s.token_first(skip_cm=True)
-            ]
-
-            with unittest.mock.patch(
-                "sqllineage.core.handlers.source.SourceHandler.end_of_query_cleanup",
-                datahub.utilities.sqllineage_patch.end_of_query_cleanup_patch,
-            ):
-                with unittest.mock.patch(
-                    "sqllineage.core.holders.SubQueryLineageHolder.add_column_lineage",
-                    datahub.utilities.sqllineage_patch.add_column_lineage_patch,
-                ):
-                    self._stmt_holders = [
-                        LineageAnalyzer().analyze(stmt) for stmt in self._stmt
-                    ]
-                    self._sql_holder = SQLLineageHolder.of(*self._stmt_holders)
-        except SQLLineageException as e:
-            raise SqlParserException(
-                f"SQL lineage analyzer error '{e}' for query: '{self._sql}"
-            ) from e
-
-    def get_tables(self) -> List[str]:
-        result: List[str] = []
-        if self._sql_holder is None:
-            logger.error("sql holder not present so cannot get tables")
-            return result
-        for table in self._sql_holder.source_tables:
-            table_normalized = re.sub(
-                r"^<default>.",
-                "",
-                (
-                    str(table)
-                    if not self._use_raw_names
-                    else f"{table.schema.raw_name}.{table.raw_name}"
-                ),
-            )
-            result.append(str(table_normalized))
-
-        # We need to revert TOKEN replacements
-        for token, replacement in self.token_to_original.items():
-            result = [replacement if c == token else c for c in result]
-        result = [
-            self._MYVIEW_LOOKER_TOKEN if c == self._MYVIEW_SQL_TABLE_NAME_TOKEN else c
-            for c in result
-        ]
-
-        # Sort tables to make the list deterministic
-        result.sort()
-
-        return result
-
-    def get_columns(self) -> List[str]:
-        if self._sql_holder is None:
-            raise SqlParserException("sql holder not present so cannot get columns")
-        graph: DiGraph = self._sql_holder.graph  # For mypy attribute checking
-        column_nodes = [n for n in graph.nodes if isinstance(n, Column)]
-        column_graph = graph.subgraph(column_nodes)
-
-        target_columns = {column for column, deg in column_graph.out_degree if deg == 0}
-
-        result: Set[str] = set()
-        for column in target_columns:
-            # Let's drop all the count(*) and similard columns which are expression actually if it does not have an alias
-            if not any(ele in column.raw_name for ele in ["*", "(", ")"]):
-                result.add(str(column.raw_name))
-
-        # Reverting back all the previously renamed words which confuses the parser
-        result = {"date" if c == self._DATE_SWAP_TOKEN else c for c in result}
-        result = {
-            "timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c for c in list(result)
-        }
-
-        # swap back renamed date column
-        return list(result)
datahub/utilities/sql_parser.py
DELETED
@@ -1,94 +0,0 @@
-import logging
-import multiprocessing
-import traceback
-from multiprocessing import Process, Queue
-from typing import Any, List, Optional, Tuple
-
-from datahub.utilities.sql_lineage_parser_impl import SqlLineageSQLParserImpl
-from datahub.utilities.sql_parser_base import SQLParser
-
-logger = logging.getLogger(__name__)
-
-
-def sql_lineage_parser_impl_func_wrapper(
-    queue: Optional[multiprocessing.Queue], sql_query: str, use_raw_names: bool = False
-) -> Optional[Tuple[List[str], List[str], Any]]:
-    """
-    The wrapper function that computes the tables and columns using the SqlLineageSQLParserImpl
-    and puts the results on the shared IPC queue. This is used to isolate SqlLineageSQLParserImpl
-    functionality in a separate process, and hence protect our sources from memory leaks originating in
-    the sqllineage module.
-    :param queue: The shared IPC queue on to which the results will be put.
-    :param sql_query: The SQL query to extract the tables & columns from.
-    :param use_raw_names: Parameter used to ignore sqllineage's default lowercasing.
-    :return: None.
-    """
-    exception_details: Optional[Tuple[BaseException, str]] = None
-    tables: List[str] = []
-    columns: List[str] = []
-    try:
-        parser = SqlLineageSQLParserImpl(sql_query, use_raw_names)
-        tables = parser.get_tables()
-        columns = parser.get_columns()
-    except BaseException as e:
-        exc_msg = traceback.format_exc()
-        exception_details = (e, exc_msg)
-        logger.debug(exc_msg)
-
-    if queue is not None:
-        queue.put((tables, columns, exception_details))
-        return None
-    else:
-        return (tables, columns, exception_details)
-
-
-class SqlLineageSQLParser(SQLParser):
-    def __init__(
-        self,
-        sql_query: str,
-        use_external_process: bool = False,
-        use_raw_names: bool = False,
-    ) -> None:
-        super().__init__(sql_query, use_external_process)
-        if use_external_process:
-            self.tables, self.columns = self._get_tables_columns_process_wrapped(
-                sql_query, use_raw_names
-            )
-        else:
-            return_tuple = sql_lineage_parser_impl_func_wrapper(
-                None, sql_query, use_raw_names
-            )
-            if return_tuple is not None:
-                (
-                    self.tables,
-                    self.columns,
-                    some_exception,
-                ) = return_tuple
-
-    @staticmethod
-    def _get_tables_columns_process_wrapped(
-        sql_query: str, use_raw_names: bool = False
-    ) -> Tuple[List[str], List[str]]:
-        # Invoke sql_lineage_parser_impl_func_wrapper in a separate process to avoid
-        # memory leaks from sqllineage module used by SqlLineageSQLParserImpl. This will help
-        # shield our sources like lookml & redash, that need to parse a large number of SQL statements,
-        # from causing significant memory leaks in the datahub cli during ingestion.
-        queue: multiprocessing.Queue = Queue()
-        process: multiprocessing.Process = Process(
-            target=sql_lineage_parser_impl_func_wrapper,
-            args=(queue, sql_query, use_raw_names),
-        )
-        process.start()
-        tables, columns, exception_details = queue.get(block=True)
-        if exception_details is not None:
-            raise exception_details[0](f"Sub-process exception: {exception_details[1]}")
-        return tables, columns
-
-    def get_tables(self) -> List[str]:
-        return self.tables
-
-    def get_columns(self) -> List[str]:
-        return self.columns
-
-
-DefaultSQLParser = SqlLineageSQLParser
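The docstring above captures the trick the removed module relied on: run the leak-prone parser in a child process so that any memory it leaks is reclaimed when the process exits, with only the results crossing the IPC queue. Stripped to its essentials, the pattern looks like this; `leaky_parse` is a stand-in for the real sqllineage work:

import multiprocessing
from typing import List


def leaky_parse(sql: str) -> List[str]:
    # Stand-in for the real, leak-prone sqllineage-based parsing.
    return sorted(word for word in sql.split() if word.islower())


def _worker(queue: "multiprocessing.Queue[List[str]]", sql: str) -> None:
    # Only the result crosses the process boundary.
    queue.put(leaky_parse(sql))


def parse_isolated(sql: str) -> List[str]:
    queue: "multiprocessing.Queue[List[str]]" = multiprocessing.Queue()
    process = multiprocessing.Process(target=_worker, args=(queue, sql))
    process.start()
    result = queue.get(block=True)
    process.join()
    return result


if __name__ == "__main__":
    print(parse_isolated("SELECT a FROM my_table"))  # ['a', 'my_table']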
datahub/utilities/sql_parser_base.py
DELETED
@@ -1,21 +0,0 @@
-from abc import ABCMeta, abstractmethod
-from typing import List
-
-
-class SqlParserException(Exception):
-    """Raised when sql parser fails"""
-
-    pass
-
-
-class SQLParser(metaclass=ABCMeta):
-    def __init__(self, sql_query: str, use_external_process: bool = True) -> None:
-        self._sql_query = sql_query
-
-    @abstractmethod
-    def get_tables(self) -> List[str]:
-        pass
-
-    @abstractmethod
-    def get_columns(self) -> List[str]:
-        pass
{acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/WHEEL
File without changes
{acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/top_level.txt
File without changes