acryl-datahub 0.15.0rc14__py3-none-any.whl → 0.15.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/METADATA +2414 -2430
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/RECORD +24 -26
- datahub/__init__.py +1 -1
- datahub/cli/cli_utils.py +2 -0
- datahub/ingestion/api/incremental_properties_helper.py +69 -0
- datahub/ingestion/api/source_helpers.py +3 -1
- datahub/ingestion/sink/datahub_rest.py +3 -3
- datahub/ingestion/source/abs/source.py +4 -0
- datahub/ingestion/source/gc/datahub_gc.py +5 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/mode.py +0 -23
- datahub/ingestion/source/redash.py +13 -63
- datahub/ingestion/source/redshift/config.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +2 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_v2.py +6 -0
- datahub/ingestion/source/tableau/tableau.py +107 -30
- datahub/ingestion/source/unity/source.py +2 -0
- datahub/ingestion/source/unity/usage.py +20 -11
- datahub/specific/datajob.py +4 -10
- datahub/utilities/partition_executor.py +1 -1
- datahub/utilities/sql_lineage_parser_impl.py +0 -160
- datahub/utilities/sql_parser.py +0 -94
- datahub/utilities/sql_parser_base.py +0 -21
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc14.dist-info → acryl_datahub-0.15.0rc16.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/tableau/tableau.py
CHANGED

@@ -485,6 +485,18 @@ class TableauConfig(
         description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
     )
 
+    ingest_hidden_assets: bool = Field(
+        True,
+        description="When enabled, hidden views and dashboards are ingested into Datahub. "
+        "If a dashboard or view is hidden in Tableau the luid is blank. Default of this config field is True.",
+    )
+
+    tags_for_hidden_assets: List[str] = Field(
+        default=[],
+        description="Tags to be added to hidden dashboards and views. If a dashboard or view is hidden in Tableau the luid is blank. "
+        "This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
+    )
+
     # pre = True because we want to take some decision before pydantic initialize the configuration to default values
     @root_validator(pre=True)
     def projects_backward_compatibility(cls, values: Dict) -> Dict:
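Taken together with the existing ingest_tags flag, these options control whether hidden Tableau views and dashboards are ingested and how they are labelled. A minimal sketch of a Tableau source config fragment follows; the values and connect_uri are illustrative assumptions, not taken from this diff, and connection/auth fields are omitted:

# Hypothetical recipe fragment for the new options, shown as a Python dict.
tableau_source_config = {
    "connect_uri": "https://tableau.example.com",  # assumed server URL
    "ingest_tags": True,  # must be enabled when tags_for_hidden_assets is set
    "ingest_hidden_assets": True,  # keep views/dashboards whose luid is blank
    "tags_for_hidden_assets": ["hidden-in-tableau"],  # extra tags for those assets
}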
@@ -510,6 +522,20 @@ class TableauConfig(
 
         return values
 
+    @root_validator()
+    def validate_config_values(cls, values: Dict) -> Dict:
+        tags_for_hidden_assets = values.get("tags_for_hidden_assets")
+        ingest_tags = values.get("ingest_tags")
+        if (
+            not ingest_tags
+            and tags_for_hidden_assets
+            and len(tags_for_hidden_assets) > 0
+        ):
+            raise ValueError(
+                "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
+            )
+        return values
+
 
 class WorkbookKey(ContainerKey):
     workbook_id: str
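For illustration, the rule this validator enforces can be restated as a standalone check; this is a sketch of the logic only, not the project's API:

# Standalone re-statement of validate_config_values, for illustration only.
def check_hidden_asset_tags(values: dict) -> None:
    if values.get("tags_for_hidden_assets") and not values.get("ingest_tags"):
        raise ValueError(
            "tags_for_hidden_assets is only allowed with ingest_tags enabled."
        )

check_hidden_asset_tags({"ingest_tags": True, "tags_for_hidden_assets": ["hidden"]})   # passes
check_hidden_asset_tags({"ingest_tags": False, "tags_for_hidden_assets": ["hidden"]})  # raises ValueError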
@@ -596,7 +622,16 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
     num_datasource_field_skipped_no_name: int = 0
     num_csql_field_skipped_no_name: int = 0
     num_table_field_skipped_no_name: int = 0
+    # lineage
+    num_tables_with_upstream_lineage: int = 0
+    num_upstream_table_lineage: int = 0
+    num_upstream_fine_grained_lineage: int = 0
     num_upstream_table_skipped_no_name: int = 0
+    num_upstream_table_skipped_no_columns: int = 0
+    num_upstream_table_failed_generate_reference: int = 0
+    num_upstream_table_lineage_failed_parse_sql: int = 0
+    num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
+    num_hidden_assets_skipped: int = 0
 
 
 @platform_name("Tableau")
@@ -1043,6 +1078,11 @@ class TableauSiteSource:
             ),
         )
 
+    def _is_hidden_view(self, dashboard_or_view: Dict) -> bool:
+        # LUID is blank if the view is hidden in the workbook.
+        # More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
+        return not dashboard_or_view.get(c.LUID)
+
     def get_connection_object_page(
         self,
         query: str,
@@ -1311,7 +1351,7 @@ class TableauSiteSource:
         datasource: dict,
         browse_path: Optional[str],
         is_embedded_ds: bool = False,
-    ) -> Tuple:
+    ) -> Tuple[List[Upstream], List[FineGrainedLineage]]:
         upstream_tables: List[Upstream] = []
         fine_grained_lineages: List[FineGrainedLineage] = []
         table_id_to_urn = {}
@@ -1472,6 +1512,7 @@ class TableauSiteSource:
                 c.COLUMNS_CONNECTION
             ].get("totalCount")
             if not is_custom_sql and not num_tbl_cols:
+                self.report.num_upstream_table_skipped_no_columns += 1
                 logger.warning(
                     f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
                 )
@@ -1488,6 +1529,7 @@ class TableauSiteSource:
                     table, default_schema_map=self.config.default_schema_map
                 )
             except Exception as e:
+                self.report.num_upstream_table_failed_generate_reference += 1
                 self.report.warning(
                     title="Potentially Missing Lineage Issue",
                     message="Failed to generate upstream reference",
@@ -1659,15 +1701,7 @@ class TableauSiteSource:
             func_overridden_info=None,  # Here we don't want to override any information from configuration
         )
 
-        if parsed_result is None:
-            logger.info(
-                f"Failed to extract column level lineage from datasource {datasource_urn}"
-            )
-            return []
-        if parsed_result.debug_info.error:
-            logger.info(
-                f"Failed to extract column level lineage from datasource {datasource_urn}: {parsed_result.debug_info.error}"
-            )
+        if parsed_result is None or parsed_result.debug_info.error:
             return []
 
         cll: List[ColumnLineageInfo] = (
@@ -2031,6 +2065,8 @@ class TableauSiteSource:
             aspect_name=c.UPSTREAM_LINEAGE,
             aspect=upstream_lineage,
         )
+        self.report.num_tables_with_upstream_lineage += 1
+        self.report.num_upstream_table_lineage += len(upstream_tables)
 
     @staticmethod
     def _clean_tableau_query_parameters(query: str) -> str:
@@ -2130,7 +2166,7 @@ class TableauSiteSource:
             f"Overridden info upstream_db={upstream_db}, platform_instance={platform_instance}, platform={platform}"
         )
 
-
+        parsed_result = create_lineage_sql_parsed_result(
             query=query,
             default_db=upstream_db,
             platform=platform,
@@ -2140,6 +2176,21 @@ class TableauSiteSource:
             schema_aware=not self.config.sql_parsing_disable_schema_awareness,
         )
 
+        assert parsed_result is not None
+
+        if parsed_result.debug_info.table_error:
+            logger.warning(
+                f"Failed to extract table lineage from datasource {datasource_urn}: {parsed_result.debug_info.table_error}"
+            )
+            self.report.num_upstream_table_lineage_failed_parse_sql += 1
+        elif parsed_result.debug_info.column_error:
+            logger.warning(
+                f"Failed to extract column level lineage from datasource {datasource_urn}: {parsed_result.debug_info.column_error}"
+            )
+            self.report.num_upstream_fine_grained_lineage_failed_parse_sql += 1
+
+        return parsed_result
+
     def _enrich_database_tables_with_parsed_schemas(
         self, parsing_result: SqlParsingResult
     ) -> None:
@@ -2174,9 +2225,6 @@ class TableauSiteSource:
         )
 
         if parsed_result is None:
-            logger.info(
-                f"Failed to extract table level lineage for datasource {csql_urn}"
-            )
             return
 
         self._enrich_database_tables_with_parsed_schemas(parsed_result)
@@ -2196,12 +2244,14 @@ class TableauSiteSource:
             upstreams=upstream_tables,
             fineGrainedLineages=fine_grained_lineages,
         )
-
         yield self.get_metadata_change_proposal(
             csql_urn,
             aspect_name=c.UPSTREAM_LINEAGE,
             aspect=upstream_lineage,
         )
+        self.report.num_tables_with_upstream_lineage += 1
+        self.report.num_upstream_table_lineage += len(upstream_tables)
+        self.report.num_upstream_fine_grained_lineage += len(fine_grained_lineages)
 
     def _get_schema_metadata_for_datasource(
         self, datasource_fields: List[dict]
@@ -2278,12 +2328,11 @@ class TableauSiteSource:
         )
 
         # Tags
-        if datasource_info:
+        if datasource_info and self.config.ingest_tags:
             tags = self.get_tags(datasource_info)
-
-
-
-            )
+            dataset_snapshot.aspects.append(
+                builder.make_global_tag_aspect_with_tag_list(tags)
+            )
 
         # Browse path
         if browse_path and is_embedded_ds and workbook and workbook.get(c.NAME):
@@ -2352,6 +2401,11 @@ class TableauSiteSource:
             aspect_name=c.UPSTREAM_LINEAGE,
             aspect=upstream_lineage,
         )
+        self.report.num_tables_with_upstream_lineage += 1
+        self.report.num_upstream_table_lineage += len(upstream_tables)
+        self.report.num_upstream_fine_grained_lineage += len(
+            fine_grained_lineages
+        )
 
         # Datasource Fields
         schema_metadata = self._get_schema_metadata_for_datasource(
@@ -2669,7 +2723,13 @@ class TableauSiteSource:
             c.SHEETS_CONNECTION,
             sheets_filter,
         ):
-
+            if self.config.ingest_hidden_assets or not self._is_hidden_view(sheet):
+                yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
+            else:
+                self.report.num_hidden_assets_skipped += 1
+                logger.debug(
+                    f"Skip view {sheet.get(c.ID)} because it's hidden (luid is blank)."
+                )
 
     def emit_sheets_as_charts(
         self, sheet: dict, workbook: Optional[Dict]
@@ -2760,11 +2820,17 @@ class TableauSiteSource:
             chart_snapshot.aspects.append(owner)
 
         # Tags
-
-
+        if self.config.ingest_tags:
+            tags = self.get_tags(sheet)
+            if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(
+                sheet
+            ):
+                tags.extend(self.config.tags_for_hidden_assets)
+
             chart_snapshot.aspects.append(
                 builder.make_global_tag_aspect_with_tag_list(tags)
             )
+
         yield self.get_metadata_change_event(chart_snapshot)
         if sheet_external_url is not None and self.config.ingest_embed_url is True:
             yield self.new_work_unit(
@@ -2846,7 +2912,7 @@ class TableauSiteSource:
             else None
         )
 
-        tags = self.get_tags(workbook)
+        tags = self.get_tags(workbook) if self.config.ingest_tags else None
 
         parent_key = None
         project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
@@ -2977,17 +3043,23 @@ class TableauSiteSource:
             c.DASHBOARDS_CONNECTION,
             dashboards_filter,
         ):
-
+            if self.config.ingest_hidden_assets or not self._is_hidden_view(dashboard):
+                yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
+            else:
+                self.report.num_hidden_assets_skipped += 1
+                logger.debug(
+                    f"Skip dashboard {dashboard.get(c.ID)} because it's hidden (luid is blank)."
+                )
 
-    def get_tags(self, obj: dict) ->
+    def get_tags(self, obj: dict) -> List[str]:
         tag_list = obj.get(c.TAGS, [])
-        if tag_list
+        if tag_list:
             tag_list_str = [
                 t[c.NAME] for t in tag_list if t is not None and t.get(c.NAME)
             ]
 
             return tag_list_str
-        return
+        return []
 
     def emit_dashboard(
         self, dashboard: dict, workbook: Optional[Dict]
@@ -3038,8 +3110,13 @@ class TableauSiteSource:
         )
         dashboard_snapshot.aspects.append(dashboard_info_class)
 
-
-
+        if self.config.ingest_tags:
+            tags = self.get_tags(dashboard)
+            if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(
+                dashboard
+            ):
+                tags.extend(self.config.tags_for_hidden_assets)
+
             dashboard_snapshot.aspects.append(
                 builder.make_global_tag_aspect_with_tag_list(tags)
             )
datahub/ingestion/source/unity/source.py
CHANGED

@@ -556,6 +556,8 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         )
 
         if table_props:
+            # TODO: use auto_incremental_properties workunit processor instead
+            # Consider enabling incremental_properties by default
             patch_builder = create_dataset_props_patch_builder(dataset_urn, table_props)
             for patch_mcp in patch_builder.build():
                 yield MetadataWorkUnit(
datahub/ingestion/source/unity/usage.py
CHANGED

@@ -7,7 +7,6 @@ from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Set,
 
 import pyspark
 from databricks.sdk.service.sql import QueryStatementType
-from sqllineage.runner import LineageRunner
 
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.source_helpers import auto_empty_dataset_usage_statistics
@@ -22,7 +21,9 @@ from datahub.ingestion.source.unity.proxy_types import (
 from datahub.ingestion.source.unity.report import UnityCatalogReport
 from datahub.ingestion.source.usage.usage_common import UsageAggregator
 from datahub.metadata.schema_classes import OperationClass
+from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
 from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
+from datahub.utilities.urns.dataset_urn import DatasetUrn
 
 logger = logging.getLogger(__name__)
 
@@ -48,6 +49,7 @@ class UnityCatalogUsageExtractor:
     proxy: UnityCatalogApiProxy
     table_urn_builder: Callable[[TableReference], str]
     user_urn_builder: Callable[[str], str]
+    platform: str = "databricks"
 
     def __post_init__(self):
         self.usage_aggregator = UsageAggregator[TableReference](self.config)
@@ -173,7 +175,7 @@ class UnityCatalogUsageExtractor:
         self, query: Query, table_map: TableMap
     ) -> Optional[QueryTableInfo]:
         with self.report.usage_perf_report.sql_parsing_timer:
-            table_info = self.
+            table_info = self._parse_query_via_sqlglot(query.query_text)
         if table_info is None and query.statement_type == QueryStatementType.SELECT:
             with self.report.usage_perf_report.spark_sql_parsing_timer:
                 table_info = self._parse_query_via_spark_sql_plan(query.query_text)
@@ -191,26 +193,33 @@ class UnityCatalogUsageExtractor:
             ),
         )
 
-    def
+    def _parse_query_via_sqlglot(self, query: str) -> Optional[StringTableInfo]:
         try:
-
+            sql_parser_in_tables = create_lineage_sql_parsed_result(
+                query=query,
+                default_db=None,
+                platform=self.platform,
+                env=self.config.env,
+                platform_instance=None,
+            )
+
             return GenericTableInfo(
                 source_tables=[
-                    self.
-                    for table in
+                    self._parse_sqlglot_table(table)
+                    for table in sql_parser_in_tables.in_tables
                 ],
                 target_tables=[
-                    self.
-                    for table in
+                    self._parse_sqlglot_table(table)
+                    for table in sql_parser_in_tables.out_tables
                 ],
             )
         except Exception as e:
-            logger.info(f"Could not parse query via
+            logger.info(f"Could not parse query via sqlglot, {query}: {e!r}")
             return None
 
     @staticmethod
-    def
-        full_table_name =
+    def _parse_sqlglot_table(table_urn: str) -> str:
+        full_table_name = DatasetUrn.from_string(table_urn).name
         default_schema = "<default>."
         if full_table_name.startswith(default_schema):
             return full_table_name[len(default_schema) :]
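For context, a minimal sketch of the sqlglot-based parsing path the extractor now relies on. The query and printed output are illustrative assumptions; only the create_lineage_sql_parsed_result call shape and the "<default>." prefix stripping mirror the code above:

from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
from datahub.utilities.urns.dataset_urn import DatasetUrn

# Parse one query the way _parse_query_via_sqlglot does; all values are illustrative.
result = create_lineage_sql_parsed_result(
    query="INSERT INTO sales.daily_totals SELECT region, SUM(amount) FROM sales.orders GROUP BY region",
    default_db=None,
    platform="databricks",
    env="PROD",
    platform_instance=None,
)

# in_tables/out_tables hold dataset URNs; mirroring _parse_sqlglot_table above,
# the "<default>." schema prefix is stripped from each URN's table name.
for urn in result.in_tables + result.out_tables:
    full_name = DatasetUrn.from_string(urn).name
    if full_name.startswith("<default>."):
        full_name = full_name[len("<default>.") :]
    print(full_name)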
datahub/specific/datajob.py
CHANGED
@@ -102,7 +102,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         Notes:
             If `input` is an Edge object, it is used directly. If `input` is a Urn object or string,
-            it is converted to an Edge object and added
+            it is converted to an Edge object and added without any audit stamps.
         """
         if isinstance(input, Edge):
             input_urn: str = input.destinationUrn
@@ -114,8 +114,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         input_edge = Edge(
             destinationUrn=input_urn,
-            created=self._mint_auditstamp(),
-            lastModified=self._mint_auditstamp(),
         )
 
         self._ensure_urn_type("dataJob", [input_edge], "add_input_datajob")
@@ -185,7 +183,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         Notes:
             If `input` is an Edge object, it is used directly. If `input` is a Urn object or string,
-            it is converted to an Edge object and added
+            it is converted to an Edge object and added without any audit stamps.
         """
         if isinstance(input, Edge):
             input_urn: str = input.destinationUrn
@@ -197,8 +195,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         input_edge = Edge(
             destinationUrn=input_urn,
-            created=self._mint_auditstamp(),
-            lastModified=self._mint_auditstamp(),
         )
 
         self._ensure_urn_type("dataset", [input_edge], "add_input_dataset")
@@ -270,7 +266,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         Notes:
             If `output` is an Edge object, it is used directly. If `output` is a Urn object or string,
-            it is converted to an Edge object and added
+            it is converted to an Edge object and added without any audit stamps.
         """
         if isinstance(output, Edge):
             output_urn: str = output.destinationUrn
@@ -282,15 +278,13 @@ class DataJobPatchBuilder(MetadataPatchProposal):
 
         output_edge = Edge(
             destinationUrn=output_urn,
-            created=self._mint_auditstamp(),
-            lastModified=self._mint_auditstamp(),
         )
 
         self._ensure_urn_type("dataset", [output_edge], "add_output_dataset")
         self._add_patch(
             DataJobInputOutput.ASPECT_NAME,
             "add",
-            path=f"/outputDatasetEdges/{self.quote(
+            path=f"/outputDatasetEdges/{self.quote(output_urn)}",
             value=output_edge,
         )
         return self
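The practical effect of dropping created/lastModified is that patch edges built from plain URNs no longer carry synthetic audit stamps. A small usage sketch of DataJobPatchBuilder under that assumption; the URNs are invented for illustration:

from datahub.specific.datajob import DataJobPatchBuilder

# Build a patch that adds one input and one output dataset edge to a data job.
# After this change the generated Edge objects carry no created/lastModified stamps.
patch = DataJobPatchBuilder(
    "urn:li:dataJob:(urn:li:dataFlow:(airflow,example_dag,PROD),example_task)"
)
patch.add_input_dataset(
    "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.upstream,PROD)"
)
patch.add_output_dataset(
    "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.downstream,PROD)"
)
for mcp in patch.build():
    print(mcp.aspectName)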
datahub/utilities/partition_executor.py
CHANGED

@@ -268,7 +268,7 @@ class BatchPartitionExecutor(Closeable):
         self.process_batch = process_batch
         self.min_process_interval = min_process_interval
         self.read_from_pending_interval = read_from_pending_interval
-        assert self.max_workers
+        assert self.max_workers >= 1
 
         self._state_lock = threading.Lock()
         self._executor = ThreadPoolExecutor(
datahub/utilities/sql_lineage_parser_impl.py
DELETED

@@ -1,160 +0,0 @@
-import contextlib
-import logging
-import re
-import unittest
-import unittest.mock
-from typing import Dict, List, Optional, Set
-
-from sqllineage.core.holders import Column, SQLLineageHolder
-from sqllineage.exceptions import SQLLineageException
-
-from datahub.utilities.sql_parser_base import SQLParser, SqlParserException
-
-with contextlib.suppress(ImportError):
-    import sqlparse
-    from networkx import DiGraph
-    from sqllineage.core import LineageAnalyzer
-
-    import datahub.utilities.sqllineage_patch
-logger = logging.getLogger(__name__)
-
-
-class SqlLineageSQLParserImpl(SQLParser):
-    _DATE_SWAP_TOKEN = "__d_a_t_e"
-    _HOUR_SWAP_TOKEN = "__h_o_u_r"
-    _TIMESTAMP_SWAP_TOKEN = "__t_i_m_e_s_t_a_m_p"
-    _DATA_SWAP_TOKEN = "__d_a_t_a"
-    _ADMIN_SWAP_TOKEN = "__a_d_m_i_n"
-    _MYVIEW_SQL_TABLE_NAME_TOKEN = "__my_view__.__sql_table_name__"
-    _MYVIEW_LOOKER_TOKEN = "my_view.SQL_TABLE_NAME"
-
-    def __init__(self, sql_query: str, use_raw_names: bool = False) -> None:
-        super().__init__(sql_query)
-        original_sql_query = sql_query
-        self._use_raw_names = use_raw_names
-
-        # SqlLineageParser makes mistakes on lateral flatten queries, use the prefix
-        if "lateral flatten" in sql_query:
-            sql_query = sql_query[: sql_query.find("lateral flatten")]
-
-        # Replace reserved words that break SqlLineageParser
-        self.token_to_original: Dict[str, str] = {
-            self._DATE_SWAP_TOKEN: "date",
-            self._HOUR_SWAP_TOKEN: "hour",
-            self._TIMESTAMP_SWAP_TOKEN: "timestamp",
-            self._DATA_SWAP_TOKEN: "data",
-            self._ADMIN_SWAP_TOKEN: "admin",
-        }
-        for replacement, original in self.token_to_original.items():
-            # Replace original tokens with replacement. Since table and column name can contain a hyphen('-'),
-            # also prevent original tokens appearing as part of these names with a hyphen from getting substituted.
-            sql_query = re.sub(
-                rf"((?<!-)\b{original}\b)(?!-)",
-                rf"{replacement}",
-                sql_query,
-                flags=re.IGNORECASE,
-            )
-
-        # SqlLineageParser lowercarese tablenames and we need to replace Looker specific token which should be uppercased
-        sql_query = re.sub(
-            rf"(\${{{self._MYVIEW_LOOKER_TOKEN}}})",
-            rf"{self._MYVIEW_SQL_TABLE_NAME_TOKEN}",
-            sql_query,
-        )
-
-        # SqlLineageParser does not handle "encode" directives well. Remove them
-        sql_query = re.sub(r"\sencode [a-zA-Z]*", "", sql_query, flags=re.IGNORECASE)
-
-        # Replace lookml templates with the variable otherwise sqlparse can't parse ${
-        sql_query = re.sub(r"(\${)(.+)(})", r"\2", sql_query)
-        if sql_query != original_sql_query:
-            logger.debug(f"Rewrote original query {original_sql_query} as {sql_query}")
-
-        self._sql = sql_query
-        self._stmt_holders: Optional[List[LineageAnalyzer]] = None
-        self._sql_holder: Optional[SQLLineageHolder] = None
-        try:
-            self._stmt = [
-                s
-                for s in sqlparse.parse(
-                    # first apply sqlparser formatting just to get rid of comments, which cause
-                    # inconsistencies in parsing output
-                    sqlparse.format(
-                        self._sql.strip(),
-                        strip_comments=True,
-                        use_space_around_operators=True,
-                    ),
-                )
-                if s.token_first(skip_cm=True)
-            ]
-
-            with unittest.mock.patch(
-                "sqllineage.core.handlers.source.SourceHandler.end_of_query_cleanup",
-                datahub.utilities.sqllineage_patch.end_of_query_cleanup_patch,
-            ):
-                with unittest.mock.patch(
-                    "sqllineage.core.holders.SubQueryLineageHolder.add_column_lineage",
-                    datahub.utilities.sqllineage_patch.add_column_lineage_patch,
-                ):
-                    self._stmt_holders = [
-                        LineageAnalyzer().analyze(stmt) for stmt in self._stmt
-                    ]
-                    self._sql_holder = SQLLineageHolder.of(*self._stmt_holders)
-        except SQLLineageException as e:
-            raise SqlParserException(
-                f"SQL lineage analyzer error '{e}' for query: '{self._sql}"
-            ) from e
-
-    def get_tables(self) -> List[str]:
-        result: List[str] = []
-        if self._sql_holder is None:
-            logger.error("sql holder not present so cannot get tables")
-            return result
-        for table in self._sql_holder.source_tables:
-            table_normalized = re.sub(
-                r"^<default>.",
-                "",
-                (
-                    str(table)
-                    if not self._use_raw_names
-                    else f"{table.schema.raw_name}.{table.raw_name}"
-                ),
-            )
-            result.append(str(table_normalized))
-
-        # We need to revert TOKEN replacements
-        for token, replacement in self.token_to_original.items():
-            result = [replacement if c == token else c for c in result]
-        result = [
-            self._MYVIEW_LOOKER_TOKEN if c == self._MYVIEW_SQL_TABLE_NAME_TOKEN else c
-            for c in result
-        ]
-
-        # Sort tables to make the list deterministic
-        result.sort()
-
-        return result
-
-    def get_columns(self) -> List[str]:
-        if self._sql_holder is None:
-            raise SqlParserException("sql holder not present so cannot get columns")
-        graph: DiGraph = self._sql_holder.graph  # For mypy attribute checking
-        column_nodes = [n for n in graph.nodes if isinstance(n, Column)]
-        column_graph = graph.subgraph(column_nodes)
-
-        target_columns = {column for column, deg in column_graph.out_degree if deg == 0}
-
-        result: Set[str] = set()
-        for column in target_columns:
-            # Let's drop all the count(*) and similard columns which are expression actually if it does not have an alias
-            if not any(ele in column.raw_name for ele in ["*", "(", ")"]):
-                result.add(str(column.raw_name))
-
-        # Reverting back all the previously renamed words which confuses the parser
-        result = {"date" if c == self._DATE_SWAP_TOKEN else c for c in result}
-        result = {
-            "timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c for c in list(result)
-        }
-
-        # swap back renamed date column
-        return list(result)