acryl-datahub 1.0.0.2rc5__py3-none-any.whl → 1.0.0.3rc2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub has been flagged as possibly problematic.
Files changed (24)
  1. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/METADATA +2516 -2516
  2. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/RECORD +24 -23
  3. datahub/_version.py +1 -1
  4. datahub/emitter/mcp.py +5 -1
  5. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  6. datahub/ingestion/source/hex/api.py +1 -20
  7. datahub/ingestion/source/mlflow.py +19 -6
  8. datahub/ingestion/source/powerbi/config.py +12 -0
  9. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  10. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  11. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
  12. datahub/ingestion/source/sigma/config.py +75 -6
  13. datahub/ingestion/source/sigma/sigma.py +16 -1
  14. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  15. datahub/ingestion/source/snowflake/snowflake_config.py +1 -0
  16. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  17. datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
  18. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  19. datahub/sql_parsing/sqlglot_utils.py +16 -8
  20. datahub/testing/mcp_diff.py +15 -2
  21. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/WHEEL +0 -0
  22. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/entry_points.txt +0 -0
  23. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/licenses/LICENSE +0 -0
  24. {acryl_datahub-1.0.0.2rc5.dist-info → acryl_datahub-1.0.0.3rc2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/powerbi/m_query/pattern_handler.py

@@ -29,6 +29,12 @@ from datahub.ingestion.source.powerbi.m_query.data_classes import (
     Lineage,
     ReferencedTable,
 )
+from datahub.ingestion.source.powerbi.m_query.odbc import (
+    extract_dsn,
+    extract_platform,
+    extract_server,
+    normalize_platform_name,
+)
 from datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes import Table
 from datahub.metadata.schema_classes import SchemaFieldDataTypeClass
 from datahub.sql_parsing.sqlglot_lineage import (

@@ -155,6 +161,7 @@ class AbstractLineage(ABC):
                 tree_function.token_values(arg_list)
             ),
         )
+        logger.debug(f"DB Details: {arguments}")
 
         if len(arguments) < 2:
             logger.debug(f"Expected minimum 2 arguments, but got {len(arguments)}")

@@ -940,6 +947,147 @@ class NativeQueryLineage(AbstractLineage):
         )
 
 
+class OdbcLineage(AbstractLineage):
+    def create_lineage(
+        self, data_access_func_detail: DataAccessFunctionDetail
+    ) -> Lineage:
+        logger.debug(
+            f"Processing {self.get_platform_pair().powerbi_data_platform_name} "
+            f"data-access function detail {data_access_func_detail}"
+        )
+
+        connect_string, _ = self.get_db_detail_from_argument(
+            data_access_func_detail.arg_list
+        )
+
+        if not connect_string:
+            self.reporter.warning(
+                title="Can not extract ODBC connect string",
+                message="Can not extract ODBC connect string from data access function. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-access-func-detail={data_access_func_detail}",
+            )
+            return Lineage.empty()
+
+        logger.debug(f"ODBC connect string: {connect_string}")
+        data_platform, powerbi_platform = extract_platform(connect_string)
+        server_name = extract_server(connect_string)
+
+        if not data_platform:
+            dsn = extract_dsn(connect_string)
+            if dsn:
+                logger.debug(f"Extracted DSN: {dsn}")
+                server_name = dsn
+            if dsn and self.config.dsn_to_platform_name:
+                logger.debug(f"Attempting to map DSN {dsn} to platform")
+                name = self.config.dsn_to_platform_name.get(dsn)
+                if name:
+                    logger.debug(f"Found DSN {dsn} mapped to platform {name}")
+                    data_platform, powerbi_platform = normalize_platform_name(name)
+
+        if not data_platform or not powerbi_platform:
+            self.reporter.warning(
+                title="Can not determine ODBC platform",
+                message="Can not determine platform from ODBC connect string. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, connect-string={connect_string}",
+            )
+            return Lineage.empty()
+
+        platform_pair: DataPlatformPair = self.create_platform_pair(
+            data_platform, powerbi_platform
+        )
+
+        if not server_name and self.config.server_to_platform_instance:
+            self.reporter.warning(
+                title="Can not determine ODBC server name",
+                message="Can not determine server name with server_to_platform_instance mapping. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}",
+            )
+            return Lineage.empty()
+        elif not server_name:
+            server_name = "unknown"
+
+        database_name = None
+        schema_name = None
+        table_name = None
+        qualified_table_name = None
+
+        temp_accessor: Optional[IdentifierAccessor] = (
+            data_access_func_detail.identifier_accessor
+        )
+
+        while temp_accessor:
+            logger.debug(
+                f"identifier = {temp_accessor.identifier} items = {temp_accessor.items}"
+            )
+            if temp_accessor.items.get("Kind") == "Database":
+                database_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Schema":
+                schema_name = temp_accessor.items["Name"]
+
+            if temp_accessor.items.get("Kind") == "Table":
+                table_name = temp_accessor.items["Name"]
+
+            if temp_accessor.next is not None:
+                temp_accessor = temp_accessor.next
+            else:
+                break
+
+        if (
+            database_name is not None
+            and schema_name is not None
+            and table_name is not None
+        ):
+            qualified_table_name = f"{database_name}.{schema_name}.{table_name}"
+        elif database_name is not None and table_name is not None:
+            qualified_table_name = f"{database_name}.{table_name}"
+
+        if not qualified_table_name:
+            self.reporter.warning(
+                title="Can not determine qualified table name",
+                message="Can not determine qualified table name for ODBC data source. Skipping Lineage creation.",
+                context=f"table-name={self.table.full_name}, data-platform={data_platform}",
+            )
+            logger.warning(
+                f"Can not determine qualified table name for ODBC data source {data_platform} "
+                f"table {self.table.full_name}."
+            )
+            return Lineage.empty()
+
+        logger.debug(
+            f"ODBC Platform {data_platform} found qualified table name {qualified_table_name}"
+        )
+
+        urn = make_urn(
+            config=self.config,
+            platform_instance_resolver=self.platform_instance_resolver,
+            data_platform_pair=platform_pair,
+            server=server_name,
+            qualified_table_name=qualified_table_name,
+        )
+
+        column_lineage = self.create_table_column_lineage(urn)
+
+        return Lineage(
+            upstreams=[
+                DataPlatformTable(
+                    data_platform_pair=platform_pair,
+                    urn=urn,
+                )
+            ],
+            column_lineage=column_lineage,
+        )
+
+    @staticmethod
+    def create_platform_pair(
+        data_platform: str, powerbi_platform: str
+    ) -> DataPlatformPair:
+        return DataPlatformPair(data_platform, powerbi_platform)
+
+    def get_platform_pair(self) -> DataPlatformPair:
+        return SupportedDataPlatform.ODBC.value
+
+
 class SupportedPattern(Enum):
     DATABRICKS_QUERY = (
         DatabricksLineage,

@@ -991,6 +1139,11 @@ class SupportedPattern(Enum):
         FunctionName.NATIVE_QUERY,
     )
 
+    ODBC = (
+        OdbcLineage,
+        FunctionName.ODBC_DATA_ACCESS,
+    )
+
     def handler(self) -> Type[AbstractLineage]:
         return self.value[0]
 
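The new OdbcLineage handler relies on helpers from the new m_query/odbc.py module (extract_platform, extract_server, extract_dsn, normalize_platform_name), whose bodies are not shown in this diff. Below is a minimal sketch of how ODBC connect-string parsing of this kind could work, assuming semicolon-separated key=value pairs; these are illustrative stand-ins, not the actual module contents.

# Illustrative sketch only: the real helpers live in
# datahub/ingestion/source/powerbi/m_query/odbc.py and may differ.
from typing import Dict, Optional, Tuple

def _parse_connect_string(connect_string: str) -> Dict[str, str]:
    # ODBC connect strings are semicolon-separated key=value pairs, e.g.
    # "Driver={Snowflake};Server=acme.snowflakecomputing.com;Database=SALES"
    pairs: Dict[str, str] = {}
    for part in connect_string.split(";"):
        if "=" in part:
            key, value = part.split("=", 1)
            pairs[key.strip().lower()] = value.strip().strip("{}")
    return pairs

def extract_server(connect_string: str) -> Optional[str]:
    pairs = _parse_connect_string(connect_string)
    return pairs.get("server") or pairs.get("host")

def extract_dsn(connect_string: str) -> Optional[str]:
    return _parse_connect_string(connect_string).get("dsn")

def extract_platform(connect_string: str) -> Tuple[Optional[str], Optional[str]]:
    # Guess the platform from the Driver keyword; returns a
    # (datahub_platform, powerbi_platform) pair, or (None, None) if unknown.
    driver = _parse_connect_string(connect_string).get("driver", "").lower()
    if "snowflake" in driver:
        return "snowflake", "Snowflake"
    if "postgres" in driver:
        return "postgres", "PostgreSQL"
    return None, None

When no driver-based platform can be determined, create_lineage falls back to the DSN and the PowerBI source's new dsn_to_platform_name mapping (added to powerbi/config.py in this release), so a recipe can map a DSN such as "finance_dw" to a platform name like "snowflake"; the exact DSN and platform values here are hypothetical.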
datahub/ingestion/source/sigma/config.py

@@ -1,8 +1,9 @@
 import logging
 from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Dict, List, Optional
 
 import pydantic
+from pydantic import BaseModel, Field
 
 from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.source_common import (

@@ -17,6 +18,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionConfigBase,
 )
+from datahub.utilities.lossy_collections import LossyDict
 
 logger = logging.getLogger(__name__)
 

@@ -53,15 +55,82 @@ class Constant:
     DEFAULT_API_URL = "https://aws-api.sigmacomputing.com/v2"
 
 
+class WorkspaceCounts(BaseModel):
+    workbooks_count: int = 0
+    datasets_count: int = 0
+    elements_count: int = 0
+    pages_count: int = 0
+
+    def is_empty(self) -> bool:
+        return (
+            self.workbooks_count == 0
+            and self.datasets_count == 0
+            and self.elements_count == 0
+            and self.pages_count == 0
+        )
+
+    def as_obj(self) -> dict:
+        return {
+            "workbooks_count": self.workbooks_count,
+            "datasets_count": self.datasets_count,
+            "elements_count": self.elements_count,
+            "pages_count": self.pages_count,
+        }
+
+
+class SigmaWorkspaceEntityFilterReport(EntityFilterReport):
+    type: str = "workspace"
+
+    workspace_counts: LossyDict[str, WorkspaceCounts] = Field(
+        default_factory=LossyDict,
+        description="Counts of workbooks, datasets, elements and pages in each workspace.",
+    )
+
+    def increment_workbooks_count(self, workspace_id: str) -> None:
+        if workspace_id not in self.workspace_counts:
+            self.workspace_counts[workspace_id] = WorkspaceCounts()
+        self.workspace_counts[workspace_id].workbooks_count += 1
+
+    def increment_datasets_count(self, workspace_id: str) -> None:
+        if workspace_id not in self.workspace_counts:
+            self.workspace_counts[workspace_id] = WorkspaceCounts()
+        self.workspace_counts[workspace_id].datasets_count += 1
+
+    def increment_elements_count(self, workspace_id: str) -> None:
+        if workspace_id not in self.workspace_counts:
+            self.workspace_counts[workspace_id] = WorkspaceCounts()
+        self.workspace_counts[workspace_id].elements_count += 1
+
+    def increment_pages_count(self, workspace_id: str) -> None:
+        if workspace_id not in self.workspace_counts:
+            self.workspace_counts[workspace_id] = WorkspaceCounts()
+        self.workspace_counts[workspace_id].pages_count += 1
+
+    def as_obj(self) -> dict:
+        return {
+            "filtered": self.dropped_entities.as_obj(),
+            "processed": self.processed_entities.as_obj(),
+            "workspace_counts": {
+                key: item.as_obj() for key, item in self.workspace_counts.items()
+            },
+        }
+
+
 @dataclass
 class SigmaSourceReport(StaleEntityRemovalSourceReport):
-    workspaces: EntityFilterReport = EntityFilterReport.field(type="workspace")
-    number_of_workspaces: Optional[int] = None
+    workspaces: SigmaWorkspaceEntityFilterReport = field(
+        default_factory=SigmaWorkspaceEntityFilterReport
+    )
     non_accessible_workspaces_count: int = 0
-    shared_entities_count: int = 0
-    number_of_datasets: int = 0
-    number_of_workbooks: int = 0
+
+    datasets: EntityFilterReport = EntityFilterReport.field(type="dataset")
+    datasets_without_workspace: int = 0
+
+    workbooks: EntityFilterReport = EntityFilterReport.field(type="workbook")
+    workbooks_without_workspace: int = 0
+
     number_of_files_metadata: Dict[str, int] = field(default_factory=dict)
+    empty_workspaces: List[str] = field(default_factory=list)
 
 
 class PlatformDetail(PlatformInstanceConfigMixin, EnvConfigMixin):
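The new report plumbing tracks per-workspace counts of workbooks, datasets, elements, and pages so that empty workspaces can be flagged at the end of ingestion. Below is a small standalone sketch of the same counting pattern, using a plain dict and dataclasses instead of DataHub's LossyDict and EntityFilterReport (those substitutions are assumptions made to keep the example self-contained).

from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict

@dataclass
class WorkspaceCounts:
    workbooks_count: int = 0
    datasets_count: int = 0
    elements_count: int = 0
    pages_count: int = 0

    def is_empty(self) -> bool:
        return not (
            self.workbooks_count or self.datasets_count
            or self.elements_count or self.pages_count
        )

@dataclass
class WorkspaceReport:
    # Keyed by workspace id; the real report uses LossyDict to bound memory.
    counts: Dict[str, WorkspaceCounts] = field(
        default_factory=lambda: defaultdict(WorkspaceCounts)
    )

    def increment_workbooks_count(self, workspace_id: str) -> None:
        self.counts[workspace_id].workbooks_count += 1

report = WorkspaceReport()
report.increment_workbooks_count("ws-123")
assert not report.counts["ws-123"].is_empty()
assert report.counts["ws-456"].is_empty()  # an untouched workspace reads as empty

The workspace ids here are placeholders; in the source itself the increments happen as each dataset, workbook, element, and page work unit is generated, as the sigma.py hunks below show.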
datahub/ingestion/source/sigma/sigma.py

@@ -35,6 +35,7 @@ from datahub.ingestion.source.sigma.config import (
     PlatformDetail,
     SigmaSourceConfig,
     SigmaSourceReport,
+    WorkspaceCounts,
 )
 from datahub.ingestion.source.sigma.data_classes import (
     Element,

@@ -163,7 +164,6 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
     def _get_allowed_workspaces(self) -> List[Workspace]:
         all_workspaces = self.sigma_api.workspaces.values()
         logger.info(f"Number of workspaces = {len(all_workspaces)}")
-        self.reporter.number_of_workspaces = len(all_workspaces)
 
         allowed_workspaces = []
         for workspace in all_workspaces:

@@ -285,6 +285,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         yield self._gen_dataset_properties(dataset_urn, dataset)
 
         if dataset.workspaceId:
+            self.reporter.workspaces.increment_datasets_count(dataset.workspaceId)
             yield from add_entity_to_container(
                 container_key=self._gen_workspace_key(dataset.workspaceId),
                 entity_type="dataset",

@@ -468,6 +469,8 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         ).as_workunit()
 
         if workbook.workspaceId:
+            self.reporter.workspaces.increment_elements_count(workbook.workspaceId)
+
             yield self._gen_entity_browsepath_aspect(
                 entity_urn=chart_urn,
                 parent_entity_urn=builder.make_container_urn(

@@ -525,6 +528,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         all_input_fields: List[InputFieldClass] = []
 
         if workbook.workspaceId:
+            self.reporter.workspaces.increment_pages_count(workbook.workspaceId)
             yield self._gen_entity_browsepath_aspect(
                 entity_urn=dashboard_urn,
                 parent_entity_urn=builder.make_container_urn(

@@ -614,6 +618,8 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
 
         paths = workbook.path.split("/")[1:]
         if workbook.workspaceId:
+            self.reporter.workspaces.increment_workbooks_count(workbook.workspaceId)
+
             yield self._gen_entity_browsepath_aspect(
                 entity_urn=dashboard_urn,
                 parent_entity_urn=builder.make_container_urn(

@@ -667,6 +673,15 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
                 f"{workspace.name} ({workspace.workspaceId})"
             )
             yield from self._gen_workspace_workunit(workspace)
+            if self.reporter.workspaces.workspace_counts.get(
+                workspace.workspaceId, WorkspaceCounts()
+            ).is_empty():
+                logger.warning(
+                    f"Workspace {workspace.name} ({workspace.workspaceId}) is empty. If this is not expected, add the user associated with the Client ID/Secret to each workspace with missing metadata"
+                )
+                self.reporter.empty_workspaces.append(
+                    f"{workspace.name} ({workspace.workspaceId})"
+                )
         yield from self._gen_sigma_dataset_upstream_lineage_workunit()
 
     def get_report(self) -> SourceReport:
datahub/ingestion/source/sigma/sigma_api.py

@@ -95,22 +95,22 @@ class SigmaAPI:
         return get_response
 
     def get_workspace(self, workspace_id: str) -> Optional[Workspace]:
+        if workspace_id in self.workspaces:
+            return self.workspaces[workspace_id]
+
         logger.debug(f"Fetching workspace metadata with id '{workspace_id}'")
         try:
-            if workspace_id in self.workspaces:
-                return self.workspaces[workspace_id]
-            else:
-                response = self._get_api_call(
-                    f"{self.config.api_url}/workspaces/{workspace_id}"
-                )
-                if response.status_code == 403:
-                    logger.debug(f"Workspace {workspace_id} not accessible.")
-                    self.report.non_accessible_workspaces_count += 1
-                    return None
-                response.raise_for_status()
-                workspace = Workspace.parse_obj(response.json())
-                self.workspaces[workspace.workspaceId] = workspace
-                return workspace
+            response = self._get_api_call(
+                f"{self.config.api_url}/workspaces/{workspace_id}"
+            )
+            if response.status_code == 403:
+                logger.debug(f"Workspace {workspace_id} not accessible.")
+                self.report.non_accessible_workspaces_count += 1
+                return None
+            response.raise_for_status()
+            workspace = Workspace.parse_obj(response.json())
+            self.workspaces[workspace.workspaceId] = workspace
+            return workspace
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch workspace '{workspace_id}'. Exception: {e}"

@@ -187,7 +187,9 @@ class SigmaAPI:
     @functools.lru_cache
     def _get_files_metadata(self, file_type: str) -> Dict[str, File]:
         logger.debug(f"Fetching file metadata with type {file_type}.")
-        file_url = url = f"{self.config.api_url}/files?typeFilters={file_type}"
+        file_url = url = (
+            f"{self.config.api_url}/files?permissionFilter=view&typeFilters={file_type}"
+        )
         try:
             files_metadata: Dict[str, File] = {}
             while True:

@@ -225,31 +227,50 @@ class SigmaAPI:
                 for dataset_dict in response_dict[Constant.ENTRIES]:
                     dataset = SigmaDataset.parse_obj(dataset_dict)
 
-                    if dataset.datasetId in dataset_files_metadata:
-                        dataset.path = dataset_files_metadata[dataset.datasetId].path
-                        dataset.badge = dataset_files_metadata[dataset.datasetId].badge
-
-                        workspace_id = dataset_files_metadata[
-                            dataset.datasetId
-                        ].workspaceId
-                        if workspace_id:
-                            dataset.workspaceId = workspace_id
-                        workspace = self.get_workspace(dataset.workspaceId)
-                        if workspace:
-                            if self.config.workspace_pattern.allowed(
-                                workspace.name
-                            ):
-                                datasets.append(dataset)
-                        elif self.config.ingest_shared_entities:
-                            # If no workspace for dataset we can consider it as shared entity
-                            self.report.shared_entities_count += 1
-                            datasets.append(dataset)
+                    if dataset.datasetId not in dataset_files_metadata:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) (missing file metadata)"
+                        )
+                        continue
+
+                    dataset.workspaceId = dataset_files_metadata[
+                        dataset.datasetId
+                    ].workspaceId
+
+                    dataset.path = dataset_files_metadata[dataset.datasetId].path
+                    dataset.badge = dataset_files_metadata[dataset.datasetId].badge
+
+                    workspace = None
+                    if dataset.workspaceId:
+                        workspace = self.get_workspace(dataset.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.datasets.processed(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                            datasets.append(dataset)
+                        else:
+                            self.report.datasets.dropped(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for dataset we can consider it as shared entity
+                        self.report.datasets_without_workspace += 1
+                        self.report.datasets.processed(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )
+                        datasets.append(dataset)
+                    else:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )
 
                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{dataset_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-            self.report.number_of_datasets = len(datasets)
+
            return datasets
         except Exception as e:
             self._log_http_error(

@@ -381,34 +402,54 @@ class SigmaAPI:
                 for workbook_dict in response_dict[Constant.ENTRIES]:
                     workbook = Workbook.parse_obj(workbook_dict)
 
-                    if workbook.workbookId in workbook_files_metadata:
-                        workbook.badge = workbook_files_metadata[
-                            workbook.workbookId
-                        ].badge
-
-                        workspace_id = workbook_files_metadata[
-                            workbook.workbookId
-                        ].workspaceId
-                        if workspace_id:
-                            workbook.workspaceId = workspace_id
-                        workspace = self.get_workspace(workbook.workspaceId)
-                        if workspace:
-                            if self.config.workspace_pattern.allowed(
-                                workspace.name
-                            ):
-                                workbook.pages = self.get_workbook_pages(workbook)
-                                workbooks.append(workbook)
-                        elif self.config.ingest_shared_entities:
-                            # If no workspace for workbook we can consider it as shared entity
-                            self.report.shared_entities_count += 1
-                            workbook.pages = self.get_workbook_pages(workbook)
-                            workbooks.append(workbook)
+                    if workbook.workbookId not in workbook_files_metadata:
+                        # Due to a bug in the Sigma API, it seems like the /files endpoint does not
+                        # return file metadata when the user has access via admin permissions. In
+                        # those cases, the user associated with the token needs to be manually added
+                        # to the workspace.
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) (missing file metadata; path: {workbook.path}; likely need to manually add user to workspace)"
+                        )
+                        continue
+
+                    workbook.workspaceId = workbook_files_metadata[
+                        workbook.workbookId
+                    ].workspaceId
+
+                    workbook.badge = workbook_files_metadata[workbook.workbookId].badge
+
+                    workspace = None
+                    if workbook.workspaceId:
+                        workspace = self.get_workspace(workbook.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.workbooks.processed(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                            workbook.pages = self.get_workbook_pages(workbook)
+                            workbooks.append(workbook)
+                        else:
+                            self.report.workbooks.dropped(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for workbook we can consider it as shared entity
+                        self.report.workbooks_without_workspace += 1
+                        self.report.workbooks.processed(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )
+                        workbook.pages = self.get_workbook_pages(workbook)
+                        workbooks.append(workbook)
+                    else:
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )
 
                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{workbook_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-            self.report.number_of_workbooks = len(workbooks)
             return workbooks
         except Exception as e:
             self._log_http_error(
datahub/ingestion/source/snowflake/snowflake_config.py

@@ -301,6 +301,7 @@ class SnowflakeV2Config(
        default=AllowDenyPattern.allow_all(),
        description=(
            "List of regex patterns for structured properties to include in ingestion."
+            " Applied to tags with form `<database>.<schema>.<tag_name>`."
            " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled."
        ),
    )
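Together with the switch from tag.tag_identifier() to tag._id_prefix_as_str() later in this diff, the pattern is matched against the `<database>.<schema>.<tag_name>` prefix rather than an identifier that includes the tag value. A small sketch of what such a pattern looks like, using DataHub's AllowDenyPattern; the database, schema, and tag names are made up:

from datahub.configuration.common import AllowDenyPattern

# Hypothetical pattern: only ingest tags defined under ANALYTICS.GOVERNANCE
# as structured properties; everything else is skipped.
pattern = AllowDenyPattern(allow=[r"ANALYTICS\.GOVERNANCE\..*"])

assert pattern.allowed("ANALYTICS.GOVERNANCE.PII_LEVEL")
assert not pattern.allowed("RAW.STAGING.TEMP_TAG")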
datahub/ingestion/source/snowflake/snowflake_queries.py

@@ -515,7 +515,10 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
            # job at eliminating redundant / repetitive queries. As such, we include the fast fingerprint
            # here
            query_id=get_query_fingerprint(
-                res["query_text"], self.identifiers.platform, fast=True
+                res["query_text"],
+                self.identifiers.platform,
+                fast=True,
+                secondary_id=res["query_secondary_fingerprint"],
            ),
            query_text=res["query_text"],
            upstreams=upstreams,

@@ -654,7 +657,17 @@ WITH
 fingerprinted_queries as (
     SELECT *,
         -- TODO: Generate better fingerprints for each query by pushing down regex logic.
-        query_history.query_parameterized_hash as query_fingerprint
+        query_history.query_parameterized_hash as query_fingerprint,
+        -- Optional and additional hash to be used for query deduplication and final query identity
+        CASE
+            WHEN CONTAINS(query_history.query_text, '-- Hex query metadata:')
+            -- Extract project id and hash it
+            THEN CAST(HASH(
+                REGEXP_SUBSTR(query_history.query_text, '"project_id"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1),
+                REGEXP_SUBSTR(query_history.query_text, '"context"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1)
+            ) AS VARCHAR)
+            ELSE NULL
+        END as query_secondary_fingerprint
     FROM
         snowflake.account_usage.query_history
     WHERE

@@ -670,11 +683,11 @@ fingerprinted_queries as (
            {time_bucket_size},
            CONVERT_TIMEZONE('UTC', start_time)
        ) AS bucket_start_time,
-        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint) AS query_count,
+        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint) AS query_count,
     FROM
         fingerprinted_queries
     QUALIFY
-        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint ORDER BY start_time DESC) = 1
+        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint ORDER BY start_time DESC) = 1
 )
 , raw_access_history AS (
     SELECT

@@ -714,6 +727,7 @@ fingerprinted_queries as (
        q.bucket_start_time,
        q.query_id,
        q.query_fingerprint,
+        q.query_secondary_fingerprint,
        q.query_count,
        q.session_id AS "SESSION_ID",
        q.start_time AS "QUERY_START_TIME",
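The new query_secondary_fingerprint column hashes the project_id and context values embedded in a trailing "-- Hex query metadata:" comment, so otherwise-identical Hex-issued queries from different projects are no longer deduplicated into a single fingerprint. Below is a minimal Python sketch of the same extraction the REGEXP_SUBSTR calls perform; the sample metadata payload is made up, and the SQL additionally folds the two values through HASH() into a VARCHAR rather than returning them raw.

import re
from typing import Optional, Tuple

# Illustrative Hex-style trailing comment; the real payload format may differ.
query_text = """SELECT * FROM sales.orders
-- Hex query metadata: {"project_id": "abc123", "context": "SCHEDULED_RUN"}"""

def hex_secondary_fingerprint(
    query_text: str,
) -> Optional[Tuple[Optional[str], Optional[str]]]:
    # Mirrors the SQL CASE: only queries carrying the Hex marker get a secondary id.
    if "-- Hex query metadata:" not in query_text:
        return None
    project_id = re.search(r'"project_id"\s*:\s*"([^"]+)"', query_text)
    context = re.search(r'"context"\s*:\s*"([^"]+)"', query_text)
    return (
        project_id.group(1) if project_id else None,
        context.group(1) if context else None,
    )

print(hex_secondary_fingerprint(query_text))  # ('abc123', 'SCHEDULED_RUN')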
datahub/ingestion/source/snowflake/snowflake_query.py

@@ -1000,4 +1000,4 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
        from_clause = (
            f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
        )
-        return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
+        return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
datahub/ingestion/source/snowflake/snowflake_tag.py

@@ -23,6 +23,7 @@ from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
 from datahub.metadata.com.linkedin.pegasus2avro.structured import (
     StructuredPropertyDefinition,
 )
+from datahub.metadata.schema_classes import ChangeTypeClass
 from datahub.metadata.urns import (
     ContainerUrn,
     DatasetUrn,

@@ -81,7 +82,7 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
     def create_structured_property_templates(self) -> Iterable[MetadataWorkUnit]:
         for tag in self.data_dictionary.get_all_tags():
             if not self.config.structured_property_pattern.allowed(
-                tag.tag_identifier()
+                tag._id_prefix_as_str()
             ):
                 continue
             if self.config.extract_tags_as_structured_properties:

@@ -111,6 +112,8 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
            yield MetadataChangeProposalWrapper(
                entityUrn=urn,
                aspect=aspect,
+                changeType=ChangeTypeClass.CREATE,
+                headers={"If-None-Match": "*"},
            ).as_workunit()
 
     def _get_tags_on_object_with_propagation(
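The structured-property template proposals are now emitted with changeType=CREATE plus an If-None-Match: "*" header, which asks the server to create the aspect only if it does not already exist, so a re-run of ingestion does not overwrite definitions that were edited in DataHub. A minimal sketch of emitting such a conditional-create proposal, relying on the headers support touched in datahub/emitter/mcp.py in this release; the urn and aspect are placeholders:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import ChangeTypeClass, StatusClass

# Placeholder urn and aspect, purely for illustration.
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)",
    aspect=StatusClass(removed=False),
    changeType=ChangeTypeClass.CREATE,
    headers={"If-None-Match": "*"},  # create only if the aspect is not already present
)
workunit = mcp.as_workunit()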