acryl-datahub 0.14.1.13rc7__py3-none-any.whl → 0.14.1.13rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/METADATA +2355 -2355
- {acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/RECORD +15 -15
- datahub/__init__.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +0 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +0 -21
- datahub/ingestion/source/bigquery_v2/profiler.py +0 -6
- datahub/ingestion/source/ge_data_profiler.py +23 -8
- datahub/ingestion/source/ge_profiling_config.py +5 -0
- datahub/ingestion/source/kafka/kafka.py +2 -0
- datahub/ingestion/source/sigma/data_classes.py +1 -0
- datahub/ingestion/source/sigma/sigma.py +101 -43
- datahub/ingestion/source/sql/mssql/source.py +18 -4
- {acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/RECORD RENAMED

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=Qii_Ygk-5SdmcNm5lZWzqq41uil0D4isccAfgzqcyu8,577
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -189,8 +189,8 @@ datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7Ek
 datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
 datahub/ingestion/source/feast.py,sha256=NYaAjzLVRhmMKDawBwN0OL8AMyKDLsxOwEj3YFX0wIA,14244
 datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG12tU,15879
-datahub/ingestion/source/ge_data_profiler.py,sha256=
-datahub/ingestion/source/ge_profiling_config.py,sha256=
+datahub/ingestion/source/ge_data_profiler.py,sha256=JqTonv8y7Re4Rfn2YKOEaLufiiAOWKfK1XQvJfV5dvs,64126
+datahub/ingestion/source/ge_profiling_config.py,sha256=P-9pd20koFvpxeEL_pqFvKWWz-qnpZ6XkELUyBKr7is,10807
 datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
 datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
 datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
@@ -240,12 +240,12 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
 datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=8nuQ8hMuJEswWDZtV2RjbK8RvDJUzT_S74dnyPpGFdQ,4857
 datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=EoHo9twb0_QdX7Nvd1HJC1Yn0rqtrfR52EVk7Hu3XOQ,3296
 datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=WxiLPFc7LwZXNDYfV9oySUD43kc2GcOf_pUokp3vFNM,8098
-datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=
-datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=
+datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=tyKPfxg88ERX6z7Q5lCmJS-H7cgsWRUwM2epqe62bI0,32277
+datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=Sv6BrK62nu3xpgjYGE-x1xdSTouvvnKDJtazPobhiKQ,50813
 datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
 datahub/ingestion/source/bigquery_v2/common.py,sha256=Cxjf1a8ibkL_YRQeS0BqsjlyMgFJpaZ3iq_d7e8T8MQ,4030
 datahub/ingestion/source/bigquery_v2/lineage.py,sha256=Jg_pwnaj7l_KEcgq0enJXwrKh5jyUfBl4YB05YpkIVg,45415
-datahub/ingestion/source/bigquery_v2/profiler.py,sha256=
+datahub/ingestion/source/bigquery_v2/profiler.py,sha256=8-yAoq8sX0E6VIwr75YbM8wITRNhGfxgte9BCeGNkMM,10681
 datahub/ingestion/source/bigquery_v2/queries.py,sha256=B2vJLZYfwM1J5JAckijKJTxLhDYA0yw3kfzj5oRQB5c,20151
 datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=xLf-vCUAnNuDdTHghxJvPOyGeA_XLCW3r-xj-8cfn3Q,19528
 datahub/ingestion/source/bigquery_v2/usage.py,sha256=xHb-gsugshoyzwScOQ5DHxZhoA-K0e4EbfSeVTLs428,40543
@@ -319,7 +319,7 @@ datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
 datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka/kafka.py,sha256=
+datahub/ingestion/source/kafka/kafka.py,sha256=D54QXYoEQJYVb7oWyvLVCGbJunS8ZTGYlMMdaBqpmMs,25475
 datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -410,8 +410,8 @@ datahub/ingestion/source/schema_inference/object.py,sha256=Aibf4dY4dXb4P9zfcNGnI
 datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
 datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/sigma/config.py,sha256=zGh0ZU2Ty5NHfNXAVwFxVkK4NlsNSxtAyfCgMJJvzdc,3795
-datahub/ingestion/source/sigma/data_classes.py,sha256=
-datahub/ingestion/source/sigma/sigma.py,sha256=
+datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
+datahub/ingestion/source/sigma/sigma.py,sha256=DZCxMgIx-uJp6poFG85KFygRCyFDkkM4KXtBbUsgkHk,24094
 datahub/ingestion/source/sigma/sigma_api.py,sha256=jUkKbtqX3eRdfriruJE5KFT9aM7DCbvhmwfPzveYIiM,17814
 datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/slack/slack.py,sha256=C_3iXUS72h7HALhBW_AIyi3nNOqzyh7Ogflr-qI5ZEE,12946
@@ -464,7 +464,7 @@ datahub/ingestion/source/sql/two_tier_sql_source.py,sha256=YDrGBb5WKVls6qv17QU5f
 datahub/ingestion/source/sql/vertica.py,sha256=_9OgSgIgqBml0av063rb8nACiT3SAmzpw0ouyF91wv8,33382
 datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr5GO9qgjS3R7pLRZSk,87
 datahub/ingestion/source/sql/mssql/job_models.py,sha256=eMyR0Efl5kvi7QNgNXzd5_6PdDKYly_552Y8OGSj9PY,6012
-datahub/ingestion/source/sql/mssql/source.py,sha256=
+datahub/ingestion/source/sql/mssql/source.py,sha256=lOfvNUGfxLmgCAops2jwm1OTBvJxgBivHcEj-9DEaAE,30390
 datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py,sha256=RpnvKPalAAaOD_eUg8bZ4VkGTSeLFWuy0mefwc4s3x8,2837
 datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/state/checkpoint.py,sha256=x9Xww-MIFXSKjeg1tOZXE72LehCm5OfKy3HfucgIRWM,8833
@@ -971,8 +971,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
+acryl_datahub-0.14.1.13rc9.dist-info/METADATA,sha256=CvAHjZi0HfaRrZKhjpu6Rp_n-3c2oYSPb8A54uw9zNQ,171138
+acryl_datahub-0.14.1.13rc9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.14.1.13rc9.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
+acryl_datahub-0.14.1.13rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.14.1.13rc9.dist-info/RECORD,,
datahub/__init__.py CHANGED
(+1 -1: the package version string, bumped from 0.14.1.13rc7 to 0.14.1.13rc9)
datahub/ingestion/source/bigquery_v2/bigquery_schema.py CHANGED

@@ -118,7 +118,6 @@ class BigqueryTable(BaseTable):
     active_billable_bytes: Optional[int] = None
     long_term_billable_bytes: Optional[int] = None
     partition_info: Optional[PartitionInfo] = None
-    columns_ignore_from_profiling: List[str] = field(default_factory=list)
     external: bool = False
     constraints: List[BigqueryTableConstraint] = field(default_factory=list)
     table_type: Optional[str] = None
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py CHANGED

@@ -598,18 +598,6 @@ class BigQuerySchemaGenerator:
             dataset_name=dataset_name,
         )

-    # This method is used to generate the ignore list for datatypes the profiler doesn't support we have to do it here
-    # because the profiler doesn't have access to columns
-    def generate_profile_ignore_list(self, columns: List[BigqueryColumn]) -> List[str]:
-        ignore_list: List[str] = []
-        for column in columns:
-            if not column.data_type or any(
-                word in column.data_type.lower()
-                for word in ["array", "struct", "geography", "json"]
-            ):
-                ignore_list.append(column.field_path)
-        return ignore_list
-
     def _process_table(
         self,
         table: BigqueryTable,
@@ -631,15 +619,6 @@ class BigQuerySchemaGenerator:
         )
         table.column_count = len(columns)

-        # We only collect profile ignore list if profiling is enabled and profile_table_level_only is false
-        if (
-            self.config.is_profiling_enabled()
-            and not self.config.profiling.profile_table_level_only
-        ):
-            table.columns_ignore_from_profiling = self.generate_profile_ignore_list(
-                columns
-            )
-
         if not table.column_count:
             logger.warning(
                 f"Table doesn't have any column or unable to get columns for table: {table_identifier}"
datahub/ingestion/source/bigquery_v2/profiler.py CHANGED

@@ -166,12 +166,6 @@ WHERE
         normalized_table_name = BigqueryTableIdentifier(
             project_id=project_id, dataset=dataset, table=table.name
         ).get_table_name()
-        for column in table.columns_ignore_from_profiling:
-            # Profiler has issues with complex types (array, struct, geography, json), so we deny those types from profiling
-            # We also filter columns without data type as it means that column is part of a complex type.
-            self.config.profile_pattern.deny.append(
-                f"^{normalized_table_name}.{column}$"
-            )

         if table.external and not self.config.profiling.profile_external_tables:
             self.report.profiling_skipped_other[f"{project_id}.{dataset}"] += 1
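Taken together, the three BigQuery hunks above retire the per-table ignore-list plumbing: complex-type columns are no longer denied one regex at a time in the profiler, and the filtering moves into ge_data_profiler.py (next section) where it keys off the SQL type name. A standalone sketch of the removed mechanism, with made-up table and column names:

# Illustrative before: the removed code denied complex-type columns per table
# by appending one regex per column to profile_pattern.deny.
normalized_table_name = "project.dataset.table"  # illustrative
column = "location_geo"                          # illustrative complex-type column
deny_regex = f"^{normalized_table_name}.{column}$"
print(deny_regex)  # ^project.dataset.table.location_geo$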
datahub/ingestion/source/ge_data_profiler.py CHANGED

@@ -7,6 +7,7 @@ import dataclasses
 import functools
 import json
 import logging
+import re
 import threading
 import traceback
 import unittest.mock
@@ -123,6 +124,8 @@ ProfilerTypeMapping.BINARY_TYPE_NAMES.append("LargeBinary")

 _datasource_connection_injection_lock = threading.Lock()

+NORMALIZE_TYPE_PATTERN = re.compile(r"^(.*?)(?:[\[<(].*)?$")
+

 @contextlib.contextmanager
 def _inject_connection_into_datasource(conn: Connection) -> Iterator[None]:
@@ -165,11 +168,9 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
         return convert_to_json_serializable(element_values.fetchone()[0])
     elif self.engine.dialect.name.lower() == BIGQUERY:
         element_values = self.engine.execute(
-            sa.select(
-
-
-            ]
-            ).select_from(self._table)
+            sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from(
+                self._table
+            )
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
     elif self.engine.dialect.name.lower() == SNOWFLAKE:
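A minimal sketch of the SQLAlchemy expression the rewritten BigQuery branch builds for unique counts; the table and column names are illustrative and the rendered SQL is approximate:

import sqlalchemy as sa

table = sa.table("my_table")  # illustrative stand-in for self._table
query = sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column("col"))).select_from(table)
# Renders roughly as: SELECT APPROX_COUNT_DISTINCT(col) AS ... FROM my_table
print(query)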
@@ -378,6 +379,9 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
                 f"{self.dataset_name}.{col}"
             ):
                 ignored_columns_by_pattern.append(col)
+            # We try to ignore nested columns as well
+            elif not self.config.profile_nested_fields and "." in col:
+                ignored_columns_by_pattern.append(col)
             elif col_dict.get("type") and self._should_ignore_column(col_dict["type"]):
                 ignored_columns_by_type.append(col)
             else:
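The new elif gates nested fields: unless profile_nested_fields is enabled, any column whose name contains a dot (i.e. a nested field path) is skipped by pattern. A self-contained illustration with made-up column names:

profile_nested_fields = False  # default of the new GEProfilingConfig option

columns = ["id", "address.city", "payload.items.sku"]
ignored = [col for col in columns if not profile_nested_fields and "." in col]
print(ignored)  # ['address.city', 'payload.items.sku']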
@@ -407,9 +411,18 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
         return columns_to_profile

     def _should_ignore_column(self, sqlalchemy_type: sa.types.TypeEngine) -> bool:
-
-
-
+        # We don't profiles columns with None types
+        if str(sqlalchemy_type) == "NULL":
+            return True
+
+        sql_type = str(sqlalchemy_type)
+
+        match = re.match(NORMALIZE_TYPE_PATTERN, sql_type)
+
+        if match:
+            sql_type = match.group(1)
+
+        return sql_type in _get_column_types_to_ignore(self.dataset.engine.dialect.name)

     @_run_with_query_combiner
     def _get_column_type(self, column_spec: _SingleColumnSpec, column: str) -> None:
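_should_ignore_column now normalizes the SQLAlchemy type string before the ignore-list lookup, so parameterized types match their base name. A standalone sketch of NORMALIZE_TYPE_PATTERN on sample type strings (the samples are illustrative, not from the diff):

import re

NORMALIZE_TYPE_PATTERN = re.compile(r"^(.*?)(?:[\[<(].*)?$")

for raw in ["ARRAY<STRING>", "STRUCT<a INT64, b STRING>", "NUMERIC(10, 2)", "JSON"]:
    match = re.match(NORMALIZE_TYPE_PATTERN, raw)
    base = match.group(1) if match else raw
    print(f"{raw} -> {base}")
# ARRAY<STRING> -> ARRAY
# STRUCT<a INT64, b STRING> -> STRUCT
# NUMERIC(10, 2) -> NUMERIC
# JSON -> JSON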
@@ -1397,6 +1410,8 @@ class DatahubGEProfiler:
 def _get_column_types_to_ignore(dialect_name: str) -> List[str]:
     if dialect_name.lower() == POSTGRESQL:
         return ["JSON"]
+    elif dialect_name.lower() == BIGQUERY:
+        return ["ARRAY", "STRUCT", "GEOGRAPHY", "JSON"]

     return []
datahub/ingestion/source/ge_profiling_config.py CHANGED

@@ -188,6 +188,11 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         ),
     )

+    profile_nested_fields: bool = Field(
+        default=False,
+        description="Whether to profile complex types like structs, arrays and maps. ",
+    )
+
     @pydantic.root_validator(pre=True)
     def deprecate_bigquery_temp_table_schema(cls, values):
         # TODO: Update docs to remove mention of this field.
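A hedged usage sketch for the new flag, assuming the standard pydantic construction of GEProfilingConfig (only the profile_nested_fields field itself comes from the diff; the rest is illustrative):

from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig

config = GEProfilingConfig.parse_obj(
    {
        "enabled": True,
        "profile_nested_fields": True,  # new in this release; defaults to False
    }
)
print(config.profile_nested_fields)  # True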
datahub/ingestion/source/kafka/kafka.py CHANGED

@@ -157,7 +157,9 @@ def get_kafka_consumer(
     if CallableConsumerConfig.is_callable_config(connection.consumer_config):
         # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
         # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        logger.debug("Initiating polling for kafka consumer")
         consumer.poll(timeout=30)
+        logger.debug("Initiated polling for kafka consumer")

     return consumer
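For context on why that poll() exists at all: with confluent-kafka, an OAUTHBEARER token callback (oauth_cb) is only invoked once the client polls. A minimal, illustrative sketch; the broker address, group id, and token values are placeholders:

from confluent_kafka import Consumer

def _oauth_cb(oauth_config):
    # Must return (token, expiry_epoch_seconds); these values are fake.
    return "fake-token", 9999999999.0

consumer = Consumer(
    {
        "bootstrap.servers": "localhost:9092",
        "group.id": "demo",
        "security.protocol": "SASL_PLAINTEXT",
        "sasl.mechanism": "OAUTHBEARER",
        "oauth_cb": _oauth_cb,
    }
)
consumer.poll(timeout=30)  # the first poll triggers _oauth_cb before any fetch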
datahub/ingestion/source/sigma/sigma.py CHANGED

@@ -4,7 +4,12 @@ from typing import Dict, Iterable, List, Optional
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.common import ConfigurationError
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import
+from datahub.emitter.mcp_builder import (
+    add_entity_to_container,
+    add_owner_to_entity_wu,
+    add_tags_to_entity_wu,
+    gen_containers,
+)
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SourceCapability,
@@ -59,12 +64,14 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     UpstreamLineage,
 )
 from datahub.metadata.schema_classes import (
+    AuditStampClass,
     BrowsePathEntryClass,
     BrowsePathsV2Class,
     ChangeAuditStampsClass,
     ChartInfoClass,
     DashboardInfoClass,
     DataPlatformInstanceClass,
+    EdgeClass,
     GlobalTagsClass,
     InputFieldClass,
     InputFieldsClass,
@@ -74,6 +81,7 @@ from datahub.metadata.schema_classes import (
     SchemaFieldClass,
     SchemaFieldDataTypeClass,
     StringTypeClass,
+    SubTypesClass,
     TagAssociationClass,
 )
 from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
@@ -257,11 +265,6 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         entries = [
             BrowsePathEntryClass(id=parent_entity_urn, urn=parent_entity_urn)
         ] + [BrowsePathEntryClass(id=path) for path in paths]
-        if self.config.platform_instance:
-            urn = builder.make_dataplatform_instance_urn(
-                self.platform, self.config.platform_instance
-            )
-            entries = [BrowsePathEntryClass(id=urn, urn=urn)] + entries
         return MetadataChangeProposalWrapper(
             entityUrn=entity_urn,
             aspect=BrowsePathsV2Class(entries),
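After this removal the aspect is built from the parent container URN plus the plain path segments only. A minimal sketch mirroring the surviving code, with illustrative URN and path values:

from datahub.metadata.schema_classes import BrowsePathEntryClass, BrowsePathsV2Class

parent_entity_urn = "urn:li:container:workspace-key"  # illustrative
paths = ["My Workspace", "My Workbook"]               # illustrative
entries = [
    BrowsePathEntryClass(id=parent_entity_urn, urn=parent_entity_urn)
] + [BrowsePathEntryClass(id=path) for path in paths]
aspect = BrowsePathsV2Class(entries)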
@@ -424,11 +427,11 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         elements: List[Element],
         workbook: Workbook,
         all_input_fields: List[InputFieldClass],
+        paths: List[str],
     ) -> Iterable[MetadataWorkUnit]:
         """
         Map Sigma page element to Datahub Chart
         """
-
         for element in elements:
             chart_urn = builder.make_chart_urn(
                 platform=self.platform,
@@ -459,11 +462,14 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
             ),
         ).as_workunit()

-
-
-
-
-
+            if workbook.workspaceId:
+                yield self._gen_entity_browsepath_aspect(
+                    entity_urn=chart_urn,
+                    parent_entity_urn=builder.make_container_urn(
+                        self._gen_workspace_key(workbook.workspaceId)
+                    ),
+                    paths=paths + [workbook.name],
+                )

         # Add sigma dataset's upstream dataset urn mapping
         for dataset_urn, upstream_dataset_urns in inputs.items():
@@ -494,7 +500,9 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):

         all_input_fields.extend(element_input_fields)

-    def _gen_pages_workunit(
+    def _gen_pages_workunit(
+        self, workbook: Workbook, paths: List[str]
+    ) -> Iterable[MetadataWorkUnit]:
         """
         Map Sigma workbook page to Datahub dashboard
         """
@@ -505,20 +513,23 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):

             yield self._gen_dashboard_info_workunit(page)

-            yield from add_entity_to_container(
-                container_key=self._gen_workbook_key(workbook.workbookId),
-                entity_type="dashboard",
-                entity_urn=dashboard_urn,
-            )
-
             dpi_aspect = self._gen_dataplatform_instance_aspect(dashboard_urn)
             if dpi_aspect:
                 yield dpi_aspect

             all_input_fields: List[InputFieldClass] = []

+            if workbook.workspaceId:
+                yield self._gen_entity_browsepath_aspect(
+                    entity_urn=dashboard_urn,
+                    parent_entity_urn=builder.make_container_urn(
+                        self._gen_workspace_key(workbook.workspaceId)
+                    ),
+                    paths=paths + [workbook.name],
+                )
+
             yield from self._gen_elements_workunit(
-                page.elements, workbook, all_input_fields
+                page.elements, workbook, all_input_fields, paths
             )

             yield MetadataChangeProposalWrapper(
@@ -531,42 +542,89 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         Map Sigma Workbook to Datahub container
         """
         owner_username = self.sigma_api.get_user_name(workbook.createdBy)
-
-
-
-
-
-
-
-
-
+
+        dashboard_urn = self._gen_dashboard_urn(workbook.workbookId)
+
+        yield self._gen_entity_status_aspect(dashboard_urn)
+
+        lastModified = AuditStampClass(
+            time=int(workbook.updatedAt.timestamp() * 1000),
+            actor="urn:li:corpuser:datahub",
+        )
+        created = AuditStampClass(
+            time=int(workbook.createdAt.timestamp() * 1000),
+            actor="urn:li:corpuser:datahub",
+        )
+
+        dashboard_info_cls = DashboardInfoClass(
+            title=workbook.name,
+            description=workbook.description if workbook.description else "",
+            dashboards=[
+                EdgeClass(
+                    destinationUrn=self._gen_dashboard_urn(page.get_urn_part()),
+                    sourceUrn=dashboard_urn,
+                )
+                for page in workbook.pages
+            ],
+            externalUrl=workbook.url,
+            lastModified=ChangeAuditStampsClass(
+                created=created, lastModified=lastModified
             ),
-
+            customProperties={
                 "path": workbook.path,
                 "latestVersion": str(workbook.latestVersion),
             },
-            owner_urn=(
-                builder.make_user_urn(owner_username)
-                if self.config.ingest_owner and owner_username
-                else None
-            ),
-            external_url=workbook.url,
-            tags=[workbook.badge] if workbook.badge else None,
-            created=int(workbook.createdAt.timestamp() * 1000),
-            last_modified=int(workbook.updatedAt.timestamp() * 1000),
         )
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn, aspect=dashboard_info_cls
+        ).as_workunit()
+
+        # Set subtype
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=SubTypesClass(typeNames=[BIContainerSubTypes.SIGMA_WORKBOOK]),
+        ).as_workunit()
+
+        # Ownership
+        owner_urn = (
+            builder.make_user_urn(owner_username)
+            if self.config.ingest_owner and owner_username
+            else None
+        )
+        if owner_urn:
+            yield from add_owner_to_entity_wu(
+                entity_type="dashboard",
+                entity_urn=dashboard_urn,
+                owner_urn=owner_urn,
+            )
+
+        # Tags
+        tags = [workbook.badge] if workbook.badge else None
+        if tags:
+            yield from add_tags_to_entity_wu(
+                entity_type="dashboard",
+                entity_urn=dashboard_urn,
+                tags=sorted(tags),
+            )

         paths = workbook.path.split("/")[1:]
-        if
+        if workbook.workspaceId:
             yield self._gen_entity_browsepath_aspect(
-                entity_urn=
+                entity_urn=dashboard_urn,
                 parent_entity_urn=builder.make_container_urn(
                     self._gen_workspace_key(workbook.workspaceId)
                 ),
-                paths=paths,
+                paths=paths + [workbook.name],
             )

-
+        if len(paths) == 0:
+            yield from add_entity_to_container(
+                container_key=self._gen_workspace_key(workbook.workspaceId),
+                entity_type="dashboard",
+                entity_urn=dashboard_urn,
+            )
+
+        yield from self._gen_pages_workunit(workbook, paths)

     def _gen_sigma_dataset_upstream_lineage_workunit(
         self,
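The net effect of this hunk: a Sigma workbook is now emitted as a dashboard entity whose DashboardInfo links to its page-level dashboards via edges, rather than as a plain container. A small sketch of the edge construction with illustrative URNs:

from datahub.metadata.schema_classes import EdgeClass

workbook_urn = "urn:li:dashboard:(sigma,workbook-1)"  # illustrative
page_urns = [
    "urn:li:dashboard:(sigma,page-1)",
    "urn:li:dashboard:(sigma,page-2)",
]  # illustrative
edges = [EdgeClass(destinationUrn=urn, sourceUrn=workbook_urn) for urn in page_urns]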
datahub/ingestion/source/sql/mssql/source.py CHANGED

@@ -50,6 +50,7 @@ from datahub.ingestion.source.sql.sql_config import (
     BasicSQLAlchemyConfig,
     make_sqlalchemy_uri,
 )
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.metadata.schema_classes import (
     BooleanTypeClass,
     NumberTypeClass,
@@ -78,6 +79,11 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
     include_stored_procedures_code: bool = Field(
         default=True, description="Include information about object code."
     )
+    procedure_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description="Regex patterns for stored procedures to filter in ingestion."
+        "Specify regex to match the entire procedure name in database.schema.procedure_name format. e.g. to match all procedures starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'",
+    )
     include_jobs: bool = Field(
         default=True,
         description="Include ingest of MSSQL Jobs. Requires access to the 'msdb' and 'sys' schema.",
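AllowDenyPattern is DataHub's standard allow/deny regex config, so the new procedure_pattern filters the same way as patterns elsewhere in the codebase. A minimal sketch with made-up procedure names:

from datahub.configuration.common import AllowDenyPattern

pattern = AllowDenyPattern(allow=[r"Customer\.public\.customer.*"])

for name in ["Customer.public.customer_load", "Sales.public.refresh_totals"]:
    print(name, pattern.allowed(name))
# Customer.public.customer_load True
# Sales.public.refresh_totals False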
@@ -164,6 +170,8 @@ class SQLServerSource(SQLAlchemySource):
     If you do use pyodbc, make sure to change the source type from `mssql` to `mssql-odbc` so that we pull in the right set of dependencies. This will be needed in most cases where encryption is required, such as managed SQL Server services in Azure.
     """

+    report: SQLSourceReport
+
     def __init__(self, config: SQLServerConfig, ctx: PipelineContext):
         super().__init__(config, ctx, "mssql")
         # Cache the table and column descriptions
@@ -416,10 +424,16 @@ class SQLServerSource(SQLAlchemySource):
         data_flow = MSSQLDataFlow(entity=mssql_default_job)
         with inspector.engine.connect() as conn:
             procedures_data_list = self._get_stored_procedures(conn, db_name, schema)
-            procedures = [
-
-
-
+            procedures: List[StoredProcedure] = []
+            for procedure_data in procedures_data_list:
+                procedure_full_name = f"{db_name}.{schema}.{procedure_data['name']}"
+                if not self.config.procedure_pattern.allowed(procedure_full_name):
+                    self.report.report_dropped(procedure_full_name)
+                    continue
+                procedures.append(
+                    StoredProcedure(flow=mssql_default_job, **procedure_data)
+                )
+
         if procedures:
             yield from self.construct_flow_workunits(data_flow=data_flow)
             for procedure in procedures:
{acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/WHEEL RENAMED
File without changes

{acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/entry_points.txt RENAMED
File without changes

{acryl_datahub-0.14.1.13rc7.dist-info → acryl_datahub-0.14.1.13rc9.dist-info}/top_level.txt RENAMED
File without changes