acryl-datahub 1.2.0.10rc2__py3-none-any.whl → 1.2.0.10rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; consult the package registry's advisory page for details.

Files changed (93)
  1. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/METADATA +2525 -2609
  2. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/RECORD +93 -93
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/assertion/assertion.py +1 -1
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  7. datahub/api/entities/dataset/dataset.py +9 -18
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/api/graphql/operation.py +10 -6
  10. datahub/cli/docker_check.py +2 -2
  11. datahub/configuration/common.py +29 -1
  12. datahub/configuration/connection_resolver.py +5 -2
  13. datahub/configuration/import_resolver.py +7 -4
  14. datahub/configuration/pydantic_migration_helpers.py +0 -9
  15. datahub/configuration/source_common.py +3 -2
  16. datahub/configuration/validate_field_deprecation.py +5 -2
  17. datahub/configuration/validate_field_removal.py +5 -2
  18. datahub/configuration/validate_field_rename.py +6 -5
  19. datahub/configuration/validate_multiline_string.py +5 -2
  20. datahub/ingestion/run/pipeline_config.py +2 -2
  21. datahub/ingestion/source/azure/azure_common.py +1 -1
  22. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  23. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  24. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  25. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  26. datahub/ingestion/source/datahub/config.py +8 -9
  27. datahub/ingestion/source/delta_lake/config.py +1 -1
  28. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  29. datahub/ingestion/source/feast.py +8 -10
  30. datahub/ingestion/source/fivetran/config.py +1 -1
  31. datahub/ingestion/source/ge_profiling_config.py +26 -22
  32. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  33. datahub/ingestion/source/grafana/models.py +12 -14
  34. datahub/ingestion/source/hex/hex.py +6 -1
  35. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  36. datahub/ingestion/source/kafka_connect/common.py +2 -2
  37. datahub/ingestion/source/looker/looker_common.py +55 -75
  38. datahub/ingestion/source/looker/looker_config.py +15 -4
  39. datahub/ingestion/source/looker/looker_source.py +445 -548
  40. datahub/ingestion/source/looker/lookml_config.py +1 -1
  41. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  42. datahub/ingestion/source/metadata/lineage.py +1 -1
  43. datahub/ingestion/source/mode.py +13 -5
  44. datahub/ingestion/source/nifi.py +1 -1
  45. datahub/ingestion/source/powerbi/config.py +14 -21
  46. datahub/ingestion/source/preset.py +1 -1
  47. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  48. datahub/ingestion/source/redshift/config.py +6 -3
  49. datahub/ingestion/source/salesforce.py +13 -9
  50. datahub/ingestion/source/schema/json_schema.py +14 -14
  51. datahub/ingestion/source/sigma/data_classes.py +3 -0
  52. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  53. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  54. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  55. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  56. datahub/ingestion/source/sql/athena.py +2 -1
  57. datahub/ingestion/source/sql/clickhouse.py +12 -7
  58. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  59. datahub/ingestion/source/sql/druid.py +2 -2
  60. datahub/ingestion/source/sql/hive.py +4 -3
  61. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  62. datahub/ingestion/source/sql/mssql/source.py +2 -2
  63. datahub/ingestion/source/sql/mysql.py +2 -2
  64. datahub/ingestion/source/sql/oracle.py +3 -3
  65. datahub/ingestion/source/sql/presto.py +2 -1
  66. datahub/ingestion/source/sql/teradata.py +4 -4
  67. datahub/ingestion/source/sql/trino.py +2 -1
  68. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  69. datahub/ingestion/source/sql/vertica.py +1 -1
  70. datahub/ingestion/source/sql_queries.py +6 -6
  71. datahub/ingestion/source/state/checkpoint.py +5 -1
  72. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  73. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  74. datahub/ingestion/source/superset.py +29 -4
  75. datahub/ingestion/source/tableau/tableau.py +65 -11
  76. datahub/ingestion/source/tableau/tableau_common.py +5 -0
  77. datahub/ingestion/source/tableau/tableau_constant.py +1 -0
  78. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  79. datahub/ingestion/source/unity/config.py +7 -3
  80. datahub/ingestion/source/usage/usage_common.py +3 -3
  81. datahub/ingestion/source_config/pulsar.py +3 -1
  82. datahub/metadata/_internal_schema_classes.py +45 -1
  83. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  84. datahub/metadata/schema.avsc +24 -1
  85. datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
  86. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  87. datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
  88. datahub/sdk/dashboard.py +0 -2
  89. datahub/sdk/search_filters.py +1 -7
  90. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL +0 -0
  91. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt +0 -0
  92. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE +0 -0
  93. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
28
28
  User,
29
29
  WriteQuery,
30
30
  )
31
- from pydantic.class_validators import validator
31
+ from pydantic import validator
32
32
 
33
33
  import datahub.emitter.mce_builder as builder
34
34
  from datahub.api.entities.platformresource.platform_resource import (
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
36
36
  PlatformResourceKey,
37
37
  )
38
38
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
39
- from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp
39
+ from datahub.emitter.mcp_builder import ContainerKey
40
40
  from datahub.ingestion.api.report import Report
41
41
  from datahub.ingestion.api.source import SourceReport
42
42
  from datahub.ingestion.source.common.subtypes import DatasetSubTypes
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
72
72
  UpstreamClass,
73
73
  UpstreamLineage,
74
74
  )
75
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
76
75
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
77
76
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
78
77
  ArrayTypeClass,
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
90
89
  )
91
90
  from datahub.metadata.schema_classes import (
92
91
  BrowsePathEntryClass,
93
- BrowsePathsClass,
94
92
  BrowsePathsV2Class,
95
- ContainerClass,
96
- DatasetPropertiesClass,
93
+ EmbedClass,
97
94
  EnumTypeClass,
98
95
  FineGrainedLineageClass,
99
96
  GlobalTagsClass,
100
97
  SchemaMetadataClass,
101
- StatusClass,
102
- SubTypesClass,
103
98
  TagAssociationClass,
104
99
  TagPropertiesClass,
105
100
  TagSnapshotClass,
106
101
  )
107
102
  from datahub.metadata.urns import TagUrn
103
+ from datahub.sdk.dataset import Dataset
108
104
  from datahub.sql_parsing.sqlglot_lineage import ColumnRef
109
105
  from datahub.utilities.lossy_collections import LossyList, LossySet
110
106
  from datahub.utilities.url_util import remove_port_from_url
@@ -1307,50 +1303,28 @@ class LookerExplore:
1307
1303
  reporter: SourceReport,
1308
1304
  base_url: str,
1309
1305
  extract_embed_urls: bool,
1310
- ) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
1311
- # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
1312
- # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
1313
-
1314
- dataset_snapshot = DatasetSnapshot(
1315
- urn=self.get_explore_urn(config),
1316
- aspects=[], # we append to this list later on
1317
- )
1318
-
1319
- model_key = gen_model_key(config, self.model_name)
1320
- browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
1321
- container = ContainerClass(container=model_key.as_urn())
1322
- dataset_snapshot.aspects.append(browse_paths)
1323
- dataset_snapshot.aspects.append(StatusClass(removed=False))
1324
-
1325
- custom_properties = {
1326
- "project": self.project_name,
1327
- "model": self.model_name,
1328
- "looker.explore.label": self.label,
1329
- "looker.explore.name": self.name,
1330
- "looker.explore.file": self.source_file,
1331
- }
1332
- dataset_props = DatasetPropertiesClass(
1333
- name=str(self.label) if self.label else LookerUtil._display_name(self.name),
1334
- description=self.description,
1335
- customProperties={
1336
- k: str(v) for k, v in custom_properties.items() if v is not None
1337
- },
1338
- )
1339
- dataset_props.externalUrl = self._get_url(base_url)
1306
+ ) -> Dataset:
1307
+ """
1308
+ Generate a Dataset metadata event for this Looker Explore.
1340
1309
 
1341
- dataset_snapshot.aspects.append(dataset_props)
1310
+ Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
1311
+ Passthrough explores and joins are handled via lineage and do not need additional nodes.
1312
+ """
1313
+ upstream_lineage = None
1342
1314
  view_name_to_urn_map: Dict[str, str] = {}
1315
+
1343
1316
  if self.upstream_views is not None:
1344
1317
  assert self.project_name is not None
1345
- upstreams = []
1318
+ upstreams: list[UpstreamClass] = []
1346
1319
  observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
1320
+
1347
1321
  for view_ref in sorted(self.upstream_views):
1348
1322
  # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
1349
1323
  # if we raise error on file_path equal to None then existing test-cases will fail as mock data
1350
1324
  # doesn't have required attributes.
1351
1325
  file_path: str = (
1352
1326
  cast(str, self.upstream_views_file_path[view_ref.include])
1353
- if self.upstream_views_file_path[view_ref.include] is not None
1327
+ if self.upstream_views_file_path.get(view_ref.include) is not None
1354
1328
  else ViewFieldValue.NOT_AVAILABLE.value
1355
1329
  )
1356
1330
 
@@ -1377,7 +1351,7 @@ class LookerExplore:
1377
1351
  )
1378
1352
  view_name_to_urn_map[view_ref.include] = view_urn
1379
1353
 
1380
- fine_grained_lineages = []
1354
+ fine_grained_lineages: list[FineGrainedLineageClass] = []
1381
1355
  if config.extract_column_level_lineage:
1382
1356
  for field in self.fields or []:
1383
1357
  # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
@@ -1418,9 +1392,11 @@ class LookerExplore:
1418
1392
  )
1419
1393
 
1420
1394
  upstream_lineage = UpstreamLineage(
1421
- upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None
1395
+ upstreams=upstreams,
1396
+ fineGrainedLineages=fine_grained_lineages or None,
1422
1397
  )
1423
- dataset_snapshot.aspects.append(upstream_lineage)
1398
+
1399
+ schema_metadata = None
1424
1400
  if self.fields is not None:
1425
1401
  schema_metadata = LookerUtil._get_schema(
1426
1402
  platform_name=config.platform_name,
@@ -1428,42 +1404,46 @@ class LookerExplore:
1428
1404
  view_fields=self.fields,
1429
1405
  reporter=reporter,
1430
1406
  )
1431
- if schema_metadata is not None:
1432
- dataset_snapshot.aspects.append(schema_metadata)
1433
-
1434
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
1435
- mcp = MetadataChangeProposalWrapper(
1436
- entityUrn=dataset_snapshot.urn,
1437
- aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
1438
- )
1439
1407
 
1440
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
1441
- mce,
1442
- mcp,
1443
- ]
1444
-
1445
- # Add tags
1446
- explore_tag_urns: List[TagAssociationClass] = [
1447
- TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
1448
- ]
1449
- if explore_tag_urns:
1450
- dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
1408
+ extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
1451
1409
 
1452
- # If extracting embeds is enabled, produce an MCP for embed URL.
1410
+ explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
1453
1411
  if extract_embed_urls:
1454
- embed_mcp = create_embed_mcp(
1455
- dataset_snapshot.urn, self._get_embed_url(base_url)
1456
- )
1457
- proposals.append(embed_mcp)
1412
+ extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
1458
1413
 
1459
- proposals.append(
1460
- MetadataChangeProposalWrapper(
1461
- entityUrn=dataset_snapshot.urn,
1462
- aspect=container,
1463
- )
1464
- )
1414
+ custom_properties: Dict[str, Optional[str]] = {
1415
+ "project": self.project_name,
1416
+ "model": self.model_name,
1417
+ "looker.explore.label": self.label,
1418
+ "looker.explore.name": self.name,
1419
+ "looker.explore.file": self.source_file,
1420
+ }
1465
1421
 
1466
- return proposals
1422
+ return Dataset(
1423
+ platform=config.platform_name,
1424
+ name=config.explore_naming_pattern.replace_variables(
1425
+ self.get_mapping(config)
1426
+ ),
1427
+ display_name=str(self.label)
1428
+ if self.label
1429
+ else LookerUtil._display_name(self.name),
1430
+ description=self.description,
1431
+ subtype=DatasetSubTypes.LOOKER_EXPLORE,
1432
+ env=config.env,
1433
+ platform_instance=config.platform_instance,
1434
+ custom_properties={
1435
+ k: str(v) for k, v in custom_properties.items() if v is not None
1436
+ },
1437
+ external_url=self._get_url(base_url),
1438
+ upstreams=upstream_lineage,
1439
+ schema=schema_metadata,
1440
+ parent_container=[
1441
+ "Explore",
1442
+ gen_model_key(config, self.model_name).as_urn(),
1443
+ ],
1444
+ tags=explore_tag_urns if explore_tag_urns else None,
1445
+ extra_aspects=extra_aspects,
1446
+ )
1467
1447
 
1468
1448
 
1469
1449
  def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
5
5
 
6
6
  import pydantic
7
7
  from looker_sdk.sdk.api40.models import DBConnection
8
- from pydantic import Field, validator
8
+ from pydantic import Field, model_validator, validator
9
9
 
10
10
  from datahub.configuration import ConfigModel
11
- from datahub.configuration.common import AllowDenyPattern, ConfigurationError
11
+ from datahub.configuration.common import (
12
+ AllowDenyPattern,
13
+ ConfigurationError,
14
+ HiddenFromDocs,
15
+ )
12
16
  from datahub.configuration.source_common import (
13
17
  EnvConfigMixin,
14
18
  PlatformInstanceConfigMixin,
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
43
47
  assert isinstance(v, str), "pattern must be a string"
44
48
  return {"pattern": v}
45
49
 
50
+ @model_validator(mode="before")
51
+ @classmethod
52
+ def pydantic_v2_accept_raw_pattern(cls, v):
53
+ # Pydantic v2 compatibility: handle string input by converting to dict
54
+ if isinstance(v, str):
55
+ return {"pattern": v}
56
+ return v
57
+
46
58
  @classmethod
47
59
  def pydantic_validate_pattern(cls, v):
48
60
  assert isinstance(v, NamingPattern)
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
132
144
  description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
133
145
  "discoverable. When disabled, adds this information to the description of the column.",
134
146
  )
135
- platform_name: str = Field(
147
+ platform_name: HiddenFromDocs[str] = Field(
136
148
  # TODO: This shouldn't be part of the config.
137
149
  "looker",
138
150
  description="Default platform name.",
139
- hidden_from_docs=True,
140
151
  )
141
152
  extract_column_level_lineage: bool = Field(
142
153
  True,