acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py
ADDED

@@ -0,0 +1,374 @@
from __future__ import annotations

import abc
from typing import (
    Any,
    List,
    Sequence,
    TypedDict,
    Union,
)

import pydantic

from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
from datahub.ingestion.graph.client import entity_type_to_graphql
from datahub.ingestion.graph.filters import SearchFilterRule
from datahub.metadata.schema_classes import EntityTypeName
from datahub.metadata.urns import DataPlatformUrn, DomainUrn

_AndSearchFilterRule = TypedDict(
    "_AndSearchFilterRule", {"and": List[SearchFilterRule]}
)
_OrFilters = List[_AndSearchFilterRule]


class _BaseFilter(ConfigModel):
    class Config:
        # We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
        # doesn't recognize it properly. So unfortunately we'll need to live
        # with the deprecation warning w/ pydantic v2.
        allow_population_by_field_name = True
        if PYDANTIC_VERSION_2:
            populate_by_name = True

    @abc.abstractmethod
    def compile(self) -> _OrFilters:
        pass


def _flexible_entity_type_to_graphql(entity_type: str) -> str:
    if entity_type.upper() == entity_type:
        # Assume that we were passed a graphql EntityType enum value,
        # so no conversion is needed.
        return entity_type
    return entity_type_to_graphql(entity_type)


class _EntityTypeFilter(_BaseFilter):
    entity_type: List[str] = pydantic.Field(
        description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
    )

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="_entityType",
            condition="EQUAL",
            values=[_flexible_entity_type_to_graphql(t) for t in self.entity_type],
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _EntitySubtypeFilter(_BaseFilter):
    entity_type: str
    entity_subtype: str = pydantic.Field(
        description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
    )

    def compile(self) -> _OrFilters:
        rules = [
            SearchFilterRule(
                field="_entityType",
                condition="EQUAL",
                values=[_flexible_entity_type_to_graphql(self.entity_type)],
            ),
            SearchFilterRule(
                field="typeNames",
                condition="EQUAL",
                values=[self.entity_subtype],
            ),
        ]
        return [{"and": rules}]


class _PlatformFilter(_BaseFilter):
    platform: List[str]
    # TODO: Add validator to convert string -> list of strings

    @pydantic.validator("platform", each_item=True)
    def validate_platform(cls, v: str) -> str:
        # Subtle - we use the constructor instead of the from_string method
        # because coercion is acceptable here.
        return str(DataPlatformUrn(v))

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="platform.keyword",
            condition="EQUAL",
            values=self.platform,
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _DomainFilter(_BaseFilter):
    domain: List[str]

    @pydantic.validator("domain", each_item=True)
    def validate_domain(cls, v: str) -> str:
        return str(DomainUrn.from_string(v))

    def _build_rule(self) -> SearchFilterRule:
        return SearchFilterRule(
            field="domains",
            condition="EQUAL",
            values=self.domain,
        )

    def compile(self) -> _OrFilters:
        return [{"and": [self._build_rule()]}]


class _EnvFilter(_BaseFilter):
    # Note that not all entity types have an env (e.g. dashboards / charts).
    # If the env filter is specified, these will be excluded.
    env: List[str]

    def compile(self) -> _OrFilters:
        return [
            # For most entity types, we look at the origin field.
            {
                "and": [
                    SearchFilterRule(
                        field="origin",
                        condition="EQUAL",
                        values=self.env,
                    ),
                ]
            },
            # For containers, we now have an "env" property as of
            # https://github.com/datahub-project/datahub/pull/11214
            # Prior to this, we put "env" in the customProperties. But we're
            # not bothering with that here.
            {
                "and": [
                    SearchFilterRule(
                        field="env",
                        condition="EQUAL",
                        values=self.env,
                    ),
                ]
            },
        ]


class _CustomCondition(_BaseFilter):
    """Represents a single field condition"""

    field: str
    condition: str
    values: List[str]

    def compile(self) -> _OrFilters:
        rule = SearchFilterRule(
            field=self.field,
            condition=self.condition,
            values=self.values,
        )
        return [{"and": [rule]}]


class _And(_BaseFilter):
    """Represents an AND conjunction of filters"""

    and_: Sequence["Filter"] = pydantic.Field(alias="and")
    # TODO: Add validator to ensure that the "and" field is not empty

    def compile(self) -> _OrFilters:
        # The "and" operator must be implemented by doing a Cartesian product
        # of the OR clauses.
        # Example 1:
        # (A or B) and (C or D) ->
        # (A and C) or (A and D) or (B and C) or (B and D)
        # Example 2:
        # (A or B) and (C or D) and (E or F) ->
        # (A and C and E) or (A and C and F) or (A and D and E) or (A and D and F) or
        # (B and C and E) or (B and C and F) or (B and D and E) or (B and D and F)

        # Start with the first filter's OR clauses
        result = self.and_[0].compile()

        # For each subsequent filter
        for filter in self.and_[1:]:
            new_result = []
            # Get its OR clauses
            other_clauses = filter.compile()

            # Create Cartesian product
            for existing_clause in result:
                for other_clause in other_clauses:
                    # Merge the AND conditions from both clauses
                    new_result.append(self._merge_ands(existing_clause, other_clause))

            result = new_result

        return result

    @classmethod
    def _merge_ands(
        cls, a: _AndSearchFilterRule, b: _AndSearchFilterRule
    ) -> _AndSearchFilterRule:
        return {
            "and": [
                *a["and"],
                *b["and"],
            ]
        }


class _Or(_BaseFilter):
    """Represents an OR conjunction of filters"""

    or_: Sequence["Filter"] = pydantic.Field(alias="or")
    # TODO: Add validator to ensure that the "or" field is not empty

    def compile(self) -> _OrFilters:
        merged_filter = []
        for filter in self.or_:
            merged_filter.extend(filter.compile())
        return merged_filter


class _Not(_BaseFilter):
    """Represents a NOT filter"""

    not_: "Filter" = pydantic.Field(alias="not")

    @pydantic.validator("not_", pre=False)
    def validate_not(cls, v: "Filter") -> "Filter":
        inner_filter = v.compile()
        if len(inner_filter) != 1:
            raise ValueError(
                "Cannot negate a filter with multiple OR clauses [not yet supported]"
            )
        return v

    def compile(self) -> _OrFilters:
        # TODO: Eventually we'll want to implement a full DNF normalizer.
        # https://en.wikipedia.org/wiki/Disjunctive_normal_form#Conversion_to_DNF

        inner_filter = self.not_.compile()
        assert len(inner_filter) == 1  # validated above

        # ¬(A and B) -> (¬A) OR (¬B)
        and_filters = inner_filter[0]["and"]
        final_filters: _OrFilters = []
        for rule in and_filters:
            final_filters.append({"and": [rule.negate()]})

        return final_filters


# TODO: With pydantic 2, we can use a RootModel with a
# discriminated union to make the error messages more informative.
Filter = Union[
    _And,
    _Or,
    _Not,
    _EntityTypeFilter,
    _EntitySubtypeFilter,
    _PlatformFilter,
    _DomainFilter,
    _EnvFilter,
    _CustomCondition,
]


# Required to resolve forward references to "Filter"
if PYDANTIC_VERSION_2:
    _And.model_rebuild()  # type: ignore
    _Or.model_rebuild()  # type: ignore
    _Not.model_rebuild()  # type: ignore
else:
    _And.update_forward_refs()
    _Or.update_forward_refs()
    _Not.update_forward_refs()


def load_filters(obj: Any) -> Filter:
    if PYDANTIC_VERSION_2:
        return pydantic.TypeAdapter(Filter).validate_python(obj)  # type: ignore
    else:
        return pydantic.parse_obj_as(Filter, obj)  # type: ignore


# We need FilterDsl for two reasons:
# 1. To provide wrapper methods around lots of filters while avoid bloating the
#    yaml spec.
# 2. Pydantic models in general don't support positional arguments, making the
#    calls feel repetitive (e.g. Platform(platform=...)).
#    See https://github.com/pydantic/pydantic/issues/6792
# We also considered using dataclasses / pydantic dataclasses, but
# ultimately decided that they didn't quite suit our requirements,
# particularly with regards to the field aliases for and/or/not.
class FilterDsl:
    @staticmethod
    def and_(*args: "Filter") -> _And:
        return _And(and_=list(args))

    @staticmethod
    def or_(*args: "Filter") -> _Or:
        return _Or(or_=list(args))

    @staticmethod
    def not_(arg: "Filter") -> _Not:
        return _Not(not_=arg)

    @staticmethod
    def entity_type(
        entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
    ) -> _EntityTypeFilter:
        return _EntityTypeFilter(
            entity_type=(
                [entity_type] if isinstance(entity_type, str) else list(entity_type)
            )
        )

    @staticmethod
    def entity_subtype(entity_type: str, subtype: str) -> _EntitySubtypeFilter:
        return _EntitySubtypeFilter(
            entity_type=entity_type,
            entity_subtype=subtype,
        )

    @staticmethod
    def platform(platform: Union[str, List[str]], /) -> _PlatformFilter:
        return _PlatformFilter(
            platform=[platform] if isinstance(platform, str) else platform
        )

    # TODO: Add a platform_instance filter

    @staticmethod
    def domain(domain: Union[str, List[str]], /) -> _DomainFilter:
        return _DomainFilter(domain=[domain] if isinstance(domain, str) else domain)

    @staticmethod
    def env(env: Union[str, List[str]], /) -> _EnvFilter:
        return _EnvFilter(env=[env] if isinstance(env, str) else env)

    @staticmethod
    def has_custom_property(key: str, value: str) -> _CustomCondition:
        return _CustomCondition(
            field="customProperties",
            condition="EQUAL",
            values=[f"{key}={value}"],
        )

    # TODO: Add a soft-deletion status filter
    # TODO: add a container / browse path filter
    # TODO add shortcut for custom filters

    @staticmethod
    def custom_filter(
        field: str, condition: str, values: List[str]
    ) -> _CustomCondition:
        return _CustomCondition(
            field=field,
            condition=condition,
            values=values,
        )
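For orientation, a minimal usage sketch of the new filter DSL (illustrative, not part of the diff; it assumes only the names defined in search_filters.py above):

from datahub.sdk.search_filters import FilterDsl as F, load_filters

# (platform=snowflake OR platform=bigquery) AND entity_type=dataset
f = F.and_(
    F.or_(F.platform("snowflake"), F.platform("bigquery")),
    F.entity_type("dataset"),
)

# _And.compile() takes the Cartesian product of its children's OR clauses,
# so this compiles to two {"and": [...]} alternatives:
#   (platform=snowflake AND type=DATASET) OR (platform=bigquery AND type=DATASET)
filters = f.compile()

# The same filter can be loaded from a plain dict, e.g. parsed from YAML,
# since each filter model accepts its field name (or alias) as the key:
f2 = load_filters(
    {
        "and": [
            {"or": [{"platform": ["snowflake"]}, {"platform": ["bigquery"]}]},
            {"entity_type": ["dataset"]},
        ]
    }
)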
datahub/specific/dataset.py
CHANGED

@@ -15,6 +15,7 @@ from datahub.metadata.schema_classes import (
     UpstreamClass as Upstream,
     UpstreamLineageClass as UpstreamLineage,
 )
+from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
 from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
 from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
 from datahub.specific.aspect_helpers.structured_properties import (
@@ -22,8 +23,6 @@ from datahub.specific.aspect_helpers.structured_properties import (
 )
 from datahub.specific.aspect_helpers.tags import HasTagsPatch
 from datahub.specific.aspect_helpers.terms import HasTermsPatch
-from datahub.utilities.urns.tag_urn import TagUrn
-from datahub.utilities.urns.urn import Urn

 _Parent = TypeVar("_Parent", bound=MetadataPatchProposal)

@@ -104,12 +103,12 @@ class DatasetPatchBuilder(
 ):
     def __init__(
         self,
-        urn: str,
+        urn: Union[str, DatasetUrn],
         system_metadata: Optional[SystemMetadataClass] = None,
         audit_header: Optional[KafkaAuditHeaderClass] = None,
     ) -> None:
         super().__init__(
-            urn, system_metadata=system_metadata, audit_header=audit_header
+            str(urn), system_metadata=system_metadata, audit_header=audit_header
         )

     @classmethod
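A small illustration of the API effect of this change (not from the diff; the DatasetUrn constructor arguments are assumed from the urns module):

from datahub.metadata.urns import DatasetUrn
from datahub.specific.dataset import DatasetPatchBuilder

# Previously the builder required a raw urn string; it now also accepts a
# typed DatasetUrn and stringifies it internally via str(urn).
urn = DatasetUrn(platform="snowflake", name="db.schema.table", env="PROD")
patch = DatasetPatchBuilder(urn)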
datahub/sql_parsing/_sqlglot_patch.py
CHANGED

(Note: this file embeds patch text as string literals, so the removed and
added lines below themselves contain diff markers.)

@@ -172,17 +172,9 @@ def _patch_lineage() -> None:
         derived_tables = [
             source.expression.parent
             for source in scope.sources.values()
-@@ -254,6 +257,7 @@ def to_node(
-             if dt.comments and dt.comments[0].startswith("source: ")
-         }
-
-+        c: exp.Column
-         for c in source_columns:
-             table = c.table
-             source = scope.sources.get(table)
 @@ -281,8 +285,21 @@ def to_node(
-         #
-         #
+         # is unknown. This can happen if the definition of a source used in a query is not
+         # passed into the `sources` map.
          source = source or exp.Placeholder()
 +
 +        subfields = []
datahub/sql_parsing/schema_resolver.py
CHANGED

@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
 from datahub.metadata.urns import DataPlatformUrn
-from datahub.sql_parsing._models import _TableName
+from datahub.sql_parsing._models import _TableName as _TableName
 from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
 from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
 from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path