acryl-datahub 0.15.0rc5__py3-none-any.whl → 0.15.0rc7__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub may be problematic.
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/METADATA +2456 -2426
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/RECORD +43 -41
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/entry_points.txt +1 -0
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/dbt/dbt_common.py +7 -61
- datahub/ingestion/source/dremio/dremio_api.py +11 -0
- datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
- datahub/ingestion/source/dremio/dremio_config.py +5 -0
- datahub/ingestion/source/dremio/dremio_entities.py +4 -0
- datahub/ingestion/source/dremio/dremio_source.py +7 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/gc/dataprocess_cleanup.py +6 -1
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +1 -1
- datahub/ingestion/source/ge_data_profiler.py +23 -1
- datahub/ingestion/source/neo4j/__init__.py +0 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
- datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -0
- datahub/ingestion/source/sql/athena.py +46 -22
- datahub/ingestion/source/sql/sql_types.py +85 -8
- datahub/ingestion/source/unity/proxy_types.py +1 -0
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +3 -47
- datahub/utilities/urn_encoder.py +2 -1
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc5.dist-info → acryl_datahub-0.15.0rc7.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/athena.py
CHANGED

@@ -26,6 +26,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
+from datahub.ingestion.api.source import StructuredLogLevel
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.aws.s3_util import make_s3_urn
 from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
@@ -35,6 +36,7 @@ from datahub.ingestion.source.sql.sql_common import (
     register_custom_type,
 )
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri
+from datahub.ingestion.source.sql.sql_report import SQLSourceReport
 from datahub.ingestion.source.sql.sql_utils import (
     add_table_to_schema_container,
     gen_database_container,
@@ -48,6 +50,15 @@ from datahub.utilities.sqlalchemy_type_converter import (
     get_schema_fields_for_sqlalchemy_column,
 )

+try:
+    from typing_extensions import override
+except ImportError:
+    _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
+
+    def override(f: _F, /) -> _F: # noqa: F811
+        return f
+
+
 logger = logging.getLogger(__name__)

 assert STRUCT, "required type modules are not available"
@@ -322,12 +333,15 @@ class AthenaSource(SQLAlchemySource):
     - Profiling when enabled.
     """

-
+    config: AthenaConfig
+    report: SQLSourceReport

     def __init__(self, config, ctx):
         super().__init__(config, ctx, "athena")
         self.cursor: Optional[BaseCursor] = None

+        self.table_partition_cache: Dict[str, Dict[str, Partitionitem]] = {}
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = AthenaConfig.parse_obj(config_dict)
@@ -452,6 +466,7 @@ class AthenaSource(SQLAlchemySource):
         )

     # It seems like database/schema filter in the connection string does not work and this to work around that
+    @override
     def get_schema_names(self, inspector: Inspector) -> List[str]:
         athena_config = typing.cast(AthenaConfig, self.config)
         schemas = inspector.get_schema_names()
@@ -459,34 +474,42 @@ class AthenaSource(SQLAlchemySource):
             return [schema for schema in schemas if schema == athena_config.database]
         return schemas

-
+    @classmethod
+    def _casted_partition_key(cls, key: str) -> str:
+        # We need to cast the partition keys to a VARCHAR, since otherwise
+        # Athena may throw an error during concatenation / comparison.
+        return f"CAST({key} as VARCHAR)"
+
+    @override
     def get_partitions(
         self, inspector: Inspector, schema: str, table: str
-    ) -> List[str]:
-
-
-        athena_config = typing.cast(AthenaConfig, self.config)
-
-        if not athena_config.extract_partitions:
-            return []
+    ) -> Optional[List[str]]:
+        if not self.config.extract_partitions:
+            return None

         if not self.cursor:
-            return
+            return None

         metadata: AthenaTableMetadata = self.cursor.get_table_metadata(
             table_name=table, schema_name=schema
         )

-
-
-
-
-
-
-            return []
+        partitions = []
+        for key in metadata.partition_keys:
+            if key.name:
+                partitions.append(key.name)
+        if not partitions:
+            return []

-
-
+        with self.report.report_exc(
+            message="Failed to extract partition details",
+            context=f"{schema}.{table}",
+            level=StructuredLogLevel.WARN,
+        ):
+            # We create an artifical concatenated partition key to be able to query max partition easier
+            part_concat = " || '-' || ".join(
+                self._casted_partition_key(key) for key in partitions
+            )
             max_partition_query = f'select {",".join(partitions)} from "{schema}"."{table}$partitions" where {part_concat} = (select max({part_concat}) from "{schema}"."{table}$partitions")'
             ret = self.cursor.execute(max_partition_query)
             max_partition: Dict[str, str] = {}
@@ -500,9 +523,8 @@ class AthenaSource(SQLAlchemySource):
                 partitions=partitions,
                 max_partition=max_partition,
             )
-            return partitions

-        return
+        return partitions

     # Overwrite to modify the creation of schema fields
     def get_schema_fields_for_column(
@@ -551,7 +573,9 @@ class AthenaSource(SQLAlchemySource):
         if partition and partition.max_partition:
             max_partition_filters = []
             for key, value in partition.max_partition.items():
-                max_partition_filters.append(
+                max_partition_filters.append(
+                    f"{self._casted_partition_key(key)} = '{value}'"
+                )
             max_partition = str(partition.max_partition)
         return (
             max_partition,
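For illustration, the query that the new partition logic above assembles looks like the following. This is a standalone sketch, not part of the package; the schema, table, and key names are invented.

```python
# Illustrative sketch only: mirrors how AthenaSource builds its max-partition
# query from the casted, concatenated partition keys. All names are hypothetical.
from typing import List


def casted_partition_key(key: str) -> str:
    # Cast each key to VARCHAR so keys of different types can be concatenated and compared.
    return f"CAST({key} as VARCHAR)"


def build_max_partition_query(schema: str, table: str, partitions: List[str]) -> str:
    part_concat = " || '-' || ".join(casted_partition_key(k) for k in partitions)
    return (
        f'select {",".join(partitions)} from "{schema}"."{table}$partitions" '
        f"where {part_concat} = "
        f'(select max({part_concat}) from "{schema}"."{table}$partitions")'
    )


print(build_max_partition_query("analytics", "events", ["year", "month", "day"]))
```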
datahub/ingestion/source/sql/sql_types.py
CHANGED

@@ -1,5 +1,5 @@
 import re
-from typing import Any, Dict, ValuesView
+from typing import Any, Dict, Optional, Type, Union, ValuesView

 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     ArrayType,
@@ -16,14 +16,28 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     UnionType,
 )

-
-
-
+DATAHUB_FIELD_TYPE = Union[
+    ArrayType,
+    BooleanType,
+    BytesType,
+    DateType,
+    EnumType,
+    MapType,
+    NullType,
+    NumberType,
+    RecordType,
+    StringType,
+    TimeType,
+    UnionType,
+]

-# we map from format_type since this is what dbt uses
-# see https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22

-#
+# These can be obtained by running `select format_type(oid, null),* from pg_type;`
+# We've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.)
+# (run `\copy (select format_type(oid, null),* from pg_type) to 'pg_type.csv' csv header;` to get a CSV)
+# We map from format_type since this is what dbt uses.
+# See https://github.com/fishtown-analytics/dbt/blob/master/plugins/postgres/dbt/include/postgres/macros/catalog.sql#L22
+# See https://www.npgsql.org/dev/types.html for helpful type annotations
 POSTGRES_TYPES_MAP: Dict[str, Any] = {
     "boolean": BooleanType,
     "bytea": BytesType,
@@ -262,7 +276,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any:
         return VERTICA_SQL_TYPES_MAP[type_string]


-# see https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
 SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
     "NUMBER": NumberType,
     "DECIMAL": NumberType,
@@ -298,6 +311,18 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
     "GEOGRAPHY": None,
 }

+
+def resolve_snowflake_modified_type(type_string: str) -> Any:
+    # Match types with precision and scale, e.g., 'DECIMAL(38,0)'
+    match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
+    if match:
+        modified_type_base = match.group(1)  # Extract the base type
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+
+    # Fallback for types without precision/scale
+    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+
+
 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
 BIGQUERY_TYPES_MAP: Dict[str, Any] = {
     "STRING": StringType,
@@ -366,6 +391,7 @@ TRINO_SQL_TYPES_MAP: Dict[str, Any] = {
     "row": RecordType,
     "map": MapType,
     "array": ArrayType,
+    "json": RecordType,
 }

 # https://docs.aws.amazon.com/athena/latest/ug/data-types.html
@@ -430,3 +456,54 @@ VERTICA_SQL_TYPES_MAP: Dict[str, Any] = {
     "geography": None,
     "uuid": StringType,
 }
+
+
+_merged_mapping = {
+    "boolean": BooleanType,
+    "date": DateType,
+    "time": TimeType,
+    "numeric": NumberType,
+    "text": StringType,
+    "timestamp with time zone": DateType,
+    "timestamp without time zone": DateType,
+    "integer": NumberType,
+    "float8": NumberType,
+    "struct": RecordType,
+    **POSTGRES_TYPES_MAP,
+    **SNOWFLAKE_TYPES_MAP,
+    **BIGQUERY_TYPES_MAP,
+    **SPARK_SQL_TYPES_MAP,
+    **TRINO_SQL_TYPES_MAP,
+    **ATHENA_SQL_TYPES_MAP,
+    **VERTICA_SQL_TYPES_MAP,
+}
+
+
+def resolve_sql_type(
+    column_type: Optional[str],
+    platform: Optional[str] = None,
+) -> Optional[DATAHUB_FIELD_TYPE]:
+    # In theory, we should use the platform-specific mapping where available.
+    # However, the types don't ever conflict, so the merged mapping is fine.
+    TypeClass: Optional[Type[DATAHUB_FIELD_TYPE]] = (
+        _merged_mapping.get(column_type) if column_type else None
+    )
+
+    if TypeClass is None and column_type:
+        # resolve a modified type
+        if platform == "trino":
+            TypeClass = resolve_trino_modified_type(column_type)
+        elif platform == "athena":
+            TypeClass = resolve_athena_modified_type(column_type)
+        elif platform == "postgres" or platform == "redshift":
+            # Redshift uses a variant of Postgres, so we can use the same logic.
+            TypeClass = resolve_postgres_modified_type(column_type)
+        elif platform == "vertica":
+            TypeClass = resolve_vertica_modified_type(column_type)
+        elif platform == "snowflake":
+            # Snowflake types are uppercase, so we check that.
+            TypeClass = resolve_snowflake_modified_type(column_type.upper())
+
+    if TypeClass:
+        return TypeClass()
+    return None
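A brief usage sketch of the new `resolve_sql_type` helper added above, assuming it is importable from `datahub.ingestion.source.sql.sql_types` as the file list indicates; the column types passed in are illustrative.

```python
from datahub.ingestion.source.sql.sql_types import resolve_sql_type

# Plain lookup through the merged mapping.
print(resolve_sql_type("boolean"))                               # a BooleanType instance
# Platform-specific fallback for a "modified" type with precision/scale.
print(resolve_sql_type("DECIMAL(38, 0)", platform="snowflake"))  # a NumberType instance
# Unknown types resolve to None rather than raising.
print(resolve_sql_type("some_custom_type"))                      # None
```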
datahub/ingestion/source/unity/proxy_types.py
CHANGED

@@ -33,6 +33,7 @@ from datahub.metadata.schema_classes import (

 logger = logging.getLogger(__name__)

+# TODO: (maybe) Replace with standardized types in sql_types.py
 DATA_TYPE_REGISTRY: dict = {
     ColumnTypeName.BOOLEAN: BooleanTypeClass,
     ColumnTypeName.BYTE: BytesTypeClass,
datahub/ingestion/transformer/add_dataset_tags.py
CHANGED

@@ -74,7 +74,7 @@ class AddDatasetTags(DatasetTagsTransformer):
         logger.debug("Generating tags")

         for tag_association in self.processed_tags.values():
-            tag_urn = TagUrn.
+            tag_urn = TagUrn.from_string(tag_association.tag)
             mcps.append(
                 MetadataChangeProposalWrapper(
                     entityUrn=tag_urn.urn(),
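This hunk and several below migrate URN parsing to the `from_string` constructors. A minimal sketch of the helpers involved, using made-up URN values:

```python
from datahub.metadata.urns import TagUrn, Urn

tag_urn = TagUrn.from_string("urn:li:tag:Legacy")
print(tag_urn.urn())            # urn:li:tag:Legacy

generic = Urn.from_string("urn:li:corpuser:datahub")
print(generic.get_type())       # corpuser
print(generic.get_entity_id())  # ['datahub']
```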
datahub/ingestion/transformer/generic_aspect_transformer.py
CHANGED

@@ -100,7 +100,7 @@ class GenericAspectTransformer(
             )
             if transformed_aspect:
                 # for end of stream records, we modify the workunit-id
-                structured_urn = Urn.
+                structured_urn = Urn.from_string(urn)
                 simple_name = "-".join(structured_urn.get_entity_id())
                 record_metadata = envelope.metadata.copy()
                 record_metadata.update(
datahub/integrations/assertion/common.py
CHANGED

@@ -42,7 +42,7 @@ def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
     if qualified_name is not None:
         parts = qualified_name.split(".")
     else:
-        urn_id = Urn.
+        urn_id = Urn.from_string(assertion.entity).entity_ids[1]
         parts = urn_id.split(".")
     if len(parts) > 3:
         parts = parts[-3:]
datahub/lite/duckdb_lite.py
CHANGED

@@ -609,7 +609,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             aspect_map, DataPlatformInstanceClass
         ) # type: ignore

-        needs_platform = Urn.
+        needs_platform = Urn.from_string(entity_urn).get_type() in [
             "dataset",
             "container",
             "chart",
@@ -617,7 +617,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             "dataFlow",
             "dataJob",
         ]
-        entity_urn_parsed = Urn.
+        entity_urn_parsed = Urn.from_string(entity_urn)
         if entity_urn_parsed.get_type() in ["dataFlow", "dataJob"]:
             self.add_edge(
                 entity_urn,
@@ -630,15 +630,12 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             # this is a top-level entity
             if not dpi:
                 logger.debug(f"No data platform instance for {entity_urn}")
-                maybe_parent_urn = Urn.
+                maybe_parent_urn = Urn.from_string(entity_urn).get_entity_id()[0]
                 needs_dpi = False
                 if maybe_parent_urn.startswith(Urn.URN_PREFIX):
                     parent_urn = maybe_parent_urn
-                    if (
-
-                        == "dataPlatform"
-                    ):
-                        data_platform_urn = DataPlatformUrn.create_from_string(
+                    if Urn.from_string(maybe_parent_urn).get_type() == "dataPlatform":
+                        data_platform_urn = DataPlatformUrn.from_string(
                             maybe_parent_urn
                         )
                         needs_dpi = True
@@ -660,7 +657,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                     logger.error(f"Failed to generate edges entity {entity_urn}", e)
                 parent_urn = str(data_platform_instance_urn)
             else:
-                data_platform_urn = DataPlatformUrn.
+                data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
                 data_platform_instance = dpi.instance or "default"
                 data_platform_instance_urn = Urn(
                     entity_type="dataPlatformInstance",
@@ -673,9 +670,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             parent_urn = "__root__"

         types = (
-            subtypes.typeNames
-            if subtypes
-            else [Urn.create_from_string(entity_urn).get_type()]
+            subtypes.typeNames if subtypes else [Urn.from_string(entity_urn).get_type()]
         )
         for t in types:
             type_urn = Urn(entity_type="systemNode", entity_id=[parent_urn, t])
@@ -686,7 +681,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
     def _create_edges_from_data_platform_instance(
         self, data_platform_instance_urn: Urn
     ) -> None:
-        data_platform_urn = DataPlatformUrn.
+        data_platform_urn = DataPlatformUrn.from_string(
             data_platform_instance_urn.get_entity_id()[0]
         )
         data_platform_instances_urn = Urn(
@@ -735,7 +730,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
         if isinstance(aspect, DatasetPropertiesClass):
             dp: DatasetPropertiesClass = aspect
             if dp.name:
-                specific_urn = DatasetUrn.
+                specific_urn = DatasetUrn.from_string(entity_urn)
                 if (
                     specific_urn.get_data_platform_urn().get_entity_id_as_string()
                     == "looker"
@@ -755,7 +750,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             self.add_edge(entity_urn, "name", cp.name, remove_existing=True)
         elif isinstance(aspect, DataPlatformInstanceClass):
             dpi: DataPlatformInstanceClass = aspect
-            data_platform_urn = DataPlatformUrn.
+            data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
             data_platform_instance = dpi.instance or "default"
             data_platform_instance_urn = Urn(
                 entity_type="dataPlatformInstance",
@@ -763,7 +758,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
         elif isinstance(aspect, ChartInfoClass):
-            urn = Urn.
+            urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
                 "name",
@@ -771,7 +766,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 remove_existing=True,
             )
         elif isinstance(aspect, DashboardInfoClass):
-            urn = Urn.
+            urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
                 "name",
datahub/specific/chart.py
CHANGED

@@ -1,10 +1,8 @@
-import time
 from typing import Dict, List, Optional, Union

 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
 from datahub.metadata.schema_classes import (
     AccessLevelClass,
-    AuditStampClass,
     ChangeAuditStampsClass,
     ChartInfoClass as ChartInfo,
     ChartTypeClass,
@@ -47,43 +45,6 @@ class ChartPatchBuilder(MetadataPatchProposal):
         )
         self.ownership_patch_helper = OwnershipPatchHelper(self)

-    def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-        """
-        Creates an AuditStampClass instance with the current timestamp and other default values.
-
-        Args:
-            message: The message associated with the audit stamp (optional).
-
-        Returns:
-            An instance of AuditStampClass.
-        """
-        return AuditStampClass(
-            time=int(time.time() * 1000.0),
-            actor="urn:li:corpuser:datahub",
-            message=message,
-        )
-
-    def _ensure_urn_type(
-        self, entity_type: str, edges: List[Edge], context: str
-    ) -> None:
-        """
-        Ensures that the destination URNs in the given edges have the specified entity type.
-
-        Args:
-            entity_type: The entity type to check against.
-            edges: A list of Edge objects.
-            context: The context or description of the operation.
-
-        Raises:
-            ValueError: If any of the destination URNs is not of the specified entity type.
-        """
-        for e in edges:
-            urn = Urn.create_from_string(e.destinationUrn)
-            if not urn.get_type() == entity_type:
-                raise ValueError(
-                    f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                )
-
     def add_owner(self, owner: Owner) -> "ChartPatchBuilder":
         """
         Adds an owner to the ChartPatchBuilder.
datahub/specific/dashboard.py
CHANGED

@@ -1,10 +1,8 @@
-import time
 from typing import Dict, List, Optional, Union

 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
 from datahub.metadata.schema_classes import (
     AccessLevelClass,
-    AuditStampClass,
     ChangeAuditStampsClass,
     DashboardInfoClass as DashboardInfo,
     EdgeClass as Edge,
@@ -46,43 +44,6 @@ class DashboardPatchBuilder(MetadataPatchProposal):
         )
         self.ownership_patch_helper = OwnershipPatchHelper(self)

-    def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-        """
-        Creates an AuditStampClass instance with the current timestamp and other default values.
-
-        Args:
-            message: The message associated with the audit stamp (optional).
-
-        Returns:
-            An instance of AuditStampClass.
-        """
-        return AuditStampClass(
-            time=int(time.time() * 1000.0),
-            actor="urn:li:corpuser:datahub",
-            message=message,
-        )
-
-    def _ensure_urn_type(
-        self, entity_type: str, edges: List[Edge], context: str
-    ) -> None:
-        """
-        Ensures that the destination URNs in the given edges have the specified entity type.
-
-        Args:
-            entity_type: The entity type to check against.
-            edges: A list of Edge objects.
-            context: The context or description of the operation.
-
-        Raises:
-            ValueError: If any of the destination URNs is not of the specified entity type.
-        """
-        for e in edges:
-            urn = Urn.create_from_string(e.destinationUrn)
-            if not urn.get_type() == entity_type:
-                raise ValueError(
-                    f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                )
-
     def add_owner(self, owner: Owner) -> "DashboardPatchBuilder":
         """
         Adds an owner to the DashboardPatchBuilder.
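The `_mint_auditstamp` and `_ensure_urn_type` helpers removed from chart.py, dashboard.py, and (below) datajob.py are presumably now provided by the shared patch-builder base, given the 43 lines added to datahub/emitter/mcp_patch_builder.py in the file list above. Reconstructed from the removed code, the audit-stamp helper boils down to:

```python
# Sketch reconstructed from the removed helper; where it now lives is an assumption.
import time
from typing import Optional

from datahub.metadata.schema_classes import AuditStampClass


def mint_auditstamp(message: Optional[str] = None) -> AuditStampClass:
    # Current time in epoch milliseconds, attributed to the default datahub actor.
    return AuditStampClass(
        time=int(time.time() * 1000.0),
        actor="urn:li:corpuser:datahub",
        message=message,
    )
```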
datahub/specific/datajob.py
CHANGED

@@ -1,9 +1,7 @@
-import time
 from typing import Dict, List, Optional, Union

 from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
 from datahub.metadata.schema_classes import (
-    AuditStampClass,
     DataJobInfoClass as DataJobInfo,
     DataJobInputOutputClass as DataJobInputOutput,
     EdgeClass as Edge,
@@ -16,10 +14,9 @@ from datahub.metadata.schema_classes import (
     SystemMetadataClass,
     TagAssociationClass as Tag,
 )
+from datahub.metadata.urns import SchemaFieldUrn, TagUrn, Urn
 from datahub.specific.custom_properties import CustomPropertiesPatchHelper
 from datahub.specific.ownership import OwnershipPatchHelper
-from datahub.utilities.urns.tag_urn import TagUrn
-from datahub.utilities.urns.urn import Urn


 class DataJobPatchBuilder(MetadataPatchProposal):
@@ -45,43 +42,6 @@ class DataJobPatchBuilder(MetadataPatchProposal):
         )
         self.ownership_patch_helper = OwnershipPatchHelper(self)

-    def _mint_auditstamp(self, message: Optional[str] = None) -> AuditStampClass:
-        """
-        Creates an AuditStampClass instance with the current timestamp and other default values.
-
-        Args:
-            message: The message associated with the audit stamp (optional).
-
-        Returns:
-            An instance of AuditStampClass.
-        """
-        return AuditStampClass(
-            time=int(time.time() * 1000.0),
-            actor="urn:li:corpuser:datahub",
-            message=message,
-        )
-
-    def _ensure_urn_type(
-        self, entity_type: str, edges: List[Edge], context: str
-    ) -> None:
-        """
-        Ensures that the destination URNs in the given edges have the specified entity type.
-
-        Args:
-            entity_type: The entity type to check against.
-            edges: A list of Edge objects.
-            context: The context or description of the operation.
-
-        Raises:
-            ValueError: If any of the destination URNs is not of the specified entity type.
-        """
-        for e in edges:
-            urn = Urn.create_from_string(e.destinationUrn)
-            if not urn.get_type() == entity_type:
-                raise ValueError(
-                    f"{context}: {e.destinationUrn} is not of type {entity_type}"
-                )
-
     def add_owner(self, owner: Owner) -> "DataJobPatchBuilder":
         """
         Adds an owner to the DataJobPatchBuilder.
@@ -392,9 +352,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
             ValueError: If the input is not a Schema Field urn.
         """
         input_urn = str(input)
-
-        if not urn.get_type() == "schemaField":
-            raise ValueError(f"Input {input} is not a Schema Field urn")
+        assert SchemaFieldUrn.from_string(input_urn)

         self._add_patch(
             DataJobInputOutput.ASPECT_NAME,
@@ -466,9 +424,7 @@ class DataJobPatchBuilder(MetadataPatchProposal):
             ValueError: If the output is not a Schema Field urn.
         """
         output_urn = str(output)
-
-        if not urn.get_type() == "schemaField":
-            raise ValueError(f"Input {output} is not a Schema Field urn")
+        assert SchemaFieldUrn.from_string(output_urn)

         self._add_patch(
             DataJobInputOutput.ASPECT_NAME,
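The `assert SchemaFieldUrn.from_string(...)` checks above replace the manual `get_type()` comparison. A small sketch of the behaviour; the URNs are illustrative, and it is assumed that `from_string` raises for a URN of the wrong entity type, which is how the typed URN classes generally behave:

```python
from datahub.metadata.urns import SchemaFieldUrn

field = SchemaFieldUrn.from_string(
    "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD),user_id)"
)
print(field.urn())

try:
    SchemaFieldUrn.from_string("urn:li:tag:not-a-field")
except Exception as e:  # assumed: an invalid-URN error for the wrong entity type
    print(f"rejected: {e}")
```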
datahub/utilities/urn_encoder.py
CHANGED

@@ -4,7 +4,8 @@ from typing import List
 # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage.
 # If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
 # We also rely on encoding these exact three characters when generating schemaField urns in our graphQL layer. Update SchemaFieldUtils if this changes.
-
+# Also see https://datahubproject.io/docs/what/urn/#restrictions
+RESERVED_CHARS = {",", "(", ")", "␟"}
 RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})


datahub/utilities/urns/structured_properties_urn.py
CHANGED

@@ -4,4 +4,4 @@ __all__ = ["StructuredPropertyUrn", "make_structured_property_urn"]


 def make_structured_property_urn(structured_property_id: str) -> str:
-    return str(StructuredPropertyUrn.
+    return str(StructuredPropertyUrn.from_string(structured_property_id))