PyPI - acryl-datahub - Versions diffs - 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl - Mend

acryl-datahub 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (168) hide show

{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2513 -2521
{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +168 -168
datahub/__init__.py +1 -1
datahub/api/entities/assertion/assertion_operator.py +3 -5
datahub/api/entities/corpgroup/corpgroup.py +1 -1
datahub/api/entities/datacontract/assertion_operator.py +3 -5
datahub/api/entities/dataproduct/dataproduct.py +4 -4
datahub/api/entities/dataset/dataset.py +2 -1
datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
datahub/cli/cli_utils.py +1 -1
datahub/cli/docker_cli.py +6 -6
datahub/cli/ingest_cli.py +25 -15
datahub/cli/lite_cli.py +2 -2
datahub/cli/migrate.py +3 -3
datahub/cli/specific/assertions_cli.py +3 -3
datahub/cli/timeline_cli.py +1 -1
datahub/configuration/common.py +1 -2
datahub/configuration/config_loader.py +73 -50
datahub/configuration/git.py +2 -2
datahub/configuration/time_window_config.py +10 -5
datahub/emitter/mce_builder.py +4 -8
datahub/emitter/mcp_patch_builder.py +1 -2
datahub/entrypoints.py +6 -0
datahub/ingestion/api/incremental_lineage_helper.py +2 -8
datahub/ingestion/api/report.py +1 -2
datahub/ingestion/api/source_helpers.py +1 -1
datahub/ingestion/extractor/json_schema_util.py +3 -3
datahub/ingestion/extractor/schema_util.py +3 -5
datahub/ingestion/fs/s3_fs.py +3 -3
datahub/ingestion/glossary/datahub_classifier.py +6 -4
datahub/ingestion/graph/client.py +4 -6
datahub/ingestion/run/pipeline.py +8 -7
datahub/ingestion/run/pipeline_config.py +3 -3
datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
datahub/ingestion/source/abs/source.py +19 -8
datahub/ingestion/source/aws/glue.py +11 -11
datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
datahub/ingestion/source/bigquery_v2/queries.py +1 -3
datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
datahub/ingestion/source/bigquery_v2/usage.py +3 -3
datahub/ingestion/source/cassandra/cassandra.py +0 -1
datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
datahub/ingestion/source/confluent_schema_registry.py +6 -6
datahub/ingestion/source/csv_enricher.py +29 -29
datahub/ingestion/source/datahub/config.py +4 -0
datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
datahub/ingestion/source/dbt/dbt_common.py +9 -7
datahub/ingestion/source/dremio/dremio_api.py +4 -4
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
datahub/ingestion/source/elastic_search.py +4 -4
datahub/ingestion/source/gc/datahub_gc.py +1 -0
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +17 -5
datahub/ingestion/source/gcs/gcs_source.py +3 -2
datahub/ingestion/source/ge_data_profiler.py +2 -5
datahub/ingestion/source/ge_profiling_config.py +3 -3
datahub/ingestion/source/iceberg/iceberg.py +3 -3
datahub/ingestion/source/identity/azure_ad.py +3 -3
datahub/ingestion/source/identity/okta.py +3 -3
datahub/ingestion/source/kafka/kafka.py +11 -9
datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
datahub/ingestion/source/looker/looker_common.py +19 -19
datahub/ingestion/source/looker/looker_config.py +3 -3
datahub/ingestion/source/looker/looker_source.py +25 -25
datahub/ingestion/source/looker/looker_template_language.py +3 -3
datahub/ingestion/source/looker/looker_usage.py +5 -7
datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
datahub/ingestion/source/looker/lookml_source.py +13 -15
datahub/ingestion/source/looker/view_upstream.py +5 -5
datahub/ingestion/source/mlflow.py +4 -4
datahub/ingestion/source/mode.py +5 -5
datahub/ingestion/source/mongodb.py +6 -4
datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
datahub/ingestion/source/nifi.py +24 -26
datahub/ingestion/source/openapi.py +9 -9
datahub/ingestion/source/powerbi/config.py +12 -12
datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
datahub/ingestion/source/powerbi/powerbi.py +6 -6
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
datahub/ingestion/source/redshift/config.py +3 -3
datahub/ingestion/source/redshift/redshift.py +12 -12
datahub/ingestion/source/redshift/usage.py +8 -8
datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
datahub/ingestion/source/s3/source.py +1 -1
datahub/ingestion/source/salesforce.py +26 -25
datahub/ingestion/source/schema/json_schema.py +1 -1
datahub/ingestion/source/sigma/sigma.py +3 -3
datahub/ingestion/source/sigma/sigma_api.py +12 -10
datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
datahub/ingestion/source/sql/athena.py +1 -3
datahub/ingestion/source/sql/clickhouse.py +8 -14
datahub/ingestion/source/sql/oracle.py +1 -3
datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
datahub/ingestion/source/sql/sql_types.py +0 -1
datahub/ingestion/source/sql/teradata.py +16 -3
datahub/ingestion/source/state/profiling_state_handler.py +3 -3
datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
datahub/ingestion/source/tableau/tableau.py +245 -101
datahub/ingestion/source/tableau/tableau_common.py +5 -2
datahub/ingestion/source/unity/config.py +3 -1
datahub/ingestion/source/unity/proxy.py +1 -1
datahub/ingestion/source/unity/source.py +3 -3
datahub/ingestion/source/unity/usage.py +3 -1
datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
datahub/ingestion/source/usage/usage_common.py +1 -1
datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
datahub/ingestion/transformer/add_dataset_properties.py +3 -3
datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
datahub/ingestion/transformer/tags_to_terms.py +7 -7
datahub/integrations/assertion/snowflake/compiler.py +10 -10
datahub/lite/duckdb_lite.py +12 -10
datahub/metadata/_schema_classes.py +1 -1
datahub/metadata/schema.avsc +6 -2
datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
datahub/secret/datahub_secrets_client.py +12 -21
datahub/secret/secret_common.py +14 -8
datahub/specific/aspect_helpers/custom_properties.py +1 -2
datahub/sql_parsing/schema_resolver.py +5 -10
datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
datahub/sql_parsing/sqlglot_lineage.py +3 -3
datahub/sql_parsing/sqlglot_utils.py +1 -1
datahub/telemetry/stats.py +1 -2
datahub/testing/mcp_diff.py +1 -1
datahub/utilities/file_backed_collections.py +10 -10
datahub/utilities/hive_schema_to_avro.py +2 -2
datahub/utilities/logging_manager.py +2 -2
datahub/utilities/lossy_collections.py +3 -3
datahub/utilities/mapping.py +3 -3
datahub/utilities/memory_footprint.py +3 -2
datahub/utilities/serialized_lru_cache.py +3 -1
datahub/utilities/sqlalchemy_query_combiner.py +6 -6
datahub/utilities/sqllineage_patch.py +1 -1
datahub/utilities/stats_collections.py +3 -1
datahub/utilities/urns/_urn_base.py +28 -5
datahub/utilities/urns/urn_iter.py +2 -2
{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0

datahub/utilities/lossy_collections.py CHANGED Viewed

@@ -151,9 +151,9 @@ class LossyDict(Dict[_KT, _VT], Generic[_KT, _VT]):
     def as_obj(self) -> Dict[Union[_KT, str], Union[_VT, str]]:
         base_dict: Dict[Union[_KT, str], Union[_VT, str]] = super().copy()  # type: ignore
         if self.sampled:
-            base_dict[
-                "sampled"
-            ] = f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+            base_dict["sampled"] = (
+                f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+            )
         return base_dict
     def total_key_count(self) -> int:

datahub/utilities/mapping.py CHANGED Viewed

@@ -349,9 +349,9 @@ class OperationProcessor:
                         elements=[institutional_memory_element]
                     )
-                    aspect_map[
-                        Constants.ADD_DOC_LINK_OPERATION
-                    ] = institutional_memory_aspect
+                    aspect_map[Constants.ADD_DOC_LINK_OPERATION] = (
+                        institutional_memory_aspect
+                    )
                 else:
                     raise Exception(
                         f"Expected 1 item of type list for the documentation_link meta_mapping config,"

datahub/utilities/memory_footprint.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from collections import deque
 from itertools import chain
 from sys import getsizeof
-from typing import Any, Callable
+from typing import Any, Iterator
 def total_size(o: Any, handlers: Any = {}) -> int:
@@ -15,7 +15,8 @@ def total_size(o: Any, handlers: Any = {}) -> int:
     Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py
     """
-    dict_handler: Callable[[Any], chain[Any]] = lambda d: chain.from_iterable(d.items())
+    def dict_handler(d: dict) -> Iterator[Any]:
+        return chain.from_iterable(d.items())
     all_handlers = {
         tuple: iter,

datahub/utilities/serialized_lru_cache.py CHANGED Viewed

@@ -41,7 +41,9 @@ def serialized_lru_cache(
         def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T:
             # We need a type ignore here because there's no way for us to require that
             # the args and kwargs are hashable while using ParamSpec.
-            key: _Key = cachetools.keys.hashkey(*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k})  # type: ignore
+            key: _Key = cachetools.keys.hashkey(
+                *args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}
+            )  # type: ignore
             with cache_lock:
                 if key in cache:

datahub/utilities/sqlalchemy_query_combiner.py CHANGED Viewed

@@ -160,12 +160,12 @@ class SQLAlchemyQueryCombiner:
     _greenlets_by_thread_lock: threading.Lock = dataclasses.field(
         default_factory=lambda: threading.Lock()
     )
-    _queries_by_thread: Dict[
-        greenlet.greenlet, Dict[str, _QueryFuture]
-    ] = dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
-    _greenlets_by_thread: Dict[
-        greenlet.greenlet, Set[greenlet.greenlet]
-    ] = dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+    _queries_by_thread: Dict[greenlet.greenlet, Dict[str, _QueryFuture]] = (
+        dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
+    )
+    _greenlets_by_thread: Dict[greenlet.greenlet, Set[greenlet.greenlet]] = (
+        dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+    )
     @staticmethod
     def _generate_sql_safe_identifier() -> str:

datahub/utilities/sqllineage_patch.py CHANGED Viewed

@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType
 # Patch based on sqllineage v1.3.3
 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None:  # type: ignore
-    for i, tbl in enumerate(self.tables):
+    for tbl in self.tables:
         holder.add_read(tbl)
     self.union_barriers.append((len(self.columns), len(self.tables)))

datahub/utilities/stats_collections.py CHANGED Viewed

@@ -48,7 +48,9 @@ class TopKDict(DefaultDict[_KT, _VT]):
                 total_value: Union[_VT, str] = sum(trimmed_dict.values())  # type: ignore
             except Exception:
                 total_value = ""
-            trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = total_value  # type: ignore
+            trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = (  # type: ignore
+                total_value  # type: ignore
+            )
             return trimmed_dict

datahub/utilities/urns/_urn_base.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import functools
 import urllib.parse
 from abc import abstractmethod
-from typing import ClassVar, Dict, List, Optional, Type
+from typing import ClassVar, Dict, List, Optional, Type, Union
 from deprecated import deprecated
 from typing_extensions import Self
@@ -86,12 +86,24 @@ class Urn:
         return self._entity_ids
     @classmethod
-    def from_string(cls, urn_str: str) -> Self:
-        """
-        Creates an Urn from its string representation.
+    def from_string(cls, urn_str: Union[str, "Urn"], /) -> Self:
+        """Create an Urn from its string representation.
+        When called against the base Urn class, this method will return a more specific Urn type where possible.
+        >>> from datahub.metadata.urns import DatasetUrn, Urn
+        >>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
+        >>> urn = Urn.from_string(urn_str)
+        >>> assert isinstance(urn, DatasetUrn)
+        When called against a specific Urn type (e.g. DatasetUrn.from_string), this method can
+        also be used for type narrowing.
+        >>> urn_str = 'urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)'
+        >>> assert DatasetUrn.from_string(urn_str)
         Args:
-            urn_str: The string representation of the Urn.
+            urn_str: The string representation of the urn. Also accepts an existing Urn instance.
         Returns:
             Urn of the given string representation.
@@ -100,6 +112,17 @@ class Urn:
             InvalidUrnError: If the string representation is in invalid format.
         """
+        if isinstance(urn_str, Urn):
+            if issubclass(cls, _SpecificUrn) and isinstance(urn_str, cls):
+                # Fast path - we're already the right type.
+                # I'm not really sure why we need a type ignore here, but mypy doesn't really
+                # understand the isinstance check above.
+                return urn_str  # type: ignore
+            # Fall through, so that we can convert a generic Urn to a specific Urn type.
+            urn_str = urn_str.urn()
         # TODO: Add handling for url encoded urns e.g. urn%3A ...
         if not urn_str.startswith("urn:li:"):

datahub/utilities/urns/urn_iter.py CHANGED Viewed

@@ -21,7 +21,7 @@ def _add_prefix_to_paths(
 def list_urns_with_path(
-    model: Union[DictWrapper, MetadataChangeProposalWrapper]
+    model: Union[DictWrapper, MetadataChangeProposalWrapper],
 ) -> List[Tuple[str, _Path]]:
     """List urns in the given model with their paths.
@@ -145,7 +145,7 @@ def lowercase_dataset_urns(
         MetadataChangeEventClass,
         MetadataChangeProposalClass,
         MetadataChangeProposalWrapper,
-    ]
+    ],
 ) -> None:
     def modify_urn(urn: str) -> str:
         if guess_entity_type(urn) == "dataset":

{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL RENAMED Viewed

File without changes

{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{acryl_datahub-0.15.0.2rc6.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt RENAMED Viewed

File without changes

acryl-datahub 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl

Potentially problematic release.

acryl-datahub 0.15.0.2rc6__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl