PyPI - acryl-datahub - Versions diffs - 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3__py3-none-any.whl - Mend

acryl-datahub 0.15.0.2rc7-py3-none-any.whl → 0.15.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (161)

{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/METADATA +2461 -2463
{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/RECORD +161 -161
datahub/__init__.py +1 -1
datahub/api/entities/assertion/assertion_operator.py +3 -5
datahub/api/entities/corpgroup/corpgroup.py +1 -1
datahub/api/entities/datacontract/assertion_operator.py +3 -5
datahub/api/entities/dataproduct/dataproduct.py +4 -4
datahub/api/entities/dataset/dataset.py +2 -1
datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
datahub/cli/cli_utils.py +1 -1
datahub/cli/delete_cli.py +16 -2
datahub/cli/docker_cli.py +6 -6
datahub/cli/lite_cli.py +2 -2
datahub/cli/migrate.py +3 -3
datahub/cli/specific/assertions_cli.py +3 -3
datahub/cli/timeline_cli.py +1 -1
datahub/configuration/common.py +1 -2
datahub/configuration/config_loader.py +73 -50
datahub/configuration/git.py +2 -2
datahub/configuration/time_window_config.py +10 -5
datahub/emitter/mce_builder.py +4 -8
datahub/emitter/mcp_patch_builder.py +1 -2
datahub/ingestion/api/incremental_lineage_helper.py +2 -8
datahub/ingestion/api/report.py +1 -2
datahub/ingestion/api/source_helpers.py +1 -1
datahub/ingestion/extractor/json_schema_util.py +3 -3
datahub/ingestion/extractor/schema_util.py +3 -5
datahub/ingestion/fs/s3_fs.py +3 -3
datahub/ingestion/glossary/datahub_classifier.py +6 -4
datahub/ingestion/graph/client.py +4 -6
datahub/ingestion/run/pipeline.py +8 -7
datahub/ingestion/run/pipeline_config.py +3 -3
datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
datahub/ingestion/source/abs/source.py +19 -8
datahub/ingestion/source/aws/glue.py +11 -11
datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
datahub/ingestion/source/bigquery_v2/queries.py +1 -3
datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
datahub/ingestion/source/bigquery_v2/usage.py +3 -3
datahub/ingestion/source/cassandra/cassandra.py +0 -1
datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
datahub/ingestion/source/confluent_schema_registry.py +6 -6
datahub/ingestion/source/csv_enricher.py +29 -29
datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
datahub/ingestion/source/dbt/dbt_common.py +9 -7
datahub/ingestion/source/dremio/dremio_api.py +4 -4
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
datahub/ingestion/source/elastic_search.py +4 -4
datahub/ingestion/source/fivetran/config.py +4 -0
datahub/ingestion/source/fivetran/fivetran.py +15 -5
datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
datahub/ingestion/source/gcs/gcs_source.py +5 -3
datahub/ingestion/source/ge_data_profiler.py +4 -5
datahub/ingestion/source/ge_profiling_config.py +3 -3
datahub/ingestion/source/iceberg/iceberg.py +3 -3
datahub/ingestion/source/identity/azure_ad.py +3 -3
datahub/ingestion/source/identity/okta.py +3 -3
datahub/ingestion/source/kafka/kafka.py +11 -9
datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
datahub/ingestion/source/looker/looker_common.py +19 -19
datahub/ingestion/source/looker/looker_config.py +3 -3
datahub/ingestion/source/looker/looker_source.py +25 -25
datahub/ingestion/source/looker/looker_template_language.py +3 -3
datahub/ingestion/source/looker/looker_usage.py +5 -7
datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
datahub/ingestion/source/looker/lookml_source.py +13 -15
datahub/ingestion/source/looker/view_upstream.py +5 -5
datahub/ingestion/source/mlflow.py +4 -4
datahub/ingestion/source/mongodb.py +6 -4
datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
datahub/ingestion/source/nifi.py +24 -26
datahub/ingestion/source/openapi.py +9 -9
datahub/ingestion/source/powerbi/config.py +12 -12
datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
datahub/ingestion/source/powerbi/powerbi.py +6 -6
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
datahub/ingestion/source/redshift/config.py +3 -3
datahub/ingestion/source/redshift/query.py +77 -47
datahub/ingestion/source/redshift/redshift.py +12 -12
datahub/ingestion/source/redshift/usage.py +8 -8
datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
datahub/ingestion/source/s3/source.py +1 -1
datahub/ingestion/source/salesforce.py +26 -25
datahub/ingestion/source/schema/json_schema.py +1 -1
datahub/ingestion/source/sigma/sigma.py +3 -3
datahub/ingestion/source/sigma/sigma_api.py +12 -10
datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
datahub/ingestion/source/sql/athena.py +1 -3
datahub/ingestion/source/sql/clickhouse.py +8 -14
datahub/ingestion/source/sql/oracle.py +1 -3
datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
datahub/ingestion/source/sql/teradata.py +16 -3
datahub/ingestion/source/state/profiling_state_handler.py +3 -3
datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
datahub/ingestion/source/tableau/tableau.py +48 -49
datahub/ingestion/source/unity/config.py +3 -1
datahub/ingestion/source/unity/proxy.py +1 -1
datahub/ingestion/source/unity/source.py +3 -3
datahub/ingestion/source/unity/usage.py +3 -1
datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
datahub/ingestion/source/usage/usage_common.py +1 -1
datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
datahub/ingestion/transformer/add_dataset_properties.py +3 -3
datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
datahub/ingestion/transformer/tags_to_terms.py +7 -7
datahub/integrations/assertion/snowflake/compiler.py +10 -10
datahub/lite/duckdb_lite.py +12 -10
datahub/metadata/_schema_classes.py +1 -1
datahub/metadata/schema.avsc +6 -2
datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
datahub/secret/secret_common.py +14 -8
datahub/specific/aspect_helpers/custom_properties.py +1 -2
datahub/sql_parsing/schema_resolver.py +5 -10
datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
datahub/sql_parsing/sqlglot_lineage.py +5 -4
datahub/sql_parsing/sqlglot_utils.py +3 -2
datahub/telemetry/stats.py +1 -2
datahub/testing/mcp_diff.py +1 -1
datahub/utilities/file_backed_collections.py +10 -10
datahub/utilities/hive_schema_to_avro.py +2 -2
datahub/utilities/logging_manager.py +2 -2
datahub/utilities/lossy_collections.py +3 -3
datahub/utilities/mapping.py +3 -3
datahub/utilities/serialized_lru_cache.py +3 -1
datahub/utilities/sqlalchemy_query_combiner.py +6 -6
datahub/utilities/sqllineage_patch.py +1 -1
datahub/utilities/stats_collections.py +3 -1
datahub/utilities/urns/urn_iter.py +2 -2
{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL +0 -0
{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt +0 -0
{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt +0 -0

datahub/ingestion/transformer/tags_to_terms.py CHANGED Viewed

@@ -92,9 +92,9 @@ class TagsToTermMapper(TagsToTermTransformer):
         in_global_tags_aspect: Optional[GlobalTagsClass] = self.ctx.graph.get_tags(
             entity_urn
         )
-        in_schema_metadata_aspect: Optional[
-            SchemaMetadataClass
-        ] = self.ctx.graph.get_schema_metadata(entity_urn)
+        in_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+            self.ctx.graph.get_schema_metadata(entity_urn)
+        )
         if in_global_tags_aspect is None and in_schema_metadata_aspect is None:
             return cast(Aspect, in_glossary_terms)
@@ -134,10 +134,10 @@ class TagsToTermMapper(TagsToTermTransformer):
         )
         if self.config.semantics == TransformerSemantics.PATCH:
-            patch_glossary_terms: Optional[
-                GlossaryTermsClass
-            ] = TagsToTermMapper._merge_with_server_glossary_terms(
-                self.ctx.graph, entity_urn, out_glossary_terms
+            patch_glossary_terms: Optional[GlossaryTermsClass] = (
+                TagsToTermMapper._merge_with_server_glossary_terms(
+                    self.ctx.graph, entity_urn, out_glossary_terms
+                )
             )
             return cast(Optional[Aspect], patch_glossary_terms)
         else:

datahub/integrations/assertion/snowflake/compiler.py CHANGED Viewed

@@ -61,17 +61,17 @@ class SnowflakeAssertionCompiler(AssertionCompiler):
     def create(
         cls, output_dir: str, extras: Dict[str, str]
     ) -> "SnowflakeAssertionCompiler":
-        assert os.path.exists(
-            output_dir
-        ), f"Specified location {output_dir} does not exist."
+        assert os.path.exists(output_dir), (
+            f"Specified location {output_dir} does not exist."
+        )
-        assert os.path.isdir(
-            output_dir
-        ), f"Specified location {output_dir} is not a folder."
+        assert os.path.isdir(output_dir), (
+            f"Specified location {output_dir} is not a folder."
+        )
-        assert any(
-            x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras
-        ), "Must specify value for DMF schema using -x DMF_SCHEMA=<db.schema>"
+        assert any(x.upper() == DMF_SCHEMA_PROPERTY_KEY for x in extras), (
+            "Must specify value for DMF schema using -x DMF_SCHEMA=<db.schema>"
+        )
         return SnowflakeAssertionCompiler(output_dir, extras)
@@ -232,6 +232,6 @@ def get_dmf_schedule(trigger: AssertionTrigger) -> str:
     elif isinstance(trigger.trigger, CronTrigger):
         return f"USING CRON {trigger.trigger.cron} {trigger.trigger.timezone}"
     elif isinstance(trigger.trigger, IntervalTrigger):
-        return f"{trigger.trigger.interval.seconds/60} MIN"
+        return f"{trigger.trigger.interval.seconds / 60} MIN"
     else:
         raise ValueError(f"Unsupported trigger type {type(trigger.trigger)}")

datahub/lite/duckdb_lite.py CHANGED Viewed

@@ -163,9 +163,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 if "properties" not in writeable_dict["systemMetadata"]:
                     writeable_dict["systemMetadata"]["properties"] = {}
-                writeable_dict["systemMetadata"]["properties"][
-                    "sysVersion"
-                ] = new_version
+                writeable_dict["systemMetadata"]["properties"]["sysVersion"] = (
+                    new_version
+                )
                 if needs_write:
                     self.duckdb_client.execute(
                         query="INSERT INTO metadata_aspect_v2 VALUES (?, ?, ?, ?, ?, ?)",
@@ -208,9 +208,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                             "lastObserved": writeable.systemMetadata.lastObserved
                         }
                     else:
-                        system_metadata[
-                            "lastObserved"
-                        ] = writeable.systemMetadata.lastObserved
+                        system_metadata["lastObserved"] = (
+                            writeable.systemMetadata.lastObserved
+                        )
                     self.duckdb_client.execute(
                         query="UPDATE metadata_aspect_v2 SET system_metadata = ? WHERE urn = ? AND aspect_name = ? AND version = 0",
                         parameters=[
@@ -497,9 +497,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             aspect_name = r[1]
             aspect_payload = json.loads(r[2])
             if typed:
-                assert (
-                    aspect_name in ASPECT_MAP
-                ), f"Missing aspect name {aspect_name} in the registry"
+                assert aspect_name in ASPECT_MAP, (
+                    f"Missing aspect name {aspect_name} in the registry"
+                )
                 try:
                     aspect_payload = ASPECT_MAP[aspect_name].from_obj(
                         post_json_transform(aspect_payload)
@@ -531,7 +531,9 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
         for r in results.fetchall():
             urn = r[0]
             aspect_name = r[1]
-            aspect_metadata = ASPECT_MAP[aspect_name].from_obj(post_json_transform(json.loads(r[2])))  # type: ignore
+            aspect_metadata = ASPECT_MAP[aspect_name].from_obj(
+                post_json_transform(json.loads(r[2]))
+            )  # type: ignore
             system_metadata = SystemMetadataClass.from_obj(json.loads(r[3]))
             mcp = MetadataChangeProposalWrapper(
                 entityUrn=urn,

datahub/metadata/_schema_classes.py CHANGED Viewed

@@ -9096,7 +9096,7 @@ class DataProcessInstanceInputClass(_Aspect):
     @property
     def inputs(self) -> List[str]:
-        """Input datasets to be consumed"""
+        """Input assets consumed"""
         return self._inner_dict.get('inputs')  # type: ignore
     @inputs.setter

datahub/metadata/schema.avsc CHANGED Viewed

@@ -12699,8 +12699,10 @@
         "Relationship": {
           "/*": {
             "entityTypes": [
-              "dataset"
+              "dataset",
+              "mlModel"
             ],
+            "isLineage": true,
             "name": "Consumes"
           }
         },
@@ -12720,7 +12722,7 @@
           "items": "string"
         },
         "name": "inputs",
-        "doc": "Input datasets to be consumed"
+        "doc": "Input assets consumed"
       }
     ],
     "doc": "Information about the inputs datasets of a Data process"
@@ -12883,6 +12885,8 @@
               "dataset",
               "mlModel"
             ],
+            "isLineage": true,
+            "isUpstream": false,
             "name": "Produces"
           }
         },

datahub/metadata/schemas/DataProcessInstanceInput.avsc CHANGED Viewed

@@ -10,8 +10,10 @@
       "Relationship": {
         "/*": {
           "entityTypes": [
-            "dataset"
+            "dataset",
+            "mlModel"
           ],
+          "isLineage": true,
           "name": "Consumes"
         }
       },
@@ -29,7 +31,7 @@
         "items": "string"
       },
       "name": "inputs",
-      "doc": "Input datasets to be consumed",
+      "doc": "Input assets consumed",
       "Urn": "Urn",
       "urn_is_array": true
     }

datahub/metadata/schemas/DataProcessInstanceOutput.avsc CHANGED Viewed

@@ -13,6 +13,8 @@
             "dataset",
             "mlModel"
           ],
+          "isLineage": true,
+          "isUpstream": false,
           "name": "Produces"
         }
       },

datahub/secret/secret_common.py CHANGED Viewed

@@ -2,10 +2,7 @@ import json
 import logging
 from typing import List
-from datahub.configuration.config_loader import (
-    list_referenced_env_variables,
-    resolve_env_variables,
-)
+from datahub.configuration.config_loader import EnvResolver
 from datahub.secret.secret_store import SecretStore
 logger = logging.getLogger(__name__)
@@ -42,18 +39,27 @@ def resolve_secrets(secret_names: List[str], secret_stores: List[SecretStore]) -
     return final_secret_values
-def resolve_recipe(recipe: str, secret_stores: List[SecretStore]) -> dict:
+def resolve_recipe(
+    recipe: str, secret_stores: List[SecretStore], strict_env_syntax: bool = True
+) -> dict:
+    # Note: the default for `strict_env_syntax` is normally False, but here we override
+    # it to be true. Particularly when fetching secrets from external secret stores, we
+    # want to be more careful about not over-fetching secrets.
     json_recipe_raw = json.loads(recipe)
     # 1. Extract all secrets needing resolved.
-    secrets_to_resolve = list_referenced_env_variables(json_recipe_raw)
+    secrets_to_resolve = EnvResolver.list_referenced_variables(
+        json_recipe_raw, strict_env_syntax=strict_env_syntax
+    )
     # 2. Resolve secret values
     secret_values_dict = resolve_secrets(list(secrets_to_resolve), secret_stores)
     # 3. Substitute secrets into recipe file
-    json_recipe_resolved = resolve_env_variables(
-        json_recipe_raw, environ=secret_values_dict
+    resolver = EnvResolver(
+        environ=secret_values_dict, strict_env_syntax=strict_env_syntax
     )
+    json_recipe_resolved = resolver.resolve(json_recipe_raw)
     return json_recipe_resolved

datahub/specific/aspect_helpers/custom_properties.py CHANGED Viewed

@@ -9,8 +9,7 @@ from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
 class HasCustomPropertiesPatch(MetadataPatchProposal):
     @classmethod
     @abstractmethod
-    def _custom_properties_location(self) -> Tuple[str, PatchPath]:
-        ...
+    def _custom_properties_location(self) -> Tuple[str, PatchPath]: ...
     def add_custom_property(self, key: str, value: str) -> Self:
         """Add a custom property to the entity.

datahub/sql_parsing/schema_resolver.py CHANGED Viewed

@@ -33,14 +33,11 @@ class GraphQLSchemaMetadata(TypedDict):
 class SchemaResolverInterface(Protocol):
     @property
-    def platform(self) -> str:
-        ...
+    def platform(self) -> str: ...
-    def includes_temp_tables(self) -> bool:
-        ...
+    def includes_temp_tables(self) -> bool: ...
-    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]:
-        ...
+    def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: ...
     def __hash__(self) -> int:
         # Mainly to make lru_cache happy in methods that accept a schema resolver.
@@ -232,8 +229,7 @@ class SchemaResolver(Closeable, SchemaResolverInterface):
         return {
             get_simple_field_path_from_v2_field_path(field["fieldPath"]): (
                 # The actual types are more of a "nice to have".
-                field["nativeDataType"]
-                or "str"
+                field["nativeDataType"] or "str"
             )
             for field in schema["fields"]
             # TODO: We can't generate lineage to columns nested within structs yet.
@@ -289,8 +285,7 @@ def _convert_schema_field_list_to_info(
     return {
         get_simple_field_path_from_v2_field_path(col.fieldPath): (
             # The actual types are more of a "nice to have".
-            col.nativeDataType
-            or "str"
+            col.nativeDataType or "str"
         )
         for col in schema_fields
         # TODO: We can't generate lineage to columns nested within structs yet.

datahub/sql_parsing/sql_parsing_aggregator.py CHANGED Viewed

@@ -682,10 +682,10 @@ class SqlParsingAggregator(Closeable):
         query_id = self._known_lineage_query_id()
         # Generate CLL if schema of downstream is known
-        column_lineage: List[
-            ColumnLineageInfo
-        ] = self._generate_identity_column_lineage(
-            upstream_urn=upstream_urn, downstream_urn=downstream_urn
+        column_lineage: List[ColumnLineageInfo] = (
+            self._generate_identity_column_lineage(
+                upstream_urn=upstream_urn, downstream_urn=downstream_urn
+            )
         )
         # Register the query.
@@ -1044,9 +1044,9 @@ class SqlParsingAggregator(Closeable):
             temp_table_schemas: Dict[str, Optional[List[models.SchemaFieldClass]]] = {}
             for temp_table_urn, query_ids in self._temp_lineage_map[session_id].items():
                 for query_id in query_ids:
-                    temp_table_schemas[
-                        temp_table_urn
-                    ] = self._inferred_temp_schemas.get(query_id)
+                    temp_table_schemas[temp_table_urn] = (
+                        self._inferred_temp_schemas.get(query_id)
+                    )
                     if temp_table_schemas:
                         break
@@ -1073,9 +1073,9 @@ class SqlParsingAggregator(Closeable):
             schema_resolver=self._schema_resolver,
         )
         if parsed.debug_info.error:
-            self.report.views_parse_failures[
-                view_urn
-            ] = f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+            self.report.views_parse_failures[view_urn] = (
+                f"{parsed.debug_info.error} on query: {view_definition.view_definition[:100]}"
+            )
         if parsed.debug_info.table_error:
             self.report.num_views_failed += 1
             return  # we can't do anything with this query
@@ -1583,9 +1583,9 @@ class SqlParsingAggregator(Closeable):
                                     temp_query_lineage_info
                                 )
                             else:
-                                temp_upstream_queries[
-                                    upstream
-                                ] = temp_query_lineage_info
+                                temp_upstream_queries[upstream] = (
+                                    temp_query_lineage_info
+                                )
             # Compute merged upstreams.
             new_upstreams = OrderedSet[UrnStr]()
@@ -1665,9 +1665,9 @@ class SqlParsingAggregator(Closeable):
         composed_of_queries_truncated: LossyList[str] = LossyList()
         for query_id in composed_of_queries:
             composed_of_queries_truncated.append(query_id)
-        self.report.queries_with_temp_upstreams[
-            composite_query_id
-        ] = composed_of_queries_truncated
+        self.report.queries_with_temp_upstreams[composite_query_id] = (
+            composed_of_queries_truncated
+        )
         merged_query_text = ";\n\n".join(
             [q.formatted_query_string for q in ordered_queries]

datahub/sql_parsing/sqlglot_lineage.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 import dataclasses
 import functools
 import logging
 import traceback
 from collections import defaultdict
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
 import pydantic.dataclasses
@@ -441,9 +442,9 @@ def _create_table_ddl_cll(
 ) -> List[_ColumnLineageInfo]:
     column_lineage: List[_ColumnLineageInfo] = []
-    assert (
-        output_table is not None
-    ), "output_table must be set for create DDL statements"
+    assert output_table is not None, (
+        "output_table must be set for create DDL statements"
+    )
     create_schema: sqlglot.exp.Schema = statement.this
     sqlglot_columns = create_schema.expressions

datahub/sql_parsing/sqlglot_utils.py CHANGED Viewed

@@ -1,8 +1,9 @@
+from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 import functools
 import hashlib
 import logging
 import re
-from datahub.sql_parsing._sqlglot_patch import SQLGLOT_PATCHED
 from typing import Dict, Iterable, Optional, Tuple, Union
 import sqlglot
@@ -403,7 +404,7 @@ def detach_ctes(
         if new_statement == statement:
             if iteration > 1:
                 logger.debug(
-                    f"Required {iteration+1} iterations to detach and eliminate all CTEs"
+                    f"Required {iteration + 1} iterations to detach and eliminate all CTEs"
                 )
             break
         statement = new_statement

datahub/telemetry/stats.py CHANGED Viewed

@@ -5,8 +5,7 @@ from typing_extensions import Protocol
 class SupportsLT(Protocol):
-    def __lt__(self, __other: Any) -> Any:
-        ...
+    def __lt__(self, __other: Any) -> Any: ...
 _SupportsComparisonT = TypeVar("_SupportsComparisonT", bound=SupportsLT)

datahub/testing/mcp_diff.py CHANGED Viewed

@@ -246,7 +246,7 @@ class MCPDiff:
         for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed:
             aspect_map = self.aspect_changes[urn]
             s.append(f"Urn changed, {urn}:")
-            for aspect_name, aspect_diffs in aspect_map.items():
+            for aspect_diffs in aspect_map.values():
                 for i, ga in aspect_diffs.aspects_added.items():
                     s.append(self.report_aspect(ga, i, "added"))
                     if verbose:

datahub/utilities/file_backed_collections.py CHANGED Viewed

@@ -224,9 +224,9 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
     _use_sqlite_on_conflict: bool = field(repr=False, default=True)
     def __post_init__(self) -> None:
-        assert (
-            self.cache_eviction_batch_size > 0
-        ), "cache_eviction_batch_size must be positive"
+        assert self.cache_eviction_batch_size > 0, (
+            "cache_eviction_batch_size must be positive"
+        )
         for reserved_column in ("key", "value", "rowid"):
             if reserved_column in self.extra_columns:
@@ -261,7 +261,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                 key TEXT UNIQUE,
                 value BLOB
-                {''.join(f', {column_name} BLOB' for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns.keys())}
             )"""
         )
@@ -316,12 +316,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 f"""INSERT INTO {self.tablename} (
                     key,
                     value
-                    {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+                    {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
                 )
-                VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})
+                VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})
                 ON CONFLICT (key) DO UPDATE SET
                     value = excluded.value
-                    {''.join(f', {column_name} = excluded.{column_name}' for column_name in self.extra_columns.keys())}
+                    {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns.keys())}
                 """,
                 items_to_write,
             )
@@ -332,16 +332,16 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                         f"""INSERT INTO {self.tablename} (
                             key,
                             value
-                            {''.join(f', {column_name}' for column_name in self.extra_columns.keys())}
+                            {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
                         )
-                        VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""",
+                        VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""",
                         item,
                     )
                 except sqlite3.IntegrityError:
                     self._conn.execute(
                         f"""UPDATE {self.tablename} SET
                             value = ?
-                            {''.join(f', {column_name} = ?' for column_name in self.extra_columns.keys())}
+                            {"".join(f", {column_name} = ?" for column_name in self.extra_columns.keys())}
                         WHERE key = ?""",
                         (*item[1:], item[0]),
                     )

datahub/utilities/hive_schema_to_avro.py CHANGED Viewed

@@ -142,10 +142,10 @@ class HiveColumnToAvroConverter:
                 fields.append({"name": field_name, "type": field_type})
         if kwargs.get("ustruct_seqn") is not None:
-            struct_name = f'__structn_{kwargs["ustruct_seqn"]}_{str(uuid.uuid4()).replace("-", "")}'
+            struct_name = f"__structn_{kwargs['ustruct_seqn']}_{str(uuid.uuid4()).replace('-', '')}"
         else:
-            struct_name = f'__struct_{str(uuid.uuid4()).replace("-", "")}'
+            struct_name = f"__struct_{str(uuid.uuid4()).replace('-', '')}"
         return {
             "type": "record",
             "name": struct_name,

datahub/utilities/logging_manager.py CHANGED Viewed

@@ -130,9 +130,9 @@ class _ColorLogFormatter(logging.Formatter):
         # Mimic our default format, but with color.
         message_fg = self.MESSAGE_COLORS.get(record.levelname)
         return (
-            f'{click.style(f"[{self.formatTime(record, self.datefmt)}]", fg="green", dim=True)} '
+            f"{click.style(f'[{self.formatTime(record, self.datefmt)}]', fg='green', dim=True)} "
             f"{click.style(f'{record.levelname:8}', fg=message_fg)} "
-            f'{click.style(f"{{{record.name}:{record.lineno}}}", fg="blue", dim=True)} - '
+            f"{click.style(f'{{{record.name}:{record.lineno}}}', fg='blue', dim=True)} - "
             f"{click.style(record.getMessage(), fg=message_fg)}"
         )

datahub/utilities/lossy_collections.py CHANGED Viewed

@@ -151,9 +151,9 @@ class LossyDict(Dict[_KT, _VT], Generic[_KT, _VT]):
     def as_obj(self) -> Dict[Union[_KT, str], Union[_VT, str]]:
         base_dict: Dict[Union[_KT, str], Union[_VT, str]] = super().copy()  # type: ignore
         if self.sampled:
-            base_dict[
-                "sampled"
-            ] = f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+            base_dict["sampled"] = (
+                f"{len(self.keys())} sampled of at most {self.total_key_count()} entries."
+            )
         return base_dict
     def total_key_count(self) -> int:

datahub/utilities/mapping.py CHANGED Viewed

@@ -349,9 +349,9 @@ class OperationProcessor:
                         elements=[institutional_memory_element]
                     )
-                    aspect_map[
-                        Constants.ADD_DOC_LINK_OPERATION
-                    ] = institutional_memory_aspect
+                    aspect_map[Constants.ADD_DOC_LINK_OPERATION] = (
+                        institutional_memory_aspect
+                    )
                 else:
                     raise Exception(
                         f"Expected 1 item of type list for the documentation_link meta_mapping config,"

datahub/utilities/serialized_lru_cache.py CHANGED Viewed

@@ -41,7 +41,9 @@ def serialized_lru_cache(
         def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T:
             # We need a type ignore here because there's no way for us to require that
             # the args and kwargs are hashable while using ParamSpec.
-            key: _Key = cachetools.keys.hashkey(*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k})  # type: ignore
+            key: _Key = cachetools.keys.hashkey(
+                *args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}
+            )  # type: ignore
             with cache_lock:
                 if key in cache:

datahub/utilities/sqlalchemy_query_combiner.py CHANGED Viewed

@@ -160,12 +160,12 @@ class SQLAlchemyQueryCombiner:
     _greenlets_by_thread_lock: threading.Lock = dataclasses.field(
         default_factory=lambda: threading.Lock()
     )
-    _queries_by_thread: Dict[
-        greenlet.greenlet, Dict[str, _QueryFuture]
-    ] = dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
-    _greenlets_by_thread: Dict[
-        greenlet.greenlet, Set[greenlet.greenlet]
-    ] = dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+    _queries_by_thread: Dict[greenlet.greenlet, Dict[str, _QueryFuture]] = (
+        dataclasses.field(default_factory=lambda: collections.defaultdict(dict))
+    )
+    _greenlets_by_thread: Dict[greenlet.greenlet, Set[greenlet.greenlet]] = (
+        dataclasses.field(default_factory=lambda: collections.defaultdict(set))
+    )
     @staticmethod
     def _generate_sql_safe_identifier() -> str:

datahub/utilities/sqllineage_patch.py CHANGED Viewed

@@ -8,7 +8,7 @@ from sqllineage.utils.constant import EdgeType
 # Patch based on sqllineage v1.3.3
 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None:  # type: ignore
-    for i, tbl in enumerate(self.tables):
+    for tbl in self.tables:
         holder.add_read(tbl)
     self.union_barriers.append((len(self.columns), len(self.tables)))

datahub/utilities/stats_collections.py CHANGED Viewed

@@ -48,7 +48,9 @@ class TopKDict(DefaultDict[_KT, _VT]):
                 total_value: Union[_VT, str] = sum(trimmed_dict.values())  # type: ignore
             except Exception:
                 total_value = ""
-            trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = total_value  # type: ignore
+            trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = (  # type: ignore
+                total_value  # type: ignore
+            )
             return trimmed_dict

datahub/utilities/urns/urn_iter.py CHANGED Viewed

@@ -21,7 +21,7 @@ def _add_prefix_to_paths(
 def list_urns_with_path(
-    model: Union[DictWrapper, MetadataChangeProposalWrapper]
+    model: Union[DictWrapper, MetadataChangeProposalWrapper],
 ) -> List[Tuple[str, _Path]]:
     """List urns in the given model with their paths.
@@ -145,7 +145,7 @@ def lowercase_dataset_urns(
         MetadataChangeEventClass,
         MetadataChangeProposalClass,
         MetadataChangeProposalWrapper,
-    ]
+    ],
 ) -> None:
     def modify_urn(urn: str) -> str:
         if guess_entity_type(urn) == "dataset":

{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3__py3-none-any.whl

Potentially problematic release.

acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3__py3-none-any.whl