acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/METADATA +2461 -2463
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/RECORD +161 -161
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +5 -3
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/query.py +77 -47
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt +0 -0
```diff
@@ -519,9 +519,9 @@ class DataHubGraph(DatahubRestEmitter):
         :return: Optionally, a map of aspect_name to aspect_value as a dictionary if present, aspect_value will be set to None if that aspect was not found. Returns None on HTTP status 404.
         :raises HttpError: if the HTTP response is not a 200
         """
-        assert len(aspects) == len(
-            aspect_types
-        )
+        assert len(aspects) == len(aspect_types), (
+            f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})"
+        )
 
         # TODO: generate aspects list from type classes
         response_json = self.get_entity_raw(entity_urn, aspects)
@@ -1576,9 +1576,7 @@ class DataHubGraph(DatahubRestEmitter):
                     ... assertionResult
                 }
             }
-        """ % (
-            self._assertion_result_shared()
-        )
+        """ % (self._assertion_result_shared())
 
         variables = {
             "assertionUrn": urn,
@@ -76,8 +76,9 @@ class LoggingCallback(WriteCallback):
         failure_metadata: dict,
     ) -> None:
         logger.error(
-            f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}"
-
+            f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}",
+            extra={"failure_metadata": failure_metadata},
+            exc_info=failure_exception,
         )
 
 
```
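The `LoggingCallback` hunk above is a behavioral change rather than a pure re-wrap: the failure metadata and the original exception are now attached to the log record instead of being dropped from the message. A minimal sketch of the same stdlib-logging pattern (the function name and arguments here are illustrative, not taken from the package):

```python
import logging

logger = logging.getLogger(__name__)


def log_write_failure(
    sink_name: str,
    workunit_id: str,
    failure_exception: BaseException,
    failure_metadata: dict,
) -> None:
    # exc_info attaches the traceback of the original exception to the log record;
    # extra= makes failure_metadata available to structured logging handlers.
    logger.error(
        f"{sink_name} failed to write record with workunit {workunit_id}",
        extra={"failure_metadata": failure_metadata},
        exc_info=failure_exception,
    )
```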
```diff
@@ -108,9 +109,9 @@ class DeadLetterQueueCallback(WriteCallback):
                 mcp.systemMetadata.properties = {}
             if "workunit_id" not in mcp.systemMetadata.properties:
                 # update the workunit id
-                mcp.systemMetadata.properties[
-                    "workunit_id"
-                ] = record_envelope.metadata["workunit_id"]
+                mcp.systemMetadata.properties["workunit_id"] = (
+                    record_envelope.metadata["workunit_id"]
+                )
             record_envelope.record = mcp
         self.file_sink.write_record_async(record_envelope, self.logging_callback)
 
@@ -700,7 +701,7 @@ class Pipeline:
             num_failures_sink = len(self.sink.get_report().failures)
             click.secho(
                 message_template.format(
-                    status=f"with at least {num_failures_source+num_failures_sink} failures"
+                    status=f"with at least {num_failures_source + num_failures_sink} failures"
                 ),
                 fg=self._get_text_color(
                     running=currently_running, failures=True, warnings=False
@@ -718,7 +719,7 @@ class Pipeline:
             num_warn_global = len(global_warnings)
             click.secho(
                 message_template.format(
-                    status=f"with at least {num_warn_source+num_warn_sink+num_warn_global} warnings"
+                    status=f"with at least {num_warn_source + num_warn_sink + num_warn_global} warnings"
                 ),
                 fg=self._get_text_color(
                     running=currently_running, failures=False, warnings=True
@@ -92,9 +92,9 @@ class PipelineConfig(ConfigModel):
     pipeline_name: Optional[str] = None
     failure_log: FailureLoggingConfig = FailureLoggingConfig()
 
-    _raw_dict: Optional[
-        dict
-    ] = None  # the raw dict that was parsed to construct this config
+    _raw_dict: Optional[dict] = (
+        None  # the raw dict that was parsed to construct this config
+    )
 
     @validator("run_id", pre=True, always=True)
     def run_id_should_be_semantic(
@@ -85,8 +85,8 @@ class DataLakeProfilerConfig(ConfigModel):
             if field_level_metric.startswith("include_field_"):
                 values.setdefault(field_level_metric, False)
 
-        assert (
-            max_num_fields_to_profile is None
-        ), f"{max_num_fields_to_profile_key} should be set to None"
+        assert max_num_fields_to_profile is None, (
+            f"{max_num_fields_to_profile_key} should be set to None"
+        )
 
         return values
@@ -508,7 +508,12 @@ class ABSSource(StatefulIngestionSourceBase):
                 ):
                     abs_path = self.create_abs_path(obj.name)
                     logger.debug(f"Sampling file: {abs_path}")
-                    yield abs_path, obj.name, obj.last_modified, obj.size
+                    yield (
+                        abs_path,
+                        obj.name,
+                        obj.last_modified,
+                        obj.size,
+                    )
         except Exception as e:
             # This odd check if being done because boto does not have a proper exception to catch
             # The exception that appears in stacktrace cannot actually be caught without a lot more work
@@ -552,9 +557,12 @@ class ABSSource(StatefulIngestionSourceBase):
         if os.path.isfile(prefix):
             logger.debug(f"Scanning single local file: {prefix}")
             file_name = prefix
-            yield prefix, file_name, datetime.utcfromtimestamp(
-                os.path.getmtime(prefix)
-            ), os.path.getsize(prefix)
+            yield (
+                prefix,
+                file_name,
+                datetime.utcfromtimestamp(os.path.getmtime(prefix)),
+                os.path.getsize(prefix),
+            )
         else:
             logger.debug(f"Scanning files under local folder: {prefix}")
             for root, dirs, files in os.walk(prefix):
@@ -565,9 +573,12 @@ class ABSSource(StatefulIngestionSourceBase):
                     full_path = PurePath(
                         os.path.normpath(os.path.join(root, file))
                     ).as_posix()
-                    yield full_path, file, datetime.utcfromtimestamp(
-                        os.path.getmtime(full_path)
-                    ), os.path.getsize(full_path)
+                    yield (
+                        full_path,
+                        file,
+                        datetime.utcfromtimestamp(os.path.getmtime(full_path)),
+                        os.path.getsize(full_path),
+                    )
 
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         self.container_WU_creator = ContainerWUCreator(
@@ -613,7 +624,7 @@ class ABSSource(StatefulIngestionSourceBase):
                     table_data.table_path
                 ].timestamp = table_data.timestamp
 
-        for
+        for _, table_data in table_dict.items():
             yield from self.ingest_table(table_data, path_spec)
 
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -521,7 +521,7 @@ class GlueSource(StatefulIngestionSourceBase):
         # otherwise, a node represents a transformation
         else:
             node_urn = mce_builder.make_data_job_urn_with_flow(
-                flow_urn, job_id=f
+                flow_urn, job_id=f"{node['NodeType']}-{node['Id']}"
             )
 
         return {
@@ -679,7 +679,7 @@ class GlueSource(StatefulIngestionSourceBase):
             )
         )
 
-        return MetadataWorkUnit(id=f
+        return MetadataWorkUnit(id=f"{job_name}-{node['Id']}", mce=mce)
 
     def get_all_databases(self) -> Iterable[Mapping[str, Any]]:
         logger.debug("Getting all databases")
@@ -750,13 +750,13 @@ class GlueSource(StatefulIngestionSourceBase):
     ) -> Optional[MetadataWorkUnit]:
         if self.source_config.emit_s3_lineage:
             # extract dataset properties aspect
-            dataset_properties: Optional[
-                DatasetPropertiesClass
-            ] = mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass)
+            dataset_properties: Optional[DatasetPropertiesClass] = (
+                mce_builder.get_aspect_if_available(mce, DatasetPropertiesClass)
+            )
             # extract dataset schema aspect
-            schema_metadata: Optional[
-                SchemaMetadataClass
-            ] = mce_builder.get_aspect_if_available(mce, SchemaMetadataClass)
+            schema_metadata: Optional[SchemaMetadataClass] = (
+                mce_builder.get_aspect_if_available(mce, SchemaMetadataClass)
+            )
 
             if dataset_properties and "Location" in dataset_properties.customProperties:
                 location = dataset_properties.customProperties["Location"]
@@ -765,9 +765,9 @@ class GlueSource(StatefulIngestionSourceBase):
                     location, self.source_config.env
                 )
                 assert self.ctx.graph
-                schema_metadata_for_s3: Optional[
-                    SchemaMetadataClass
-                ] = self.ctx.graph.get_schema_metadata(s3_dataset_urn)
+                schema_metadata_for_s3: Optional[SchemaMetadataClass] = (
+                    self.ctx.graph.get_schema_metadata(s3_dataset_urn)
+                )
 
                 if self.source_config.glue_s3_lineage_direction == "upstream":
                     fine_grained_lineages = None
@@ -40,7 +40,7 @@ def get_s3_tags(
             ]
         )
     except s3.meta.client.exceptions.ClientError:
-        logger.
+        logger.warning(f"No tags found for bucket={bucket_name}")
 
     if use_s3_object_tags and key_name is not None:
         s3_client = aws_config.get_s3_client()
@@ -53,7 +53,7 @@ def get_s3_tags(
         else:
             # Unlike bucket tags, if an object does not have tags, it will just return an empty array
             # as opposed to an exception.
-            logger.
+            logger.warning(f"No tags found for bucket={bucket_name} key={key_name}")
     if len(tags_to_add) == 0:
         return None
     if ctx.graph is not None:
@@ -65,7 +65,7 @@ def get_s3_tags(
         if current_tags:
             tags_to_add.extend([current_tag.tag for current_tag in current_tags.tags])
         else:
-            logger.
+            logger.warning("Could not connect to DatahubApi. No current tags to maintain")
     # Remove duplicate tags
     tags_to_add = sorted(list(set(tags_to_add)))
     new_tags = GlobalTagsClass(
@@ -257,7 +257,7 @@ class FeatureGroupProcessor:
         mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot)
 
         return MetadataWorkUnit(
-            id=f
+            id=f"{feature_group_details['FeatureGroupName']}-{feature['FeatureName']}",
            mce=mce,
        )
 
@@ -212,7 +212,7 @@ class ModelProcessor:
         mce = MetadataChangeEvent(proposedSnapshot=endpoint_snapshot)
 
         return MetadataWorkUnit(
-            id=f
+            id=f"{endpoint_details['EndpointName']}",
            mce=mce,
        )
 
@@ -503,7 +503,7 @@ class ModelProcessor:
         mce = MetadataChangeEvent(proposedSnapshot=model_snapshot)
 
         return MetadataWorkUnit(
-            id=f
+            id=f"{model_details['ModelName']}",
            mce=mce,
        )
 
@@ -132,9 +132,9 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
         self.filters = BigQueryFilter(self.config, self.report)
         self.identifiers = BigQueryIdentifierBuilder(self.config, self.report)
 
-        redundant_lineage_run_skip_handler: Optional[
-            RedundantLineageRunSkipHandler
-        ] = None
+        redundant_lineage_run_skip_handler: Optional[RedundantLineageRunSkipHandler] = (
+            None
+        )
         if self.config.enable_stateful_lineage_ingestion:
             redundant_lineage_run_skip_handler = RedundantLineageRunSkipHandler(
                 source=self,
@@ -37,9 +37,9 @@ class BigqueryTableIdentifier:
 
     # Note: this regex may get overwritten by the sharded_table_pattern config.
    # The class-level constant, however, will not be overwritten.
-    _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[
-        str
-    ] = _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX
+    _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX: ClassVar[str] = (
+        _BIGQUERY_DEFAULT_SHARDED_TABLE_REGEX
+    )
     _BIGQUERY_WILDCARD_REGEX: ClassVar[str] = "((_(\\d+)?)\\*$)|\\*$"
     _BQ_SHARDED_TABLE_SUFFIX: str = "_yyyymmdd"
 
```
```diff
@@ -137,9 +137,9 @@ class BigQueryCredential(ConfigModel):
     @root_validator(skip_on_failure=True)
     def validate_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
         if values.get("client_x509_cert_url") is None:
-            values[
-                "client_x509_cert_url"
-            ] = f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
+            values["client_x509_cert_url"] = (
+                f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
+            )
         return values
 
     def create_credential_temp_file(self) -> str:
```
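For readers unfamiliar with the validator shown in the `BigQueryCredential` hunk: the `client_x509_cert_url` default is derived from `client_email` inside a pydantic root validator. A minimal, self-contained sketch of that pattern, assuming pydantic v1-style validators (the model below is simplified and is not the class from the package):

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, root_validator


class CredentialSketch(BaseModel):
    client_email: str
    client_x509_cert_url: Optional[str] = None

    @root_validator(skip_on_failure=True)
    def _default_cert_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        # Derive the cert URL from client_email when the caller does not supply one,
        # mirroring the assignment in the hunk above.
        if values.get("client_x509_cert_url") is None:
            values["client_x509_cert_url"] = (
                f"https://www.googleapis.com/robot/v1/metadata/x509/{values['client_email']}"
            )
        return values


# Example: the derived URL is filled in automatically.
print(CredentialSketch(client_email="svc@project.iam.gserviceaccount.com").client_x509_cert_url)
```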
```diff
@@ -611,9 +611,9 @@ class BigQueryV2Config(
         cls, v: Optional[List[str]], values: Dict
     ) -> Optional[List[str]]:
         if values.get("use_exported_bigquery_audit_metadata"):
-            assert (
-                v and len(v) > 0
-            ), "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`."
+            assert v and len(v) > 0, (
+                "`bigquery_audit_metadata_datasets` should be set if using `use_exported_bigquery_audit_metadata: True`."
+            )
 
         return v
 
@@ -87,9 +87,9 @@ class BigQueryPlatformResourceHelper:
             key=platform_resource_key, graph_client=self.graph
         )
         if platform_resource:
-            self.platform_resource_cache[
-                platform_resource_key.primary_key
-            ] = platform_resource
+            self.platform_resource_cache[platform_resource_key.primary_key] = (
+                platform_resource
+            )
             return platform_resource
         return None
 
@@ -115,7 +115,11 @@ class BigQueryPlatformResourceHelper:
             and platform_resource.resource_info.value
         ):
             try:
-                existing_info: Optional[BigQueryLabelInfo] = platform_resource.resource_info.value.as_pydantic_object(BigQueryLabelInfo)  # type: ignore
+                existing_info: Optional[BigQueryLabelInfo] = (
+                    platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
+                        BigQueryLabelInfo
+                    )
+                )
             except ValidationError as e:
                 logger.error(
                     f"Error converting existing value to BigQueryLabelInfo: {e}. Creating new one. Maybe this is because of a non backward compatible schema change."
@@ -311,8 +311,10 @@ class BigQuerySchemaGenerator:
             platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
                 label, tag_urn, managed_by_datahub=False
             )
-            label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
-                BigQueryLabelInfo
+            label_info: BigQueryLabelInfo = (
+                platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
+                    BigQueryLabelInfo
+                )
             )
             tag_urn = TagUrn.from_string(label_info.datahub_urn)
 
@@ -820,8 +822,10 @@ class BigQuerySchemaGenerator:
             platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
                 label, tag_urn, managed_by_datahub=False
             )
-            label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
-                BigQueryLabelInfo
+            label_info: BigQueryLabelInfo = (
+                platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
+                    BigQueryLabelInfo
+                )
             )
             tag_urn = TagUrn.from_string(label_info.datahub_urn)
 
@@ -860,8 +864,10 @@ class BigQuerySchemaGenerator:
             platform_resource: PlatformResource = self.platform_resource_helper.generate_label_platform_resource(
                 label, tag_urn, managed_by_datahub=False
             )
-            label_info: BigQueryLabelInfo = platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
-                BigQueryLabelInfo
+            label_info: BigQueryLabelInfo = (
+                platform_resource.resource_info.value.as_pydantic_object(  # type: ignore
+                    BigQueryLabelInfo
+                )
             )
             tag_urn = TagUrn.from_string(label_info.datahub_urn)
 
@@ -1203,9 +1209,9 @@ class BigQuerySchemaGenerator:
                 report=self.report,
             )
 
-            self.report.metadata_extraction_sec[
-                f"{project_id}.{dataset.name}"
-            ] = timer.elapsed_seconds(digits=2)
+            self.report.metadata_extraction_sec[f"{project_id}.{dataset.name}"] = (
+                timer.elapsed_seconds(digits=2)
+            )
 
     def get_core_table_details(
         self, dataset_name: str, project_id: str, temp_table_dataset_prefix: str
@@ -697,7 +697,7 @@ class BigqueryLineageExtractor:
             if parsed_queries[-1]:
                 query = f"""create table `{destination_table.get_sanitized_table_ref().table_identifier.get_table_name()}` AS
                 (
-                    {parsed_queries[-1].sql(dialect=
+                    {parsed_queries[-1].sql(dialect="bigquery")}
                 )"""
             else:
                 query = e.query
@@ -809,11 +809,11 @@ class BigqueryLineageExtractor:
                         upstream_lineage, temp_table_upstream
                     )
 
-                    upstreams[
-                        ref_temp_table_upstream
-                    ] = _merge_lineage_edge_columns(
-                        upstreams.get(ref_temp_table_upstream),
-                        collapsed_lineage,
+                    upstreams[ref_temp_table_upstream] = (
+                        _merge_lineage_edge_columns(
+                            upstreams.get(ref_temp_table_upstream),
+                            collapsed_lineage,
+                        )
                     )
                 else:
                     upstreams[upstream_table_ref] = _merge_lineage_edge_columns(
@@ -1004,9 +1004,9 @@ class BigqueryLineageExtractor:
                     dataset_urn
                 )
                 for gcs_dataset_urn in gcs_urns:
-                    schema_metadata_for_gcs: Optional[
-                        SchemaMetadataClass
-                    ] = graph.get_schema_metadata(gcs_dataset_urn)
+                    schema_metadata_for_gcs: Optional[SchemaMetadataClass] = (
+                        graph.get_schema_metadata(gcs_dataset_urn)
+                    )
                     if schema_metadata and schema_metadata_for_gcs:
                         fine_grained_lineage = self.get_fine_grained_lineages_with_gcs(
                             dataset_urn,
@@ -271,9 +271,9 @@ class BigQueryQueriesExtractor(Closeable):
         # Preprocessing stage that deduplicates the queries using query hash per usage bucket
         # Note: FileBackedDict is an ordered dictionary, so the order of execution of
         # queries is inherently maintained
-        queries_deduped: FileBackedDict[
-            Dict[int, ObservedQuery]
-        ] = self.deduplicate_queries(queries)
+        queries_deduped: FileBackedDict[Dict[int, ObservedQuery]] = (
+            self.deduplicate_queries(queries)
+        )
         self.report.num_unique_queries = len(queries_deduped)
         logger.info(f"Found {self.report.num_unique_queries} unique queries")
 
@@ -763,9 +763,9 @@ class BigQueryUsageExtractor:
         )
 
         if event.query_event.default_dataset:
-            custom_properties[
-                "defaultDatabase"
-            ] = event.query_event.default_dataset
+            custom_properties["defaultDatabase"] = (
+                event.query_event.default_dataset
+            )
         if event.read_event:
             if event.read_event.readReason:
                 custom_properties["readReason"] = event.read_event.readReason
@@ -107,10 +107,10 @@ class CassandraToSchemaFieldConverter:
 
     @staticmethod
     def get_column_type(cassandra_column_type: str) -> SchemaFieldDataType:
-        type_class: Optional[
-            Type
-        ] = CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get(
-            cassandra_column_type
+        type_class: Optional[Type] = (
+            CassandraToSchemaFieldConverter._field_type_to_schema_field_type.get(
+                cassandra_column_type
+            )
         )
         if type_class is None:
             logger.warning(
@@ -293,9 +293,9 @@ class ConfluentSchemaRegistry(KafkaSchemaRegistryBase):
     def _load_json_schema_with_resolved_references(
         self, schema: Schema, name: str, subject: str
     ) -> dict:
-        imported_json_schemas: List[
-            JsonSchemaWrapper
-        ] = self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject)
+        imported_json_schemas: List[JsonSchemaWrapper] = (
+            self.get_schemas_from_confluent_ref_json(schema, name=name, subject=subject)
+        )
         schema_dict = json.loads(schema.schema_str)
         reference_map = {}
         for imported_schema in imported_json_schemas:
@@ -332,9 +332,9 @@ class ConfluentSchemaRegistry(KafkaSchemaRegistryBase):
             )
 
         elif schema.schema_type == "PROTOBUF":
-            imported_schemas: List[
-                ProtobufSchema
-            ] = self.get_schemas_from_confluent_ref_protobuf(schema)
+            imported_schemas: List[ProtobufSchema] = (
+                self.get_schemas_from_confluent_ref_protobuf(schema)
+            )
             base_name: str = topic.replace(".", "_")
             fields = protobuf_util.protobuf_schema_to_mce_fields(
                 ProtobufSchema(
@@ -371,11 +371,11 @@ class CSVEnricherSource(Source):
         domain: Optional[str],
         description: Optional[str],
     ) -> Iterable[MetadataWorkUnit]:
-        maybe_terms_wu: Optional[
-            MetadataWorkUnit
-        ] = self.get_resource_glossary_terms_work_unit(
-            entity_urn=entity_urn,
-            term_associations=term_associations,
+        maybe_terms_wu: Optional[MetadataWorkUnit] = (
+            self.get_resource_glossary_terms_work_unit(
+                entity_urn=entity_urn,
+                term_associations=term_associations,
+            )
         )
         if maybe_terms_wu:
             self.report.num_glossary_term_workunits_produced += 1
@@ -389,31 +389,31 @@ class CSVEnricherSource(Source):
             self.report.num_tag_workunits_produced += 1
             yield maybe_tags_wu
 
-        maybe_owners_wu: Optional[
-            MetadataWorkUnit
-        ] = self.get_resource_owners_work_unit(
-            entity_urn=entity_urn,
-            owners=owners,
+        maybe_owners_wu: Optional[MetadataWorkUnit] = (
+            self.get_resource_owners_work_unit(
+                entity_urn=entity_urn,
+                owners=owners,
+            )
         )
         if maybe_owners_wu:
             self.report.num_owners_workunits_produced += 1
             yield maybe_owners_wu
 
-        maybe_domain_wu: Optional[
-            MetadataWorkUnit
-        ] = self.get_resource_domain_work_unit(
-            entity_urn=entity_urn,
-            domain=domain,
+        maybe_domain_wu: Optional[MetadataWorkUnit] = (
+            self.get_resource_domain_work_unit(
+                entity_urn=entity_urn,
+                domain=domain,
+            )
         )
         if maybe_domain_wu:
             self.report.num_domain_workunits_produced += 1
             yield maybe_domain_wu
 
-        maybe_description_wu: Optional[
-            MetadataWorkUnit
-        ] = self.get_resource_description_work_unit(
-            entity_urn=entity_urn,
-            description=description,
+        maybe_description_wu: Optional[MetadataWorkUnit] = (
+            self.get_resource_description_work_unit(
+                entity_urn=entity_urn,
+                description=description,
+            )
         )
         if maybe_description_wu:
             self.report.num_description_workunits_produced += 1
@@ -426,9 +426,9 @@ class CSVEnricherSource(Source):
         needs_write: bool,
     ) -> Tuple[EditableSchemaMetadataClass, bool]:
         field_path: str = sub_resource_row.field_path
-        term_associations: List[
-            GlossaryTermAssociationClass
-        ] = sub_resource_row.term_associations
+        term_associations: List[GlossaryTermAssociationClass] = (
+            sub_resource_row.term_associations
+        )
         tag_associations: List[TagAssociationClass] = sub_resource_row.tag_associations
         description: Optional[str] = sub_resource_row.description
         has_terms: bool = len(term_associations) > 0
@@ -517,9 +517,9 @@ class CSVEnricherSource(Source):
         # Boolean field to tell whether we need to write an MCPW.
         needs_write = False
 
-        current_editable_schema_metadata: Optional[
-            EditableSchemaMetadataClass
-        ] = None
+        current_editable_schema_metadata: Optional[EditableSchemaMetadataClass] = (
+            None
+        )
         if self.ctx.graph and not self.should_overwrite:
             # Fetch the current editable schema metadata
             current_editable_schema_metadata = self.ctx.graph.get_aspect(
@@ -655,9 +655,9 @@ class CSVEnricherSource(Source):
             entity_urn = row["resource"]
             entity_type = Urn.from_string(row["resource"]).get_type()
 
-            term_associations: List[
-                GlossaryTermAssociationClass
-            ] = self.maybe_extract_glossary_terms(row)
+            term_associations: List[GlossaryTermAssociationClass] = (
+                self.maybe_extract_glossary_terms(row)
+            )
             tag_associations: List[TagAssociationClass] = self.maybe_extract_tags(row)
             owners: List[OwnerClass] = self.maybe_extract_owners(row, is_resource_row)
 
@@ -152,7 +152,9 @@ class DataHubDatabaseReader:
     ) -> Iterable[Dict[str, Any]]:
         with self.engine.connect() as conn:
             if self.engine.dialect.name in ["postgresql", "mysql", "mariadb"]:
-                with conn.begin():  # Transaction required for PostgreSQL server-side cursor
+                with (
+                    conn.begin()
+                ):  # Transaction required for PostgreSQL server-side cursor
                     # Note that stream_results=True is mainly supported by PostgreSQL and MySQL-based dialects.
                     # https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.Connection.execution_options.params.stream_results
                     conn = conn.execution_options(
@@ -222,7 +224,7 @@ class DataHubDatabaseReader:
                     )
             except Exception as e:
                 logger.warning(
-                    f
+                    f"Failed to parse metadata for {row['urn']}: {e}", exc_info=True
                 )
                 self.report.num_database_parse_errors += 1
                 self.report.database_parse_errors.setdefault(
```