acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two published versions.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/METADATA +2461 -2463
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/RECORD +161 -161
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/delete_cli.py +16 -2
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/config.py +4 -0
- datahub/ingestion/source/fivetran/fivetran.py +15 -5
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +5 -3
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/query.py +77 -47
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/clickhouse.py

```diff
@@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw):
              , comment
              , {properties_clause} AS properties
           FROM system.tables
-         WHERE name NOT LIKE '.inner%'""".format(
-            properties_clause=properties_clause
-        )
+         WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause)
     )
 
     all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {}
@@ -268,7 +266,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
     info_cache = kw.get("info_cache")
     all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
     relation_names = []
-    for
+    for _, relation in all_relations.items():
         if relation.database == schema and relation.relkind == relkind:
             relation_names.append(relation.relname)
     return relation_names
@@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw):
              , comment
           FROM system.columns
          WHERE {schema_clause}
-         ORDER BY database, table, position""".format(
-            schema_clause=schema_clause
-        )
+         ORDER BY database, table, position""".format(schema_clause=schema_clause)
         )
     )
 )
@@ -474,7 +470,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **self.config.options)
         for db_row in engine.execute(text(all_tables_query)):
-            all_tables_set.add(f
+            all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}")
 
         return all_tables_set
 
@@ -503,7 +499,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
         try:
             for db_row in engine.execute(text(query)):
-                dataset_name = f
+                dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}"
                 if not self.config.database_pattern.allowed(
                     db_row["target_schema"]
                 ) or not self.config.table_pattern.allowed(dataset_name):
@@ -512,7 +508,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
                 # Target
                 target_path = (
-                    f
+                    f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
                     f"{dataset_name}"
                 )
                 target = LineageItem(
@@ -525,7 +521,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
 
                 # Source
                 platform = LineageDatasetPlatform.CLICKHOUSE
-                path = f
+                path = f"{db_row['source_schema']}.{db_row['source_table']}"
 
                 sources = [
                     LineageDataset(
@@ -552,9 +548,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
                         target.dataset.path
                     ].upstreams = self._lineage_map[
                         target.dataset.path
-                    ].upstreams.union(
-                        target.upstreams
-                    )
+                    ].upstreams.union(target.upstreams)
 
                 else:
                     self._lineage_map[target.dataset.path] = target
```
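The restored f-strings above also show the naming convention these sources use: an optional `platform_instance` prefix folded into the dataset path via a conditional expression inside the f-string. A runnable sketch of just that rule (plain variables stand in for the config object):

```python
platform_instance = "prod"  # or None when no platform instance is configured
dataset_name = "analytics.events"

# Prefix the dataset name with "<platform_instance>." only when an instance
# is configured, mirroring the target_path construction in the hunks above.
target_path = (
    f"{platform_instance + '.' if platform_instance else ''}"
    f"{dataset_name}"
)
print(target_path)  # prod.analytics.events
```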
datahub/ingestion/source/sql/oracle.py

```diff
@@ -234,9 +234,7 @@ class OracleInspectorObjectWrapper:
             WHERE col.table_name = id.table_name
             AND col.column_name = id.column_name
             AND col.owner = id.owner
-            ) AS identity_options""".format(
-                dblink=dblink
-            )
+            ) AS identity_options""".format(dblink=dblink)
         else:
             identity_cols = "NULL as default_on_null, NULL as identity_options"
 
```
datahub/ingestion/source/sql/sql_generic_profiler.py

```diff
@@ -278,8 +278,7 @@ class GenericProfiler:
 
         if self.config.profiling.profile_table_size_limit is not None and (
             size_in_bytes is not None
-            and size_in_bytes / (2**30)
-            > self.config.profiling.profile_table_size_limit
+            and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit
         ):
             self.report.profiling_skipped_size_limit[schema_name] += 1
             logger.debug(
```
datahub/ingestion/source/sql/teradata.py

```diff
@@ -599,7 +599,12 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_columns",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            use_qvci=self.config.use_qvci,
+            **kw: optimized_get_columns(
                 self,
                 connection,
                 table_name,
@@ -613,7 +618,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_pk_constraint",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_pk_constraint(
                 self, connection, table_name, schema, **kw
             ),
         )
@@ -621,7 +630,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_foreign_keys",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_foreign_keys(
                 self, connection, table_name, schema, **kw
             ),
         )
```
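These Teradata hunks reformat the monkey-patching lambdas so each parameter sits on its own line; note that `use_qvci=self.config.use_qvci` is a lambda default, which freezes the config value at the moment the patch is installed. A minimal sketch of that binding behavior (the `Dialect` class and `optimized_get_columns` below are illustrative stand-ins, not the real SQLAlchemy objects):

```python
class Dialect:
    def get_columns(self, connection, table_name, schema=None, **kw):
        return f"slow lookup of {table_name}"


def optimized_get_columns(
    self, connection, table_name, schema=None, use_qvci=False, **kw
):
    # Stand-in for the optimized implementation.
    return f"fast lookup of {table_name} (use_qvci={use_qvci})"


use_qvci_from_config = True  # e.g. self.config.use_qvci at patch time

# The default argument is evaluated once, when the lambda is created, so the
# patched method keeps this value even if the config object changes later.
setattr(
    Dialect,
    "get_columns",
    lambda self, connection, table_name, schema=None, use_qvci=use_qvci_from_config, **kw: optimized_get_columns(
        self, connection, table_name, schema, use_qvci=use_qvci, **kw
    ),
)

print(Dialect().get_columns(None, "orders"))  # fast lookup of orders (use_qvci=True)
```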
datahub/ingestion/source/state/profiling_state_handler.py

```diff
@@ -41,9 +41,9 @@ class ProfilingHandler(StatefulIngestionUsecaseHandlerBase[ProfilingCheckpointSt
         run_id: str,
     ):
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            ProfilingStatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self.checkpointing_enabled: bool = (
```
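This hunk is the first of many below with the same shape: a long annotated assignment that used to break inside the type subscript is now kept on one line, with the right-hand side wrapped in parentheses instead. This matches the style newer Black/Ruff formatter releases emit, which the pure-whitespace nature of these changes suggests is the driver of most of this release. A before/after sketch with a dummy type:

```python
from typing import Optional


class Config:  # minimal stand-in for the real ingestion config
    stateful_ingestion: Optional[str] = "enabled"


config = Config()

# Old style: the subscript on the annotation is split across lines.
stateful_ingestion_config: Optional[
    str
] = config.stateful_ingestion

# New style: the annotation stays intact and the value is parenthesized.
stateful_ingestion_config: Optional[str] = (
    config.stateful_ingestion
)
```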
datahub/ingestion/source/state/redundant_run_skip_handler.py

```diff
@@ -48,9 +48,9 @@ class RedundantRunSkipHandler(
     ):
         self.source = source
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            StatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self._job_id = self._init_job_id()
@@ -145,8 +145,7 @@ class RedundantRunSkipHandler(
         )
 
         logger.debug(
-            f"{self.job_id} : Last run start, end times:"
-            f"({last_run_time_window})"
+            f"{self.job_id} : Last run start, end times:({last_run_time_window})"
         )
 
         # If current run's time window is subset of last run's time window, then skip.
@@ -212,8 +211,7 @@ class RedundantRunSkipHandler(
         )
 
         self.log(
-            "Adjusted start, end times: "
-            f"({suggested_start_time}, {suggested_end_time})"
+            f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})"
         )
         return (suggested_start_time, suggested_end_time)
 
```
datahub/ingestion/source/state/stale_entity_removal_handler.py

```diff
@@ -111,9 +111,9 @@ class StaleEntityRemovalHandler(
         self.state_type_class = state_type_class
         self.pipeline_name = pipeline_name
         self.run_id = run_id
-        self.stateful_ingestion_config: Optional[
-            StatefulStaleMetadataRemovalConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
+            config.stateful_ingestion
+        )
         self.checkpointing_enabled: bool = (
             True
             if (
```
datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py

```diff
@@ -70,20 +70,20 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             self.orchestrator_name, pipeline_name, job_name
         )
 
-        latest_checkpoint: Optional[
-            DatahubIngestionCheckpointClass
-        ] = self.graph.get_latest_timeseries_value(
-            entity_urn=data_job_urn,
-            aspect_type=DatahubIngestionCheckpointClass,
-            filter_criteria_map={
-                "pipelineName": pipeline_name,
-            },
+        latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = (
+            self.graph.get_latest_timeseries_value(
+                entity_urn=data_job_urn,
+                aspect_type=DatahubIngestionCheckpointClass,
+                filter_criteria_map={
+                    "pipelineName": pipeline_name,
+                },
+            )
         )
         if latest_checkpoint:
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else:
```
datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py

```diff
@@ -67,7 +67,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else:
```
datahub/ingestion/source/tableau/tableau.py

```diff
@@ -281,9 +281,9 @@ class TableauConnectionConfig(ConfigModel):
         return authentication
 
     def make_tableau_client(self, site: str) -> Server:
-        authentication: Union[
-            TableauAuth, PersonalAccessTokenAuth
-        ] = self.get_tableau_auth(site)
+        authentication: Union[TableauAuth, PersonalAccessTokenAuth] = (
+            self.get_tableau_auth(site)
+        )
         try:
             server = Server(
                 self.connect_uri,
@@ -635,7 +635,7 @@ class TableauConfig(
         project_path_pattern = values.get("project_path_pattern")
         if project_pattern is None and project_path_pattern is None and projects:
             logger.warning(
-                "projects is deprecated, please use
+                "projects is deprecated, please use project_path_pattern instead."
             )
             logger.info("Initializing project_pattern from projects")
             values["project_pattern"] = AllowDenyPattern(
@@ -708,18 +708,18 @@ class DatabaseTable:
     """
 
     urn: str
-    id: Optional[
-        str
-    ] = None  # is not None only for tables that came from Tableau metadata
+    id: Optional[str] = (
+        None  # is not None only for tables that came from Tableau metadata
+    )
     num_cols: Optional[int] = None
 
-    paths: Optional[
-        Set[str]
-    ] = None  # maintains all browse paths encountered for this table
+    paths: Optional[Set[str]] = (
+        None  # maintains all browse paths encountered for this table
+    )
 
-    parsed_columns: Optional[
-        Set[str]
-    ] = None  # maintains all columns encountered for this table during parsing SQL queries
+    parsed_columns: Optional[Set[str]] = (
+        None  # maintains all columns encountered for this table during parsing SQL queries
+    )
 
     def update_table(
         self,
@@ -2310,8 +2310,7 @@ class TableauSiteSource:
                 c.EMBEDDED_DATA_SOURCE,
             ):
                 logger.debug(
-                    f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is "
-                    f"unsupported"
+                    f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is unsupported"
                 )
                 return None
 
@@ -2493,9 +2492,9 @@ class TableauSiteSource:
     def _enrich_database_tables_with_parsed_schemas(
         self, parsing_result: SqlParsingResult
     ) -> None:
-        in_tables_schemas: Dict[
-            str, Set[str]
-        ] = transform_parsing_result_to_in_tables_schemas(parsing_result)
+        in_tables_schemas: Dict[str, Set[str]] = (
+            transform_parsing_result_to_in_tables_schemas(parsing_result)
+        )
 
         if not in_tables_schemas:
             logger.info("Unable to extract table schema from parsing result")
@@ -3559,25 +3558,25 @@ class TableauSiteSource:
 
         generated_project_keys.add(project_key.guid())
 
-        parent_project_key: Optional[
-            Union[ProjectKey, SiteKey]
-        ] = None  # It is going
+        parent_project_key: Optional[Union[ProjectKey, SiteKey]] = (
+            None  # It is going
+        )
         # to be used as a parent container key for the current tableau project
 
         if project_.parent_id is not None:
             # Go to the parent project as we need to generate container first for parent
             parent_project_key = self.gen_project_key(project_.parent_id)
 
-            parent_tableau_project: Optional[
-                TableauProject
-            ] = self.tableau_project_registry.get(project_.parent_id)
+            parent_tableau_project: Optional[TableauProject] = (
+                self.tableau_project_registry.get(project_.parent_id)
+            )
 
             if (
                 parent_tableau_project is None
             ):  # It is not in project registry because of project_pattern
-                assert (
-                    project_.
-                )
+                assert project_.parent_name, (
+                    f"project {project_.name} should not be null"
+                )
                 parent_tableau_project = TableauProject(
                     id=project_.parent_id,
                     name=project_.parent_name,
```
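The assert rewrite in the hunk above moves the parentheses from the condition to the message, which is how newer formatters lay out long asserts. It also avoids ever nudging a comma inside the condition's parentheses, the classic always-true `assert (cond, msg)` tuple bug. A small sketch with illustrative values (not the real Tableau project objects):

```python
parent_name = "Analytics"

# Older layout: the condition is wrapped in parentheses.
assert (
    parent_name is not None
), "project parent name should not be null"

# Newer layout: bare condition, parenthesized message.
assert parent_name is not None, (
    "project parent name should not be null"
)
```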
datahub/ingestion/source/tableau/tableau.py

```diff
@@ -3605,7 +3604,7 @@ class TableauSiteSource:
             parent_container_key=parent_project_key,
         )
 
-        for
+        for project in self.tableau_project_registry.values():
             logger.debug(
                 f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
             )
@@ -3669,16 +3668,16 @@ class TableauSiteSource:
         if self.config.extract_usage_stats:
             with PerfTimer() as timer:
                 self._populate_usage_stat_registry()
-                self.report.extract_usage_stats_timer[
-                    self.site_content_url
-                ] = timer.elapsed_seconds(digits=2)
+                self.report.extract_usage_stats_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
 
         if self.config.permission_ingestion:
             with PerfTimer() as timer:
                 self._fetch_groups()
-                self.report.fetch_groups_timer[
-                    self.site_content_url
-                ] = timer.elapsed_seconds(digits=2)
+                self.report.fetch_groups_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
 
         # Populate the map of database names and database hostnames to be used later to map
         # databases to platform instances.
@@ -3691,9 +3690,9 @@ class TableauSiteSource:
 
         with PerfTimer() as timer:
             self._populate_projects_registry()
-            self.report.populate_projects_registry_timer[
-                self.site_content_url
-            ] = timer.elapsed_seconds(digits=2)
+            self.report.populate_projects_registry_timer[self.site_content_url] = (
+                timer.elapsed_seconds(digits=2)
+            )
 
         if self.config.add_site_container:
             yield from self.emit_site_container()
@@ -3701,23 +3700,23 @@ class TableauSiteSource:
 
         with PerfTimer() as timer:
             yield from self.emit_workbooks()
-            self.report.emit_workbooks_timer[
-                self.site_content_url
-            ] = timer.elapsed_seconds(digits=2)
+            self.report.emit_workbooks_timer[self.site_content_url] = (
+                timer.elapsed_seconds(digits=2)
+            )
 
         if self.sheet_ids:
             with PerfTimer() as timer:
                 yield from self.emit_sheets()
-                self.report.emit_sheets_timer[
-                    self.site_content_url
-                ] = timer.elapsed_seconds(digits=2)
+                self.report.emit_sheets_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
 
         if self.dashboard_ids:
             with PerfTimer() as timer:
                 yield from self.emit_dashboards()
-                self.report.emit_dashboards_timer[
-                    self.site_content_url
-                ] = timer.elapsed_seconds(digits=2)
+                self.report.emit_dashboards_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
 
         if self.embedded_datasource_ids_being_used:
             with PerfTimer() as timer:
@@ -3743,6 +3742,6 @@ class TableauSiteSource:
         if self.database_tables:
             with PerfTimer() as timer:
                 yield from self.emit_upstream_tables()
-                self.report.emit_upstream_tables_timer[
-                    self.site_content_url
-                ] = timer.elapsed_seconds(digits=2)
+                self.report.emit_upstream_tables_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
```
datahub/ingestion/source/unity/config.py

```diff
@@ -254,7 +254,9 @@ class UnityCatalogSourceConfig(
     )
 
     # TODO: Remove `type:ignore` by refactoring config
-    profiling: Union[
+    profiling: Union[
+        UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig
+    ] = Field(  # type: ignore
         default=UnityCatalogGEProfilerConfig(),
         description="Data profiling configuration",
         discriminator="method",
```
datahub/ingestion/source/unity/proxy.py

```diff
@@ -363,7 +363,7 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
 
     @staticmethod
     def _create_metastore(
-        obj: Union[GetMetastoreSummaryResponse, MetastoreInfo]
+        obj: Union[GetMetastoreSummaryResponse, MetastoreInfo],
     ) -> Optional[Metastore]:
         if not obj.name:
             return None
```
datahub/ingestion/source/unity/source.py

```diff
@@ -205,9 +205,9 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         self.table_refs: Set[TableReference] = set()
         self.view_refs: Set[TableReference] = set()
         self.notebooks: FileBackedDict[Notebook] = FileBackedDict()
-        self.view_definitions: FileBackedDict[
-            Tuple[TableReference, str]
-        ] = FileBackedDict()
+        self.view_definitions: FileBackedDict[Tuple[TableReference, str]] = (
+            FileBackedDict()
+        )
 
         # Global map of tables, for profiling
         self.tables: FileBackedDict[Table] = FileBackedDict()
```
datahub/ingestion/source/unity/usage.py

```diff
@@ -103,7 +103,9 @@ class UnityCatalogUsageExtractor:
                 query, table_info
             )
             for source_table in table_info.source_tables:
-                with
+                with (
+                    self.report.usage_perf_report.aggregator_add_event_timer
+                ):
                     self.usage_aggregator.aggregate_event(
                         resource=source_table,
                         start_time=query.start_time,
```
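The `with (...)` form above is the formatter parenthesizing a long context-manager expression so it can wrap without a backslash continuation. A minimal runnable sketch, using a hypothetical timer since the real `usage_perf_report` type is not shown in this diff:

```python
from contextlib import contextmanager


@contextmanager
def aggregator_add_event_timer():  # hypothetical stand-in for the report's timer
    print("timer start")
    yield
    print("timer stop")


# Parenthesizing the (single) context-manager expression lets the formatter
# break the line cleanly.
with (
    aggregator_add_event_timer()
):
    print("aggregate one usage event")
```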
datahub/ingestion/source/usage/clickhouse_usage.py

```diff
@@ -213,15 +213,15 @@ class ClickHouseUsageSource(Source):
     def _aggregate_access_events(
         self, events: List[ClickHouseJoinedAccessEvent]
     ) -> Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]]:
-        datasets: Dict[
-            datetime, Dict[ClickHouseTableRef, AggregatedDataset]
-        ] = collections.defaultdict(dict)
+        datasets: Dict[datetime, Dict[ClickHouseTableRef, AggregatedDataset]] = (
+            collections.defaultdict(dict)
+        )
 
         for event in events:
             floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration)
 
             resource = (
-                f
+                f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
                 f"{event.database}.{event.table}"
             )
 
```
datahub/ingestion/source/usage/starburst_trino_usage.py

```diff
@@ -235,9 +235,9 @@ class TrinoUsageSource(Source):
     def _aggregate_access_events(
         self, events: List[TrinoJoinedAccessEvent]
     ) -> Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]]:
-        datasets: Dict[
-            datetime, Dict[TrinoTableRef, AggregatedDataset]
-        ] = collections.defaultdict(dict)
+        datasets: Dict[datetime, Dict[TrinoTableRef, AggregatedDataset]] = (
+            collections.defaultdict(dict)
+        )
 
         for event in events:
             floored_ts = get_time_bucket(event.starttime, self.config.bucket_duration)
```
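Both usage sources aggregate access events into a nested mapping keyed by a floored timestamp, built on `collections.defaultdict(dict)`. A condensed sketch of that bucketing shape, with plain strings and counts standing in for the table-ref and `AggregatedDataset` types:

```python
import collections
from datetime import datetime, timedelta
from typing import Dict


def get_time_bucket(ts: datetime, bucket: timedelta) -> datetime:
    # Simplified stand-in: floor a timestamp to the start of its bucket.
    return datetime.min + ((ts - datetime.min) // bucket) * bucket


datasets: Dict[datetime, Dict[str, int]] = collections.defaultdict(dict)

events = [
    (datetime(2024, 1, 1, 10, 15), "db.orders"),
    (datetime(2024, 1, 1, 10, 45), "db.orders"),
    (datetime(2024, 1, 1, 11, 5), "db.users"),
]
for ts, table in events:
    floored_ts = get_time_bucket(ts, timedelta(hours=1))
    datasets[floored_ts][table] = datasets[floored_ts].get(table, 0) + 1

print(dict(datasets))
# {datetime(2024, 1, 1, 10, 0): {'db.orders': 2},
#  datetime(2024, 1, 1, 11, 0): {'db.users': 1}}
```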
datahub/ingestion/source/usage/usage_common.py

```diff
@@ -89,7 +89,7 @@ def make_usage_workunit(
     top_sql_queries: Optional[List[str]] = None
     if query_freq is not None:
         if top_n_queries < len(query_freq):
-            logger.
+            logger.warning(
                 f"Top N query limit exceeded on {str(resource)}. Max number of queries {top_n_queries} < {len(query_freq)}. Truncating top queries to {top_n_queries}."
             )
             query_freq = query_freq[0:top_n_queries]
```
datahub/ingestion/transformer/add_dataset_dataproduct.py

```diff
@@ -80,10 +80,10 @@ class AddDatasetDataProduct(DatasetDataproductTransformer):
                 ).add_asset(container_urn)
                 data_products_container[data_product_urn] = container_product
             else:
-                data_products_container[
-                    data_product_urn
-                ] = data_products_container[data_product_urn].add_asset(
-                    container_urn
+                data_products_container[data_product_urn] = (
+                    data_products_container[data_product_urn].add_asset(
+                        container_urn
+                    )
                 )
 
         mcps: List[
```
datahub/ingestion/transformer/add_dataset_properties.py

```diff
@@ -61,9 +61,9 @@ class AddDatasetProperties(DatasetPropertiesTransformer):
     ) -> Optional[DatasetPropertiesClass]:
         assert dataset_properties_aspect
 
-        server_dataset_properties_aspect: Optional[
-            DatasetPropertiesClass
-        ] = graph.get_dataset_properties(entity_urn)
+        server_dataset_properties_aspect: Optional[DatasetPropertiesClass] = (
+            graph.get_dataset_properties(entity_urn)
+        )
         # No need to take any action if server properties is None or there is not customProperties in server properties
         if (
             server_dataset_properties_aspect is None
```
datahub/ingestion/transformer/add_dataset_schema_tags.py

```diff
@@ -89,9 +89,9 @@ class AddDatasetSchemaTags(DatasetSchemaMetadataTransformer):
         server_field_map: dict = {}
         if self.config.semantics == TransformerSemantics.PATCH:
             assert self.ctx.graph
-            server_schema_metadata_aspect: Optional[
-                SchemaMetadataClass
-            ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+                self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            )
             if server_schema_metadata_aspect is not None:
                 if not schema_metadata_aspect:
                     schema_metadata_aspect = server_schema_metadata_aspect
```
datahub/ingestion/transformer/add_dataset_schema_terms.py

```diff
@@ -108,9 +108,9 @@ class AddDatasetSchemaTerms(DatasetSchemaMetadataTransformer):
         ] = {}  # Map to cache server field objects, where fieldPath is key
         if self.config.semantics == TransformerSemantics.PATCH:
             assert self.ctx.graph
-            server_schema_metadata_aspect: Optional[
-                SchemaMetadataClass
-            ] = self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            server_schema_metadata_aspect: Optional[SchemaMetadataClass] = (
+                self.ctx.graph.get_schema_metadata(entity_urn=entity_urn)
+            )
             if server_schema_metadata_aspect is not None:
                 if not schema_metadata_aspect:
                     schema_metadata_aspect = server_schema_metadata_aspect
```
datahub/ingestion/transformer/dataset_domain_based_on_tags.py

```diff
@@ -60,10 +60,10 @@ class DatasetTagDomainMapper(DatasetDomainTransformer):
             domain_aspect.domains.extend(mapped_domains.domains)
             if self.config.semantics == TransformerSemantics.PATCH:
                 # Try merging with server-side domains
-                patch_domain_aspect: Optional[
-                    DomainsClass
-                ] = AddDatasetDomain._merge_with_server_domains(
-                    self.ctx.graph, entity_urn, domain_aspect
+                patch_domain_aspect: Optional[DomainsClass] = (
+                    AddDatasetDomain._merge_with_server_domains(
+                        self.ctx.graph, entity_urn, domain_aspect
+                    )
                 )
                 return cast(Optional[Aspect], patch_domain_aspect)
             return cast(Optional[Aspect], domain_aspect)
```
datahub/ingestion/transformer/extract_ownership_from_tags.py

```diff
@@ -141,9 +141,9 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
         else:
             owner_type = get_owner_type(self.config.owner_type)
             if owner_type == OwnershipTypeClass.CUSTOM:
-                assert (
-                    self.config.owner_type_urn is not None
-                )
+                assert self.config.owner_type_urn is not None, (
+                    "owner_type_urn must be set if owner_type is CUSTOM"
+                )
 
                 owners.append(
                     OwnerClass(
```
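Several transformer hunks above share one PATCH-semantics shape: fetch the server-side aspect through the graph client, merge it with the locally computed aspect, and emit the result. A condensed sketch of that flow; `fetch_server_aspect` and `merge` are hypothetical helpers, whereas the real transformers call `DataHubGraph` getters such as `get_schema_metadata` and `get_dataset_properties`:

```python
from typing import Dict, Optional


def fetch_server_aspect(entity_urn: str) -> Optional[Dict[str, str]]:
    # Hypothetical stand-in for a DataHubGraph lookup.
    return {"owner": "server-team"} if entity_urn.endswith("known") else None


def merge(server: Dict[str, str], local: Dict[str, str]) -> Dict[str, str]:
    # PATCH semantics: start from server values, overlay local edits.
    return {**server, **local}


def transform(entity_urn: str, local_aspect: Dict[str, str]) -> Dict[str, str]:
    server_aspect = fetch_server_aspect(entity_urn)
    if server_aspect is None:
        return local_aspect  # nothing on the server yet; emit local as-is
    return merge(server_aspect, local_aspect)


print(transform("urn:li:dataset:known", {"domain": "sales"}))
# {'owner': 'server-team', 'domain': 'sales'}
```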
|