acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2335 -2337
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +157 -157
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/cli/cli_utils.py +1 -1
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +3 -3
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +4 -6
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +11 -11
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
- datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +4 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +3 -3
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/mlflow.py +4 -4
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -26
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +12 -12
- datahub/ingestion/source/redshift/usage.py +8 -8
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/teradata.py +16 -3
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/tableau/tableau.py +48 -49
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +3 -3
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +6 -2
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
- datahub/sql_parsing/sqlglot_lineage.py +5 -4
- datahub/sql_parsing/sqlglot_utils.py +3 -2
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +10 -10
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
@@ -477,9 +477,9 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
                 upstream_dataset_urns
                 and dataset_urn not in self.dataset_upstream_urn_mapping
             ):
-                self.dataset_upstream_urn_mapping[
-                    dataset_urn
-                ] = upstream_dataset_urns
+                self.dataset_upstream_urn_mapping[dataset_urn] = (
+                    upstream_dataset_urns
+                )

             element_input_fields = [
                 InputFieldClass(
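Most of the hunks in this release are formatting-only: what appears to be a newer Black/Ruff layout that keeps a long subscripted assignment target on one line and parenthesizes the right-hand side instead. A minimal, hypothetical sketch of the before/after layout (the mapping, key, and value names below are illustrative, not from the source):

# Hypothetical illustration of the assignment reflow applied throughout this diff.
mapping = {}
key, value = "dataset_urn", ["upstream_urn_1", "upstream_urn_2"]

# Older layout: the subscript on the left-hand side is split across lines.
mapping[
    key
] = value

# Newer layout (as in the rc8 hunks): the right-hand side is parenthesized instead.
mapping[key] = (
    value
)

assert mapping[key] == value  # behaviour is identical; only the layout changes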
@@ -126,9 +126,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for workspace_dict in response_dict[Constant.ENTRIES]:
-                self.workspaces[
-                    workspace_dict[Constant.WORKSPACEID]
-                ] = Workspace.parse_obj(workspace_dict)
+                self.workspaces[workspace_dict[Constant.WORKSPACEID]] = (
+                    Workspace.parse_obj(workspace_dict)
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:
@@ -147,9 +147,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for user_dict in response_dict[Constant.ENTRIES]:
-                users[
-                    user_dict[Constant.MEMBERID]
-                ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+                users[user_dict[Constant.MEMBERID]] = (
+                    f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:
@@ -327,10 +327,12 @@ class SigmaAPI:
             response.raise_for_status()
             for i, element_dict in enumerate(response.json()[Constant.ENTRIES]):
                 if not element_dict.get(Constant.NAME):
-                    element_dict[Constant.NAME] = f"Element {i+1} of Page '{page.name}'"
-                element_dict[
-                    Constant.URL
-                ] = f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+                    element_dict[Constant.NAME] = (
+                        f"Element {i + 1} of Page '{page.name}'"
+                    )
+                element_dict[Constant.URL] = (
+                    f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+                )
                 element = Element.parse_obj(element_dict)
                 if (
                     self.config.extract_lineage
@@ -384,18 +384,20 @@ class SnowflakeV2Config(
             assert all(
                 consumer.platform_instance != share_details.platform_instance
                 for consumer in share_details.consumers
-            ), "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+            ), (
+                "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+            )

             databases_included_in_share.append(shared_db)
             databases_created_from_share.extend(share_details.consumers)

         for db_from_share in databases_created_from_share:
-            assert (
-                db_from_share not in databases_included_in_share
-            ), "Database included in a share can not be present as consumer in any share."
-            assert (
-                databases_created_from_share.count(db_from_share) == 1
-            ), "Same database can not be present as consumer in more than one share."
+            assert db_from_share not in databases_included_in_share, (
+                "Database included in a share can not be present as consumer in any share."
+            )
+            assert databases_created_from_share.count(db_from_share) == 1, (
+                "Same database can not be present as consumer in more than one share."
+            )

         return shares

@@ -250,9 +250,9 @@ class SnowflakeConnectionConfig(ConfigModel):
         if self.private_key is not None:
             pkey_bytes = self.private_key.replace("\\n", "\n").encode()
         else:
-            assert (
-                self.private_key_path
-            ), "missing required private key path to read key from"
+            assert self.private_key_path, (
+                "missing required private key path to read key from"
+            )
             with open(self.private_key_path, "rb") as key:
                 pkey_bytes = key.read()

@@ -284,9 +284,9 @@ class SnowflakeConnectionConfig(ConfigModel):
             return self.options

     def get_oauth_connection(self) -> NativeSnowflakeConnection:
-        assert (
-            self.oauth_config
-        ), "oauth_config should be provided if using oauth based authentication"
+        assert self.oauth_config, (
+            "oauth_config should be provided if using oauth based authentication"
+        )
         generator = OAuthTokenGenerator(
             client_id=self.oauth_config.client_id,
             authority_url=self.oauth_config.authority_url,
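The assert statements get the same reflow treatment: the condition stays on the assert line and the message moves into parentheses on the following lines. A small sketch of the two layouts, using an illustrative value rather than the real config object:

# Hypothetical sketch of the assert reflow shown in the hunks above.
private_key_path = "/tmp/rsa_key.p8"  # illustrative value only

# Older layout: condition wrapped in parentheses, message trailing on the closing line.
assert (
    private_key_path
), "missing required private key path to read key from"

# Newer layout: condition inline, message wrapped in parentheses.
assert private_key_path, (
    "missing required private key path to read key from"
)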
@@ -623,7 +623,7 @@ fingerprinted_queries as (
         query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
         AND execution_status = 'SUCCESS'
-        AND {users_filter or
+        AND {users_filter or "TRUE"}
     )
     , deduplicated_queries as (
         SELECT
@@ -651,7 +651,7 @@ fingerprinted_queries as (
     WHERE
         query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
-        AND {users_filter or
+        AND {users_filter or "TRUE"}
         AND query_id IN (
             SELECT query_id FROM deduplicated_queries
         )
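Both query templates splice an optional user filter into the WHERE clause; when no filter is configured, the expression falls back to the literal TRUE so the predicate becomes a no-op. A minimal sketch of that fallback pattern (the helper function and filter string below are illustrative, not from the source):

# Hypothetical sketch of the optional-filter fallback used in the queries above.
def users_clause(users_filter: str = "") -> str:
    # When no filter is configured, collapse the predicate to a no-op TRUE.
    return "AND " + (users_filter or "TRUE")

assert users_clause("user_name NOT ILIKE 'SYSTEM%'") == "AND user_name NOT ILIKE 'SYSTEM%'"
assert users_clause() == "AND TRUE"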
@@ -142,9 +142,9 @@ class _SnowflakeTagCache:
         )

         # self._table_tags[<database_name>][<schema_name>][<table_name>] = list of tags applied to table
-        self._table_tags: Dict[
-            str, Dict[str, Dict[str, List[SnowflakeTag]]]
-        ] = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        self._table_tags: Dict[str, Dict[str, Dict[str, List[SnowflakeTag]]]] = (
+            defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        )

         # self._column_tags[<database_name>][<schema_name>][<table_name>][<column_name>] = list of tags applied to column
         self._column_tags: Dict[
@@ -194,9 +194,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             config, self.data_dictionary, self.report
         )
         self.profiler: Optional[SnowflakeProfiler] = profiler
-        self.snowsight_url_builder: Optional[
-            SnowsightUrlBuilder
-        ] = snowsight_url_builder
+        self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = (
+            snowsight_url_builder
+        )

         # These are populated as side-effects of get_workunits_internal.
         self.databases: List[SnowflakeDatabase] = []
@@ -267,9 +267,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             )
             return None
         else:
-            ischema_databases: List[
-                SnowflakeDatabase
-            ] = self.get_databases_from_ischema(databases)
+            ischema_databases: List[SnowflakeDatabase] = (
+                self.get_databases_from_ischema(databases)
+            )

             if len(ischema_databases) == 0:
                 self.structured_reporter.failure(
@@ -38,9 +38,9 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         table_name: Optional[str],
     ) -> List[SnowflakeTag]:
         if db_name not in self.tag_cache:
-            self.tag_cache[
-                db_name
-            ] = self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+            self.tag_cache[db_name] = (
+                self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+            )

         if domain == SnowflakeObjectDomain.DATABASE:
             return self.tag_cache[db_name].get_database_tags(db_name)
@@ -130,10 +130,10 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         temp_column_tags: Dict[str, List[SnowflakeTag]] = {}
         if self.config.extract_tags == TagOption.without_lineage:
             if db_name not in self.tag_cache:
-                self.tag_cache[
-                    db_name
-                ] = self.data_dictionary.get_tags_for_database_without_propagation(
-                    db_name
+                self.tag_cache[db_name] = (
+                    self.data_dictionary.get_tags_for_database_without_propagation(
+                        db_name
+                    )
                 )
             temp_column_tags = self.tag_cache[db_name].get_column_tags_for_table(
                 table_name, schema_name, db_name
@@ -549,9 +549,9 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
         ):
             # NOTE: Generated emails may be incorrect, as email may be different than
             # username@email_domain
-            event_dict[
-                "EMAIL"
-            ] = f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower()
+            event_dict["EMAIL"] = (
+                f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower()
+            )

         if not event_dict["EMAIL"]:
             self.report.rows_missing_email += 1
@@ -21,8 +21,7 @@ from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Repor
 class SnowflakeStructuredReportMixin(abc.ABC):
     @property
     @abc.abstractmethod
-    def structured_reporter(self) -> SourceReport:
-        ...
+    def structured_reporter(self) -> SourceReport: ...


 class SnowsightUrlBuilder:
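The only change here is collapsing the Ellipsis body of the abstract property onto the signature line. A self-contained sketch of that stub pattern (the Reporter and ConsoleReporter classes are illustrative stand-ins, and str replaces the real SourceReport type):

# Hypothetical sketch of an abstract property stub with an inline "..." body.
import abc


class Reporter(abc.ABC):
    @property
    @abc.abstractmethod
    def structured_reporter(self) -> str: ...  # subclasses must supply a report object


class ConsoleReporter(Reporter):
    @property
    def structured_reporter(self) -> str:
        return "console"


print(ConsoleReporter().structured_reporter)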
@@ -211,9 +211,9 @@ class SnowflakeV2Source(

         self.usage_extractor: Optional[SnowflakeUsageExtractor] = None
         if self.config.include_usage_stats or self.config.include_operational_stats:
-            redundant_usage_run_skip_handler: Optional[
-                RedundantUsageRunSkipHandler
-            ] = None
+            redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+                None
+            )
             if self.config.enable_stateful_usage_ingestion:
                 redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
                     source=self,
@@ -296,7 +296,16 @@ class SnowflakeV2Source(

         _report: Dict[Union[SourceCapability, str], CapabilityReport] = dict()
         privileges: List[SnowflakePrivilege] = []
-        capabilities: List[SourceCapability] = [
+        capabilities: List[SourceCapability] = [
+            c.capability
+            for c in SnowflakeV2Source.get_capabilities()  # type: ignore
+            if c.capability
+            not in (
+                SourceCapability.PLATFORM_INSTANCE,
+                SourceCapability.DOMAINS,
+                SourceCapability.DELETION_DETECTION,
+            )
+        ]

         cur = conn.query("select current_role()")
         current_role = [row["CURRENT_ROLE()"] for row in cur][0]
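Unlike most hunks, this one reshapes behaviour-carrying code: the connection test builds its capability list from the source's declared capabilities, dropping the ones that cannot be checked against Snowflake privileges. A rough, hypothetical sketch of that filtering (the enum below is an illustrative subset, not the real SourceCapability):

# Hypothetical sketch of filtering declared capabilities before a connection test.
from enum import Enum


class SourceCapability(Enum):  # illustrative subset only
    CONTAINERS = "containers"
    LINEAGE_COARSE = "lineage_coarse"
    PLATFORM_INSTANCE = "platform_instance"
    DOMAINS = "domains"
    DELETION_DETECTION = "deletion_detection"


declared = list(SourceCapability)
capabilities = [
    c
    for c in declared
    if c
    not in (
        SourceCapability.PLATFORM_INSTANCE,
        SourceCapability.DOMAINS,
        SourceCapability.DELETION_DETECTION,
    )
]
print(capabilities)  # only the capabilities the privilege check can exercise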
@@ -104,9 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
         return "\n".join([r for r in res])

     @typing.no_type_check
-    def _get_column_type(
-        self, type_: Union[str, Dict[str, Any]]
-    ) -> TypeEngine:  # noqa: C901
+    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:  # noqa: C901
         """Derives the data type of the Athena column.

         This method is overwritten to extend the behavior of PyAthena.
@@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw):
             , comment
             , {properties_clause} AS properties
         FROM system.tables
-        WHERE name NOT LIKE '.inner%'""".format(
-            properties_clause=properties_clause
-        )
+        WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause)
     )

     all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {}
@@ -268,7 +266,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
     info_cache = kw.get("info_cache")
     all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
     relation_names = []
-    for
+    for _, relation in all_relations.items():
         if relation.database == schema and relation.relkind == relkind:
             relation_names.append(relation.relname)
     return relation_names
@@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw):
             , comment
             FROM system.columns
             WHERE {schema_clause}
-            ORDER BY database, table, position""".format(
-                schema_clause=schema_clause
-            )
+            ORDER BY database, table, position""".format(schema_clause=schema_clause)
         )
     )
     )
@@ -474,7 +470,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **self.config.options)
         for db_row in engine.execute(text(all_tables_query)):
-            all_tables_set.add(f
+            all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}")

         return all_tables_set

@@ -503,7 +499,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

         try:
             for db_row in engine.execute(text(query)):
-                dataset_name = f
+                dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}"
                 if not self.config.database_pattern.allowed(
                     db_row["target_schema"]
                 ) or not self.config.table_pattern.allowed(dataset_name):
@@ -512,7 +508,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

                 # Target
                 target_path = (
-                    f
+                    f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
                     f"{dataset_name}"
                 )
                 target = LineageItem(
@@ -525,7 +521,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

                 # Source
                 platform = LineageDatasetPlatform.CLICKHOUSE
-                path = f
+                path = f"{db_row['source_schema']}.{db_row['source_table']}"

                 sources = [
                     LineageDataset(
@@ -552,9 +548,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
                         target.dataset.path
                     ].upstreams = self._lineage_map[
                         target.dataset.path
-                    ].upstreams.union(
-                        target.upstreams
-                    )
+                    ].upstreams.union(target.upstreams)

                 else:
                     self._lineage_map[target.dataset.path] = target
@@ -234,9 +234,7 @@ class OracleInspectorObjectWrapper:
                 WHERE col.table_name = id.table_name
                 AND col.column_name = id.column_name
                 AND col.owner = id.owner
-                ) AS identity_options""".format(
-                dblink=dblink
-            )
+                ) AS identity_options""".format(dblink=dblink)
         else:
             identity_cols = "NULL as default_on_null, NULL as identity_options"

@@ -278,8 +278,7 @@ class GenericProfiler:

         if self.config.profiling.profile_table_size_limit is not None and (
             size_in_bytes is not None
-            and size_in_bytes / (2**30)
-            > self.config.profiling.profile_table_size_limit
+            and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit
         ):
             self.report.profiling_skipped_size_limit[schema_name] += 1
             logger.debug(
@@ -599,7 +599,12 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_columns",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            use_qvci=self.config.use_qvci,
+            **kw: optimized_get_columns(
                 self,
                 connection,
                 table_name,
@@ -613,7 +618,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_pk_constraint",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_pk_constraint(
                 self, connection, table_name, schema, **kw
             ),
         )
@@ -621,7 +630,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_foreign_keys",
-            lambda self,
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_foreign_keys(
                 self, connection, table_name, schema, **kw
             ),
         )
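These three hunks only re-wrap the lambda parameter lists; the underlying technique is monkey-patching SQLAlchemy dialect methods at runtime with setattr so that optimized implementations take over. A simplified, hypothetical sketch of that technique (FakeDialect and optimized_get_columns are stand-ins, not the real Teradata classes):

# Hypothetical sketch of patching a dialect method via setattr + lambda.
class FakeDialect:  # stand-in for a SQLAlchemy dialect such as TeradataDialect
    def get_columns(self, connection, table_name, schema=None, **kw):
        return []


def optimized_get_columns(dialect, connection, table_name, schema=None, use_qvci=False, **kw):
    # Pretend this version batches metadata queries instead of issuing one per table.
    return [{"name": "id", "qvci": use_qvci}]


setattr(  # noqa: B010 - mirrors the style used in the source above
    FakeDialect,
    "get_columns",
    lambda self,
    connection,
    table_name,
    schema=None,
    use_qvci=True,
    **kw: optimized_get_columns(
        self, connection, table_name, schema, use_qvci=use_qvci, **kw
    ),
)

print(FakeDialect().get_columns(connection=None, table_name="orders"))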
@@ -41,9 +41,9 @@ class ProfilingHandler(StatefulIngestionUsecaseHandlerBase[ProfilingCheckpointSt
         run_id: str,
     ):
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            ProfilingStatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self.checkpointing_enabled: bool = (
@@ -48,9 +48,9 @@ class RedundantRunSkipHandler(
     ):
         self.source = source
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            StatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self._job_id = self._init_job_id()
@@ -145,8 +145,7 @@ class RedundantRunSkipHandler(
         )

         logger.debug(
-            f"{self.job_id} : Last run start, end times:"
-            f"({last_run_time_window})"
+            f"{self.job_id} : Last run start, end times:({last_run_time_window})"
         )

         # If current run's time window is subset of last run's time window, then skip.
@@ -212,8 +211,7 @@ class RedundantRunSkipHandler(
         )

         self.log(
-            "Adjusted start, end times: "
-            f"({suggested_start_time}, {suggested_end_time})"
+            f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})"
        )
         return (suggested_start_time, suggested_end_time)

@@ -111,9 +111,9 @@ class StaleEntityRemovalHandler(
         self.state_type_class = state_type_class
         self.pipeline_name = pipeline_name
         self.run_id = run_id
-        self.stateful_ingestion_config: Optional[
-            StatefulStaleMetadataRemovalConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
+            config.stateful_ingestion
+        )
         self.checkpointing_enabled: bool = (
             True
             if (
@@ -70,20 +70,20 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             self.orchestrator_name, pipeline_name, job_name
         )

-        latest_checkpoint: Optional[
-            DatahubIngestionCheckpointClass
-        ] = self.graph.get_latest_timeseries_value(
-            entity_urn=data_job_urn,
-            aspect_type=DatahubIngestionCheckpointClass,
-            filter_criteria_map={
-                "pipelineName": pipeline_name,
-            },
+        latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = (
+            self.graph.get_latest_timeseries_value(
+                entity_urn=data_job_urn,
+                aspect_type=DatahubIngestionCheckpointClass,
+                filter_criteria_map={
+                    "pipelineName": pipeline_name,
+                },
+            )
         )
         if latest_checkpoint:
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else:
@@ -67,7 +67,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else: