acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as a potentially problematic release.

Files changed (157)
  1. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2335 -2337
  2. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +157 -157
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/assertion/assertion_operator.py +3 -5
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  7. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  8. datahub/api/entities/dataset/dataset.py +2 -1
  9. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  10. datahub/cli/cli_utils.py +1 -1
  11. datahub/cli/docker_cli.py +6 -6
  12. datahub/cli/lite_cli.py +2 -2
  13. datahub/cli/migrate.py +3 -3
  14. datahub/cli/specific/assertions_cli.py +3 -3
  15. datahub/cli/timeline_cli.py +1 -1
  16. datahub/configuration/common.py +1 -2
  17. datahub/configuration/config_loader.py +73 -50
  18. datahub/configuration/git.py +2 -2
  19. datahub/configuration/time_window_config.py +10 -5
  20. datahub/emitter/mce_builder.py +4 -8
  21. datahub/emitter/mcp_patch_builder.py +1 -2
  22. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  23. datahub/ingestion/api/report.py +1 -2
  24. datahub/ingestion/api/source_helpers.py +1 -1
  25. datahub/ingestion/extractor/json_schema_util.py +3 -3
  26. datahub/ingestion/extractor/schema_util.py +3 -5
  27. datahub/ingestion/fs/s3_fs.py +3 -3
  28. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  29. datahub/ingestion/graph/client.py +4 -6
  30. datahub/ingestion/run/pipeline.py +8 -7
  31. datahub/ingestion/run/pipeline_config.py +3 -3
  32. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  33. datahub/ingestion/source/abs/source.py +19 -8
  34. datahub/ingestion/source/aws/glue.py +11 -11
  35. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  36. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  37. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  38. datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
  39. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  40. datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
  41. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  42. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
  43. datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
  44. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  45. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  46. datahub/ingestion/source/bigquery_v2/usage.py +3 -3
  47. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  48. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
  49. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  50. datahub/ingestion/source/csv_enricher.py +29 -29
  51. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  52. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  53. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  54. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  55. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  56. datahub/ingestion/source/elastic_search.py +4 -4
  57. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
  58. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  59. datahub/ingestion/source/ge_data_profiler.py +4 -5
  60. datahub/ingestion/source/ge_profiling_config.py +3 -3
  61. datahub/ingestion/source/iceberg/iceberg.py +3 -3
  62. datahub/ingestion/source/identity/azure_ad.py +3 -3
  63. datahub/ingestion/source/identity/okta.py +3 -3
  64. datahub/ingestion/source/kafka/kafka.py +11 -9
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  68. datahub/ingestion/source/looker/looker_common.py +19 -19
  69. datahub/ingestion/source/looker/looker_config.py +3 -3
  70. datahub/ingestion/source/looker/looker_source.py +25 -25
  71. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  72. datahub/ingestion/source/looker/looker_usage.py +5 -7
  73. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  74. datahub/ingestion/source/looker/lookml_source.py +13 -15
  75. datahub/ingestion/source/looker/view_upstream.py +5 -5
  76. datahub/ingestion/source/mlflow.py +4 -4
  77. datahub/ingestion/source/mongodb.py +6 -4
  78. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  79. datahub/ingestion/source/nifi.py +24 -26
  80. datahub/ingestion/source/openapi.py +9 -9
  81. datahub/ingestion/source/powerbi/config.py +12 -12
  82. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  83. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  84. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  85. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  87. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  88. datahub/ingestion/source/redshift/config.py +3 -3
  89. datahub/ingestion/source/redshift/redshift.py +12 -12
  90. datahub/ingestion/source/redshift/usage.py +8 -8
  91. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  92. datahub/ingestion/source/s3/source.py +1 -1
  93. datahub/ingestion/source/salesforce.py +26 -25
  94. datahub/ingestion/source/schema/json_schema.py +1 -1
  95. datahub/ingestion/source/sigma/sigma.py +3 -3
  96. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  97. datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
  98. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  99. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  100. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
  101. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
  102. datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
  103. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
  104. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  105. datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
  106. datahub/ingestion/source/sql/athena.py +1 -3
  107. datahub/ingestion/source/sql/clickhouse.py +8 -14
  108. datahub/ingestion/source/sql/oracle.py +1 -3
  109. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  110. datahub/ingestion/source/sql/teradata.py +16 -3
  111. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  112. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  113. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  114. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  115. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  116. datahub/ingestion/source/tableau/tableau.py +48 -49
  117. datahub/ingestion/source/unity/config.py +3 -1
  118. datahub/ingestion/source/unity/proxy.py +1 -1
  119. datahub/ingestion/source/unity/source.py +3 -3
  120. datahub/ingestion/source/unity/usage.py +3 -1
  121. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  122. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  123. datahub/ingestion/source/usage/usage_common.py +1 -1
  124. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  125. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  126. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  127. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  128. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  129. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  130. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  131. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  132. datahub/lite/duckdb_lite.py +12 -10
  133. datahub/metadata/_schema_classes.py +1 -1
  134. datahub/metadata/schema.avsc +6 -2
  135. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  136. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  137. datahub/secret/secret_common.py +14 -8
  138. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  139. datahub/sql_parsing/schema_resolver.py +5 -10
  140. datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
  141. datahub/sql_parsing/sqlglot_lineage.py +5 -4
  142. datahub/sql_parsing/sqlglot_utils.py +3 -2
  143. datahub/telemetry/stats.py +1 -2
  144. datahub/testing/mcp_diff.py +1 -1
  145. datahub/utilities/file_backed_collections.py +10 -10
  146. datahub/utilities/hive_schema_to_avro.py +2 -2
  147. datahub/utilities/logging_manager.py +2 -2
  148. datahub/utilities/lossy_collections.py +3 -3
  149. datahub/utilities/mapping.py +3 -3
  150. datahub/utilities/serialized_lru_cache.py +3 -1
  151. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  152. datahub/utilities/sqllineage_patch.py +1 -1
  153. datahub/utilities/stats_collections.py +3 -1
  154. datahub/utilities/urns/urn_iter.py +2 -2
  155. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
  156. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
  157. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
@@ -477,9 +477,9 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
                 upstream_dataset_urns
                 and dataset_urn not in self.dataset_upstream_urn_mapping
             ):
-                self.dataset_upstream_urn_mapping[
-                    dataset_urn
-                ] = upstream_dataset_urns
+                self.dataset_upstream_urn_mapping[dataset_urn] = (
+                    upstream_dataset_urns
+                )

             element_input_fields = [
                 InputFieldClass(
@@ -126,9 +126,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for workspace_dict in response_dict[Constant.ENTRIES]:
-                self.workspaces[
-                    workspace_dict[Constant.WORKSPACEID]
-                ] = Workspace.parse_obj(workspace_dict)
+                self.workspaces[workspace_dict[Constant.WORKSPACEID]] = (
+                    Workspace.parse_obj(workspace_dict)
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:
@@ -147,9 +147,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for user_dict in response_dict[Constant.ENTRIES]:
-                users[
-                    user_dict[Constant.MEMBERID]
-                ] = f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+                users[user_dict[Constant.MEMBERID]] = (
+                    f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:
@@ -327,10 +327,12 @@ class SigmaAPI:
             response.raise_for_status()
             for i, element_dict in enumerate(response.json()[Constant.ENTRIES]):
                 if not element_dict.get(Constant.NAME):
-                    element_dict[Constant.NAME] = f"Element {i+1} of Page '{page.name}'"
-                element_dict[
-                    Constant.URL
-                ] = f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+                    element_dict[Constant.NAME] = (
+                        f"Element {i + 1} of Page '{page.name}'"
+                    )
+                element_dict[Constant.URL] = (
+                    f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+                )
                 element = Element.parse_obj(element_dict)
                 if (
                     self.config.extract_lineage
@@ -384,18 +384,20 @@ class SnowflakeV2Config(
             assert all(
                 consumer.platform_instance != share_details.platform_instance
                 for consumer in share_details.consumers
-            ), "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+            ), (
+                "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+            )

             databases_included_in_share.append(shared_db)
             databases_created_from_share.extend(share_details.consumers)

         for db_from_share in databases_created_from_share:
-            assert (
-                db_from_share not in databases_included_in_share
-            ), "Database included in a share can not be present as consumer in any share."
-            assert (
-                databases_created_from_share.count(db_from_share) == 1
-            ), "Same database can not be present as consumer in more than one share."
+            assert db_from_share not in databases_included_in_share, (
+                "Database included in a share can not be present as consumer in any share."
+            )
+            assert databases_created_from_share.count(db_from_share) == 1, (
+                "Same database can not be present as consumer in more than one share."
+            )

         return shares

@@ -250,9 +250,9 @@ class SnowflakeConnectionConfig(ConfigModel):
         if self.private_key is not None:
             pkey_bytes = self.private_key.replace("\\n", "\n").encode()
         else:
-            assert (
-                self.private_key_path
-            ), "missing required private key path to read key from"
+            assert self.private_key_path, (
+                "missing required private key path to read key from"
+            )
             with open(self.private_key_path, "rb") as key:
                 pkey_bytes = key.read()

@@ -284,9 +284,9 @@ class SnowflakeConnectionConfig(ConfigModel):
         return self.options

     def get_oauth_connection(self) -> NativeSnowflakeConnection:
-        assert (
-            self.oauth_config
-        ), "oauth_config should be provided if using oauth based authentication"
+        assert self.oauth_config, (
+            "oauth_config should be provided if using oauth based authentication"
+        )
         generator = OAuthTokenGenerator(
             client_id=self.oauth_config.client_id,
             authority_url=self.oauth_config.authority_url,
@@ -623,7 +623,7 @@ fingerprinted_queries as (
         query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
         AND execution_status = 'SUCCESS'
-        AND {users_filter or 'TRUE'}
+        AND {users_filter or "TRUE"}
     )
 , deduplicated_queries as (
     SELECT
@@ -651,7 +651,7 @@ fingerprinted_queries as (
     WHERE
         query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
-        AND {users_filter or 'TRUE'}
+        AND {users_filter or "TRUE"}
         AND query_id IN (
             SELECT query_id FROM deduplicated_queries
         )
@@ -142,9 +142,9 @@ class _SnowflakeTagCache:
         )

         # self._table_tags[<database_name>][<schema_name>][<table_name>] = list of tags applied to table
-        self._table_tags: Dict[
-            str, Dict[str, Dict[str, List[SnowflakeTag]]]
-        ] = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        self._table_tags: Dict[str, Dict[str, Dict[str, List[SnowflakeTag]]]] = (
+            defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        )

         # self._column_tags[<database_name>][<schema_name>][<table_name>][<column_name>] = list of tags applied to column
         self._column_tags: Dict[
@@ -194,9 +194,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             config, self.data_dictionary, self.report
         )
         self.profiler: Optional[SnowflakeProfiler] = profiler
-        self.snowsight_url_builder: Optional[
-            SnowsightUrlBuilder
-        ] = snowsight_url_builder
+        self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = (
+            snowsight_url_builder
+        )

         # These are populated as side-effects of get_workunits_internal.
         self.databases: List[SnowflakeDatabase] = []
@@ -267,9 +267,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             )
             return None
         else:
-            ischema_databases: List[
-                SnowflakeDatabase
-            ] = self.get_databases_from_ischema(databases)
+            ischema_databases: List[SnowflakeDatabase] = (
+                self.get_databases_from_ischema(databases)
+            )

             if len(ischema_databases) == 0:
                 self.structured_reporter.failure(
@@ -38,9 +38,9 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         table_name: Optional[str],
     ) -> List[SnowflakeTag]:
         if db_name not in self.tag_cache:
-            self.tag_cache[
-                db_name
-            ] = self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+            self.tag_cache[db_name] = (
+                self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+            )

         if domain == SnowflakeObjectDomain.DATABASE:
             return self.tag_cache[db_name].get_database_tags(db_name)
@@ -130,10 +130,10 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         temp_column_tags: Dict[str, List[SnowflakeTag]] = {}
         if self.config.extract_tags == TagOption.without_lineage:
             if db_name not in self.tag_cache:
-                self.tag_cache[
-                    db_name
-                ] = self.data_dictionary.get_tags_for_database_without_propagation(
-                    db_name
+                self.tag_cache[db_name] = (
+                    self.data_dictionary.get_tags_for_database_without_propagation(
+                        db_name
+                    )
                 )
             temp_column_tags = self.tag_cache[db_name].get_column_tags_for_table(
                 table_name, schema_name, db_name
@@ -549,9 +549,9 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
         ):
             # NOTE: Generated emails may be incorrect, as email may be different than
             # username@email_domain
-            event_dict[
-                "EMAIL"
-            ] = f'{event_dict["USER_NAME"]}@{self.config.email_domain}'.lower()
+            event_dict["EMAIL"] = (
+                f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower()
+            )

         if not event_dict["EMAIL"]:
             self.report.rows_missing_email += 1
@@ -21,8 +21,7 @@ from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Repor
 class SnowflakeStructuredReportMixin(abc.ABC):
     @property
     @abc.abstractmethod
-    def structured_reporter(self) -> SourceReport:
-        ...
+    def structured_reporter(self) -> SourceReport: ...


 class SnowsightUrlBuilder:
@@ -211,9 +211,9 @@ class SnowflakeV2Source(

         self.usage_extractor: Optional[SnowflakeUsageExtractor] = None
         if self.config.include_usage_stats or self.config.include_operational_stats:
-            redundant_usage_run_skip_handler: Optional[
-                RedundantUsageRunSkipHandler
-            ] = None
+            redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+                None
+            )
             if self.config.enable_stateful_usage_ingestion:
                 redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
                     source=self,
@@ -296,7 +296,16 @@ class SnowflakeV2Source(

         _report: Dict[Union[SourceCapability, str], CapabilityReport] = dict()
         privileges: List[SnowflakePrivilege] = []
-        capabilities: List[SourceCapability] = [c.capability for c in SnowflakeV2Source.get_capabilities() if c.capability not in (SourceCapability.PLATFORM_INSTANCE, SourceCapability.DOMAINS, SourceCapability.DELETION_DETECTION)]  # type: ignore
+        capabilities: List[SourceCapability] = [
+            c.capability
+            for c in SnowflakeV2Source.get_capabilities()  # type: ignore
+            if c.capability
+            not in (
+                SourceCapability.PLATFORM_INSTANCE,
+                SourceCapability.DOMAINS,
+                SourceCapability.DELETION_DETECTION,
+            )
+        ]

         cur = conn.query("select current_role()")
         current_role = [row["CURRENT_ROLE()"] for row in cur][0]
@@ -104,9 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
         return "\n".join([r for r in res])

     @typing.no_type_check
-    def _get_column_type(
-        self, type_: Union[str, Dict[str, Any]]
-    ) -> TypeEngine:  # noqa: C901
+    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:  # noqa: C901
         """Derives the data type of the Athena column.

         This method is overwritten to extend the behavior of PyAthena.
@@ -218,9 +218,7 @@ def _get_all_table_comments_and_properties(self, connection, **kw):
              , comment
              , {properties_clause} AS properties
         FROM system.tables
-        WHERE name NOT LIKE '.inner%'""".format(
-            properties_clause=properties_clause
-        )
+        WHERE name NOT LIKE '.inner%'""".format(properties_clause=properties_clause)
     )

     all_table_comments: Dict[Tuple[str, str], Dict[str, Any]] = {}
@@ -268,7 +266,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw):
     info_cache = kw.get("info_cache")
     all_relations = self._get_all_relation_info(connection, info_cache=info_cache)
     relation_names = []
-    for key, relation in all_relations.items():
+    for _, relation in all_relations.items():
         if relation.database == schema and relation.relkind == relkind:
             relation_names.append(relation.relname)
     return relation_names
@@ -301,9 +299,7 @@ def _get_schema_column_info(self, connection, schema=None, **kw):
              , comment
         FROM system.columns
         WHERE {schema_clause}
-        ORDER BY database, table, position""".format(
-            schema_clause=schema_clause
-        )
+        ORDER BY database, table, position""".format(schema_clause=schema_clause)
             )
         )
     )
@@ -474,7 +470,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **self.config.options)
         for db_row in engine.execute(text(all_tables_query)):
-            all_tables_set.add(f'{db_row["database"]}.{db_row["table_name"]}')
+            all_tables_set.add(f"{db_row['database']}.{db_row['table_name']}")

         return all_tables_set

@@ -503,7 +499,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

         try:
             for db_row in engine.execute(text(query)):
-                dataset_name = f'{db_row["target_schema"]}.{db_row["target_table"]}'
+                dataset_name = f"{db_row['target_schema']}.{db_row['target_table']}"
                 if not self.config.database_pattern.allowed(
                     db_row["target_schema"]
                 ) or not self.config.table_pattern.allowed(dataset_name):
@@ -512,7 +508,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

                 # Target
                 target_path = (
-                    f'{self.config.platform_instance+"." if self.config.platform_instance else ""}'
+                    f"{self.config.platform_instance + '.' if self.config.platform_instance else ''}"
                     f"{dataset_name}"
                 )
                 target = LineageItem(
@@ -525,7 +521,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):

                 # Source
                 platform = LineageDatasetPlatform.CLICKHOUSE
-                path = f'{db_row["source_schema"]}.{db_row["source_table"]}'
+                path = f"{db_row['source_schema']}.{db_row['source_table']}"

                 sources = [
                     LineageDataset(
@@ -552,9 +548,7 @@ class ClickHouseSource(TwoTierSQLAlchemySource):
                         target.dataset.path
                     ].upstreams = self._lineage_map[
                         target.dataset.path
-                    ].upstreams.union(
-                        target.upstreams
-                    )
+                    ].upstreams.union(target.upstreams)

                 else:
                     self._lineage_map[target.dataset.path] = target
@@ -234,9 +234,7 @@ class OracleInspectorObjectWrapper:
                 WHERE col.table_name = id.table_name
                 AND col.column_name = id.column_name
                 AND col.owner = id.owner
-            ) AS identity_options""".format(
-                dblink=dblink
-            )
+            ) AS identity_options""".format(dblink=dblink)
         else:
             identity_cols = "NULL as default_on_null, NULL as identity_options"

@@ -278,8 +278,7 @@ class GenericProfiler:

         if self.config.profiling.profile_table_size_limit is not None and (
             size_in_bytes is not None
-            and size_in_bytes / (2**30)
-            > self.config.profiling.profile_table_size_limit
+            and size_in_bytes / (2**30) > self.config.profiling.profile_table_size_limit
         ):
             self.report.profiling_skipped_size_limit[schema_name] += 1
             logger.debug(
@@ -599,7 +599,12 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_columns",
-            lambda self, connection, table_name, schema=None, use_qvci=self.config.use_qvci, **kw: optimized_get_columns(
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            use_qvci=self.config.use_qvci,
+            **kw: optimized_get_columns(
                 self,
                 connection,
                 table_name,
@@ -613,7 +618,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_pk_constraint",
-            lambda self, connection, table_name, schema=None, **kw: optimized_get_pk_constraint(
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_pk_constraint(
                 self, connection, table_name, schema, **kw
             ),
         )
@@ -621,7 +630,11 @@ ORDER by DataBaseName, TableName;
         setattr(  # noqa: B010
             TeradataDialect,
             "get_foreign_keys",
-            lambda self, connection, table_name, schema=None, **kw: optimized_get_foreign_keys(
+            lambda self,
+            connection,
+            table_name,
+            schema=None,
+            **kw: optimized_get_foreign_keys(
                 self, connection, table_name, schema, **kw
             ),
         )
@@ -41,9 +41,9 @@ class ProfilingHandler(StatefulIngestionUsecaseHandlerBase[ProfilingCheckpointSt
         run_id: str,
     ):
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            ProfilingStatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[ProfilingStatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self.checkpointing_enabled: bool = (
@@ -48,9 +48,9 @@ class RedundantRunSkipHandler(
     ):
         self.source = source
         self.state_provider = source.state_provider
-        self.stateful_ingestion_config: Optional[
-            StatefulIngestionConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulIngestionConfig] = (
+            config.stateful_ingestion
+        )
         self.pipeline_name = pipeline_name
         self.run_id = run_id
         self._job_id = self._init_job_id()
@@ -145,8 +145,7 @@ class RedundantRunSkipHandler(
         )

         logger.debug(
-            f"{self.job_id} : Last run start, end times:"
-            f"({last_run_time_window})"
+            f"{self.job_id} : Last run start, end times:({last_run_time_window})"
         )

         # If current run's time window is subset of last run's time window, then skip.
@@ -212,8 +211,7 @@ class RedundantRunSkipHandler(
         )

         self.log(
-            "Adjusted start, end times: "
-            f"({suggested_start_time}, {suggested_end_time})"
+            f"Adjusted start, end times: ({suggested_start_time}, {suggested_end_time})"
         )
         return (suggested_start_time, suggested_end_time)

@@ -111,9 +111,9 @@ class StaleEntityRemovalHandler(
         self.state_type_class = state_type_class
         self.pipeline_name = pipeline_name
         self.run_id = run_id
-        self.stateful_ingestion_config: Optional[
-            StatefulStaleMetadataRemovalConfig
-        ] = config.stateful_ingestion
+        self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
+            config.stateful_ingestion
+        )
         self.checkpointing_enabled: bool = (
             True
             if (
@@ -70,20 +70,20 @@ class DatahubIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             self.orchestrator_name, pipeline_name, job_name
         )

-        latest_checkpoint: Optional[
-            DatahubIngestionCheckpointClass
-        ] = self.graph.get_latest_timeseries_value(
-            entity_urn=data_job_urn,
-            aspect_type=DatahubIngestionCheckpointClass,
-            filter_criteria_map={
-                "pipelineName": pipeline_name,
-            },
+        latest_checkpoint: Optional[DatahubIngestionCheckpointClass] = (
+            self.graph.get_latest_timeseries_value(
+                entity_urn=data_job_urn,
+                aspect_type=DatahubIngestionCheckpointClass,
+                filter_criteria_map={
+                    "pipelineName": pipeline_name,
+                },
+            )
         )
         if latest_checkpoint:
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else:
@@ -67,7 +67,7 @@ class FileIngestionCheckpointingProvider(IngestionCheckpointingProviderBase):
             logger.debug(
                 f"The last committed ingestion checkpoint for pipelineName:'{pipeline_name}',"
                 f" job_name:'{job_name}' found with start_time:"
-                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis/1000)}"
+                f" {datetime.utcfromtimestamp(latest_checkpoint.timestampMillis / 1000)}"
             )
             return latest_checkpoint
         else: