acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (157)
  1. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2335 -2337
  2. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +157 -157
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/assertion/assertion_operator.py +3 -5
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  7. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  8. datahub/api/entities/dataset/dataset.py +2 -1
  9. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  10. datahub/cli/cli_utils.py +1 -1
  11. datahub/cli/docker_cli.py +6 -6
  12. datahub/cli/lite_cli.py +2 -2
  13. datahub/cli/migrate.py +3 -3
  14. datahub/cli/specific/assertions_cli.py +3 -3
  15. datahub/cli/timeline_cli.py +1 -1
  16. datahub/configuration/common.py +1 -2
  17. datahub/configuration/config_loader.py +73 -50
  18. datahub/configuration/git.py +2 -2
  19. datahub/configuration/time_window_config.py +10 -5
  20. datahub/emitter/mce_builder.py +4 -8
  21. datahub/emitter/mcp_patch_builder.py +1 -2
  22. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  23. datahub/ingestion/api/report.py +1 -2
  24. datahub/ingestion/api/source_helpers.py +1 -1
  25. datahub/ingestion/extractor/json_schema_util.py +3 -3
  26. datahub/ingestion/extractor/schema_util.py +3 -5
  27. datahub/ingestion/fs/s3_fs.py +3 -3
  28. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  29. datahub/ingestion/graph/client.py +4 -6
  30. datahub/ingestion/run/pipeline.py +8 -7
  31. datahub/ingestion/run/pipeline_config.py +3 -3
  32. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  33. datahub/ingestion/source/abs/source.py +19 -8
  34. datahub/ingestion/source/aws/glue.py +11 -11
  35. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  36. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  37. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  38. datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
  39. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  40. datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
  41. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  42. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
  43. datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
  44. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  45. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  46. datahub/ingestion/source/bigquery_v2/usage.py +3 -3
  47. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  48. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
  49. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  50. datahub/ingestion/source/csv_enricher.py +29 -29
  51. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  52. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  53. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  54. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  55. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  56. datahub/ingestion/source/elastic_search.py +4 -4
  57. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
  58. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  59. datahub/ingestion/source/ge_data_profiler.py +4 -5
  60. datahub/ingestion/source/ge_profiling_config.py +3 -3
  61. datahub/ingestion/source/iceberg/iceberg.py +3 -3
  62. datahub/ingestion/source/identity/azure_ad.py +3 -3
  63. datahub/ingestion/source/identity/okta.py +3 -3
  64. datahub/ingestion/source/kafka/kafka.py +11 -9
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  68. datahub/ingestion/source/looker/looker_common.py +19 -19
  69. datahub/ingestion/source/looker/looker_config.py +3 -3
  70. datahub/ingestion/source/looker/looker_source.py +25 -25
  71. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  72. datahub/ingestion/source/looker/looker_usage.py +5 -7
  73. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  74. datahub/ingestion/source/looker/lookml_source.py +13 -15
  75. datahub/ingestion/source/looker/view_upstream.py +5 -5
  76. datahub/ingestion/source/mlflow.py +4 -4
  77. datahub/ingestion/source/mongodb.py +6 -4
  78. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  79. datahub/ingestion/source/nifi.py +24 -26
  80. datahub/ingestion/source/openapi.py +9 -9
  81. datahub/ingestion/source/powerbi/config.py +12 -12
  82. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  83. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  84. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  85. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  87. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  88. datahub/ingestion/source/redshift/config.py +3 -3
  89. datahub/ingestion/source/redshift/redshift.py +12 -12
  90. datahub/ingestion/source/redshift/usage.py +8 -8
  91. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  92. datahub/ingestion/source/s3/source.py +1 -1
  93. datahub/ingestion/source/salesforce.py +26 -25
  94. datahub/ingestion/source/schema/json_schema.py +1 -1
  95. datahub/ingestion/source/sigma/sigma.py +3 -3
  96. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  97. datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
  98. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  99. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  100. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
  101. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
  102. datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
  103. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
  104. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  105. datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
  106. datahub/ingestion/source/sql/athena.py +1 -3
  107. datahub/ingestion/source/sql/clickhouse.py +8 -14
  108. datahub/ingestion/source/sql/oracle.py +1 -3
  109. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  110. datahub/ingestion/source/sql/teradata.py +16 -3
  111. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  112. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  113. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  114. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  115. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  116. datahub/ingestion/source/tableau/tableau.py +48 -49
  117. datahub/ingestion/source/unity/config.py +3 -1
  118. datahub/ingestion/source/unity/proxy.py +1 -1
  119. datahub/ingestion/source/unity/source.py +3 -3
  120. datahub/ingestion/source/unity/usage.py +3 -1
  121. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  122. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  123. datahub/ingestion/source/usage/usage_common.py +1 -1
  124. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  125. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  126. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  127. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  128. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  129. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  130. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  131. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  132. datahub/lite/duckdb_lite.py +12 -10
  133. datahub/metadata/_schema_classes.py +1 -1
  134. datahub/metadata/schema.avsc +6 -2
  135. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  136. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  137. datahub/secret/secret_common.py +14 -8
  138. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  139. datahub/sql_parsing/schema_resolver.py +5 -10
  140. datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
  141. datahub/sql_parsing/sqlglot_lineage.py +5 -4
  142. datahub/sql_parsing/sqlglot_utils.py +3 -2
  143. datahub/telemetry/stats.py +1 -2
  144. datahub/testing/mcp_diff.py +1 -1
  145. datahub/utilities/file_backed_collections.py +10 -10
  146. datahub/utilities/hive_schema_to_avro.py +2 -2
  147. datahub/utilities/logging_manager.py +2 -2
  148. datahub/utilities/lossy_collections.py +3 -3
  149. datahub/utilities/mapping.py +3 -3
  150. datahub/utilities/serialized_lru_cache.py +3 -1
  151. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  152. datahub/utilities/sqllineage_patch.py +1 -1
  153. datahub/utilities/stats_collections.py +3 -1
  154. datahub/utilities/urns/urn_iter.py +2 -2
  155. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
  156. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
  157. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/mongodb.py
@@ -288,7 +288,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
 
         # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
         self.mongo_client = MongoClient(
-            self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options  # type: ignore
+            self.config.connect_uri,
+            datetime_conversion="DATETIME_AUTO",
+            **options,  # type: ignore
         )
 
         # This cheaply tests the connection. For details, see
@@ -470,9 +472,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
            )
            # Add this information to the custom properties so user can know they are looking at downsampled schema
            dataset_properties.customProperties["schema.downsampled"] = "True"
-           dataset_properties.customProperties[
-               "schema.totalFields"
-           ] = f"{collection_schema_size}"
+           dataset_properties.customProperties["schema.totalFields"] = (
+               f"{collection_schema_size}"
+           )
 
            logger.debug(f"Size of collection fields = {len(collection_fields)}")
            # append each schema field (sort so output is consistent)
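The new MongoClient call keeps the same behavior and only reflows the arguments: datetime_conversion="DATETIME_AUTO" (PyMongo 4.3+) makes BSON datetimes that fall outside Python's datetime range decode as bson.datetime_ms.DatetimeMS instead of raising, per the pymongo docs linked in the hunk. A minimal standalone sketch of that option; the URI, database, collection, and field name are placeholders, not values from this connector:

# Sketch only: out-of-range BSON datetimes decode as DatetimeMS rather than raising.
from bson.datetime_ms import DatetimeMS
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017", datetime_conversion="DATETIME_AUTO")
doc = client["some_db"]["some_collection"].find_one()
if doc is not None and isinstance(doc.get("created_at"), DatetimeMS):
    # DatetimeMS is an int subclass holding milliseconds since the Unix epoch.
    print("out-of-range datetime, raw milliseconds:", int(doc["created_at"]))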
datahub/ingestion/source/neo4j/neo4j_source.py
@@ -286,7 +286,7 @@ class Neo4jSource(Source):
        df = self.get_neo4j_metadata(
            "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
        )
-       for index, row in df.iterrows():
+       for _, row in df.iterrows():
            try:
                yield MetadataWorkUnit(
                    id=row["key"],
datahub/ingestion/source/nifi.py
@@ -184,9 +184,9 @@ class NifiSourceConfig(EnvConfigMixin):
 
     @validator("site_url")
     def validator_site_url(cls, site_url: str) -> str:
-        assert site_url.startswith(
-            ("http://", "https://")
-        ), "site_url must start with http:// or https://"
+        assert site_url.startswith(("http://", "https://")), (
+            "site_url must start with http:// or https://"
+        )
 
         if not site_url.endswith("/"):
             site_url = site_url + "/"
@@ -487,9 +487,7 @@ class NifiSource(Source):
     def get_report(self) -> SourceReport:
         return self.report
 
-    def update_flow(
-        self, pg_flow_dto: Dict, recursion_level: int = 0
-    ) -> None:  # noqa: C901
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:  # noqa: C901
        """
        Update self.nifi_flow with contents of the input process group `pg_flow_dto`
        """
@@ -548,16 +546,16 @@ class NifiSource(Source):
        for inputPort in flow_dto.get("inputPorts", []):
            component = inputPort.get("component")
            if inputPort.get("allowRemoteAccess"):
-               self.nifi_flow.remotely_accessible_ports[
-                   component.get("id")
-               ] = NifiComponent(
-                   component.get("id"),
-                   component.get("name"),
-                   component.get("type"),
-                   component.get("parentGroupId"),
-                   NifiType.INPUT_PORT,
-                   comments=component.get("comments"),
-                   status=component.get("status", {}).get("runStatus"),
+               self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                   NifiComponent(
+                       component.get("id"),
+                       component.get("name"),
+                       component.get("type"),
+                       component.get("parentGroupId"),
+                       NifiType.INPUT_PORT,
+                       comments=component.get("comments"),
+                       status=component.get("status", {}).get("runStatus"),
+                   )
                )
                logger.debug(f"Adding remotely accessible port {component.get('id')}")
            else:
@@ -576,16 +574,16 @@ class NifiSource(Source):
        for outputPort in flow_dto.get("outputPorts", []):
            component = outputPort.get("component")
            if outputPort.get("allowRemoteAccess"):
-               self.nifi_flow.remotely_accessible_ports[
-                   component.get("id")
-               ] = NifiComponent(
-                   component.get("id"),
-                   component.get("name"),
-                   component.get("type"),
-                   component.get("parentGroupId"),
-                   NifiType.OUTPUT_PORT,
-                   comments=component.get("comments"),
-                   status=component.get("status", {}).get("runStatus"),
+               self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                   NifiComponent(
+                       component.get("id"),
+                       component.get("name"),
+                       component.get("type"),
+                       component.get("parentGroupId"),
+                       NifiType.OUTPUT_PORT,
+                       comments=component.get("comments"),
+                       status=component.get("status", {}).get("runStatus"),
+                   )
                )
                logger.debug(f"Adding remotely accessible port {component.get('id')}")
            else:
datahub/ingestion/source/openapi.py
@@ -101,16 +101,16 @@ class OpenApiConfig(ConfigModel):
            # details there once, and then use that session for all requests.
            self.token = f"Bearer {self.bearer_token}"
        else:
-           assert (
-               "url_complement" in self.get_token.keys()
-           ), "When 'request_type' is set to 'get', an url_complement is needed for the request."
+           assert "url_complement" in self.get_token.keys(), (
+               "When 'request_type' is set to 'get', an url_complement is needed for the request."
+           )
            if self.get_token["request_type"] == "get":
-               assert (
-                   "{username}" in self.get_token["url_complement"]
-               ), "we expect the keyword {username} to be present in the url"
-               assert (
-                   "{password}" in self.get_token["url_complement"]
-               ), "we expect the keyword {password} to be present in the url"
+               assert "{username}" in self.get_token["url_complement"], (
+                   "we expect the keyword {username} to be present in the url"
+               )
+               assert "{password}" in self.get_token["url_complement"], (
+                   "we expect the keyword {password} to be present in the url"
+               )
                url4req = self.get_token["url_complement"].replace(
                    "{username}", self.username
                )
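This hunk, like the NiFi, Redshift, and Salesforce hunks elsewhere in the diff, only moves parentheses: the condition stays to the left of the comma and the parentheses now wrap the message alone, so the assertions behave exactly as before. The shape matters because wrapping the whole condition-and-message pair in parentheses would turn the assert into a check on a non-empty tuple, which is always truthy. A small self-contained demonstration; the dictionary here is a stand-in for self.get_token, not the connector's object:

# Illustrative only: why the parentheses wrap the message, never the (condition, message) pair.
get_token: dict = {}

try:
    assert "url_complement" in get_token, (
        "When 'request_type' is set to 'get', an url_complement is needed for the request."
    )
except AssertionError as e:
    print("raised as expected:", e)

# Anti-pattern: a parenthesized two-element tuple is always truthy, so this can never fail.
# CPython even emits "SyntaxWarning: assertion is always true" for it.
assert ("url_complement" in get_token, "this message is never shown")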
datahub/ingestion/source/powerbi/config.py
@@ -225,9 +225,9 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
def default_for_dataset_type_mapping() -> Dict[str, str]:
    dict_: dict = {}
    for item in SupportedDataPlatform:
-       dict_[
-           item.value.powerbi_data_platform_name
-       ] = item.value.datahub_data_platform_name
+       dict_[item.value.powerbi_data_platform_name] = (
+           item.value.datahub_data_platform_name
+       )
 
    return dict_
 
@@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig(
    # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
    # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
    # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
-   dataset_type_mapping: Union[
-       Dict[str, str], Dict[str, PlatformDetail]
-   ] = pydantic.Field(
-       default_factory=default_for_dataset_type_mapping,
-       description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
-       "DataHub supported datasources."
-       "You can configured platform instance for dataset lineage. "
-       "See Quickstart Recipe for mapping",
-       hidden_from_docs=True,
+   dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
+       pydantic.Field(
+           default_factory=default_for_dataset_type_mapping,
+           description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+           "DataHub supported datasources."
+           "You can configured platform instance for dataset lineage. "
+           "See Quickstart Recipe for mapping",
+           hidden_from_docs=True,
+       )
    )
    # PowerBI datasource's server to platform instance mapping
    server_to_platform_instance: Dict[
datahub/ingestion/source/powerbi/m_query/parser.py
@@ -128,17 +128,17 @@ def get_upstream_tables(
    reporter.m_query_parse_successes += 1
 
    try:
-       lineage: List[
-           datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
-       ] = resolver.MQueryResolver(
-           table=table,
-           parse_tree=parse_tree,
-           reporter=reporter,
-           parameters=parameters,
-       ).resolve_to_lineage(
-           ctx=ctx,
-           config=config,
-           platform_instance_resolver=platform_instance_resolver,
+       lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+           resolver.MQueryResolver(
+               table=table,
+               parse_tree=parse_tree,
+               reporter=reporter,
+               parameters=parameters,
+           ).resolve_to_lineage(
+               ctx=ctx,
+               config=config,
+               platform_instance_resolver=platform_instance_resolver,
+           )
        )
 
        if lineage:
datahub/ingestion/source/powerbi/m_query/pattern_handler.py
@@ -170,8 +170,7 @@ class AbstractLineage(ABC):
        logger.debug(f"Processing arguments {arguments}")
 
        if (
-           len(arguments)
-           >= 4  # [0] is warehouse FQDN.
+           len(arguments) >= 4  # [0] is warehouse FQDN.
            # [1] is endpoint, we are not using it.
            # [2] is "Catalog" key
            # [3] is catalog's value
@@ -215,16 +214,16 @@ class AbstractLineage(ABC):
            native_sql_parser.remove_special_characters(query)
        )
 
-       parsed_result: Optional[
-           "SqlParsingResult"
-       ] = native_sql_parser.parse_custom_sql(
-           ctx=self.ctx,
-           query=query,
-           platform=self.get_platform_pair().datahub_data_platform_name,
-           platform_instance=platform_detail.platform_instance,
-           env=platform_detail.env,
-           database=database,
-           schema=schema,
+       parsed_result: Optional["SqlParsingResult"] = (
+           native_sql_parser.parse_custom_sql(
+               ctx=self.ctx,
+               query=query,
+               platform=self.get_platform_pair().datahub_data_platform_name,
+               platform_instance=platform_detail.platform_instance,
+               env=platform_detail.env,
+               database=database,
+               schema=schema,
+           )
        )
 
        if parsed_result is None:
@@ -410,9 +409,9 @@ class DatabricksLineage(AbstractLineage):
            f"Processing Databrick data-access function detail {data_access_func_detail}"
        )
        table_detail: Dict[str, str] = {}
-       temp_accessor: Optional[
-           IdentifierAccessor
-       ] = data_access_func_detail.identifier_accessor
+       temp_accessor: Optional[IdentifierAccessor] = (
+           data_access_func_detail.identifier_accessor
+       )
 
        while temp_accessor:
            # Condition to handle databricks M-query pattern where table, schema and database all are present in
@@ -647,11 +646,13 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
        db_name: str = data_access_func_detail.identifier_accessor.items["Name"]  # type: ignore
        # Second is schema name
        schema_name: str = cast(
-           IdentifierAccessor, data_access_func_detail.identifier_accessor.next  # type: ignore
+           IdentifierAccessor,
+           data_access_func_detail.identifier_accessor.next,  # type: ignore
        ).items["Name"]
        # Third is table name
        table_name: str = cast(
-           IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next  # type: ignore
+           IdentifierAccessor,
+           data_access_func_detail.identifier_accessor.next.next,  # type: ignore
        ).items["Name"]
 
        qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}"
@@ -768,10 +769,13 @@ class NativeQueryLineage(AbstractLineage):
        ):  # database name is explicitly set
            return database
 
-       return get_next_item(  # database name is set in Name argument
-           data_access_tokens, "Name"
-       ) or get_next_item(  # If both above arguments are not available, then try Catalog
-           data_access_tokens, "Catalog"
+       return (
+           get_next_item(  # database name is set in Name argument
+               data_access_tokens, "Name"
+           )
+           or get_next_item(  # If both above arguments are not available, then try Catalog
+               data_access_tokens, "Catalog"
+           )
        )
 
    def create_lineage(
@@ -819,9 +823,7 @@ class NativeQueryLineage(AbstractLineage):
            values=tree_function.remove_whitespaces_from_list(
                tree_function.token_values(flat_argument_list[1])
            ),
-       )[
-           0
-       ]  # Remove any whitespaces and double quotes character
+       )[0]  # Remove any whitespaces and double quotes character
 
        server = tree_function.strip_char_from_list([data_access_tokens[2]])[0]
 
datahub/ingestion/source/powerbi/m_query/resolver.py
@@ -188,9 +188,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
        # - The inner function Table.TransformColumnTypes takes #"Removed Columns1"
        #   (a table reference) as its first argument
        # - Its result is then passed as the first argument to Table.SplitColumn
-       second_invoke_expression: Optional[
-           Tree
-       ] = tree_function.first_invoke_expression_func(first_argument)
+       second_invoke_expression: Optional[Tree] = (
+           tree_function.first_invoke_expression_func(first_argument)
+       )
        if second_invoke_expression:
            # 1. The First argument is function call
            # 2. That function's first argument references next table variable
@@ -304,14 +304,14 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
            logger.debug(v_statement.pretty())
            return None
 
-       invoke_expression: Optional[
-           Tree
-       ] = tree_function.first_invoke_expression_func(rh_tree)
+       invoke_expression: Optional[Tree] = (
+           tree_function.first_invoke_expression_func(rh_tree)
+       )
 
        if invoke_expression is not None:
-           result: Union[
-               DataAccessFunctionDetail, List[str], None
-           ] = self._process_invoke_expression(invoke_expression)
+           result: Union[DataAccessFunctionDetail, List[str], None] = (
+               self._process_invoke_expression(invoke_expression)
+           )
            if result is None:
                return None  # No need to process some un-expected grammar found while processing invoke_expression
            if isinstance(result, DataAccessFunctionDetail):
@@ -368,9 +368,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
            return lineage
 
        # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
-       table_links: List[
-           DataAccessFunctionDetail
-       ] = self.create_data_access_functional_detail(output_variable)
+       table_links: List[DataAccessFunctionDetail] = (
+           self.create_data_access_functional_detail(output_variable)
+       )
 
        # Each item is data-access function
        for f_detail in table_links:
@@ -390,7 +390,7 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
 
            # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
            # & also pass additional information that will be need to generate lineage
-           pattern_handler: (AbstractLineage) = supported_resolver.handler()(
+           pattern_handler: AbstractLineage = supported_resolver.handler()(
                ctx=ctx,
                table=self.table,
                config=config,
datahub/ingestion/source/powerbi/powerbi.py
@@ -945,9 +945,9 @@ class Mapper:
        # Convert tiles to charts
        ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
        # Lets convert dashboard to datahub dashboard
-       dashboard_mcps: List[
-           MetadataChangeProposalWrapper
-       ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+       dashboard_mcps: List[MetadataChangeProposalWrapper] = (
+           self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+       )
 
        # Now add MCPs in sequence
        mcps.extend(ds_mcps)
@@ -1472,9 +1472,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
    def _get_dashboard_patch_work_unit(
        self, work_unit: MetadataWorkUnit
    ) -> Optional[MetadataWorkUnit]:
-       dashboard_info_aspect: Optional[
-           DashboardInfoClass
-       ] = work_unit.get_aspect_of_type(DashboardInfoClass)
+       dashboard_info_aspect: Optional[DashboardInfoClass] = (
+           work_unit.get_aspect_of_type(DashboardInfoClass)
+       )
 
        if dashboard_info_aspect and self.source_config.patch_metadata:
            return convert_dashboard_info_to_patch(
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
@@ -425,9 +425,9 @@ class DataResolverBase(ABC):
 
            response.raise_for_status()
 
-           assert (
-               Constant.VALUE in response.json()
-           ), "'value' key is not present in paginated response"
+           assert Constant.VALUE in response.json(), (
+               "'value' key is not present in paginated response"
+           )
 
            if not response.json()[Constant.VALUE]:  # if it is an empty list then break
                break
@@ -447,13 +447,13 @@ class DataResolverBase(ABC):
        if raw_app is None:
            return None
 
-       assert (
-           Constant.ID in raw_app
-       ), f"{Constant.ID} is required field not present in server response"
+       assert Constant.ID in raw_app, (
+           f"{Constant.ID} is required field not present in server response"
+       )
 
-       assert (
-           Constant.NAME in raw_app
-       ), f"{Constant.NAME} is required field not present in server response"
+       assert Constant.NAME in raw_app, (
+           f"{Constant.NAME} is required field not present in server response"
+       )
 
        return App(
            id=raw_app[Constant.ID],
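The two hunks above are again formatting-only; the client still insists that every paginated response carries a value list and that each app payload carries id and name. For context, a hedged sketch of the same guard pattern against a generic OData-style endpoint; the URL, auth header, and @odata.nextLink continuation field are assumptions for illustration, not the connector's actual request flow:

# Sketch of a defensive paginated read; endpoint, auth, and field names are placeholders.
from typing import Any, Dict, List

import requests


def fetch_all(url: str, token: str) -> List[Dict[str, Any]]:
    entries: List[Dict[str, Any]] = []
    while url:
        response = requests.get(url, headers={"Authorization": f"Bearer {token}"})
        response.raise_for_status()
        payload = response.json()
        # Mirror the connector's check on the paginated shape before trusting it.
        assert "value" in payload, (
            "'value' key is not present in paginated response"
        )
        if not payload["value"]:  # empty page means we are done
            break
        entries.extend(payload["value"])
        url = payload.get("@odata.nextLink", "")  # absent on the last page
    return entries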
datahub/ingestion/source/qlik_sense/qlik_api.py
@@ -156,7 +156,7 @@ class QlikAPI:
                )
                if chart:
                    if not chart.title:
-                       chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
+                       chart.title = f"Object {i + 1} of Sheet '{sheet.title}'"
                    sheet.charts.append(chart)
            websocket_connection.handle.pop()
        return sheet
datahub/ingestion/source/redshift/config.py
@@ -178,9 +178,9 @@ class RedshiftConfig(
    @root_validator(pre=True)
    def check_email_is_set_on_usage(cls, values):
        if values.get("include_usage_statistics"):
-           assert (
-               "email_domain" in values and values["email_domain"]
-           ), "email_domain needs to be set if usage is enabled"
+           assert "email_domain" in values and values["email_domain"], (
+               "email_domain needs to be set if usage is enabled"
+           )
        return values
 
    @root_validator(skip_on_failure=True)
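The Redshift config change is the same mechanical rewrite; the validator still requires email_domain whenever include_usage_statistics is enabled. A stripped-down, pydantic v1-style sketch of that kind of cross-field check; the class name and fields are illustrative rather than the connector's full config:

# Minimal pydantic v1-style sketch of a cross-field requirement like the one above.
from typing import Optional

from pydantic import BaseModel, root_validator


class UsageConfig(BaseModel):
    include_usage_statistics: bool = False
    email_domain: Optional[str] = None

    @root_validator(pre=True)
    def check_email_is_set_on_usage(cls, values):
        if values.get("include_usage_statistics"):
            assert "email_domain" in values and values["email_domain"], (
                "email_domain needs to be set if usage is enabled"
            )
        return values


UsageConfig(include_usage_statistics=False)  # ok
# UsageConfig(include_usage_statistics=True)  # raises a ValidationError wrapping the assert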
datahub/ingestion/source/redshift/redshift.py
@@ -305,13 +305,13 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
            test_report.capability_report = {}
            try:
                RedshiftDataDictionary.get_schemas(connection, database=config.database)
-               test_report.capability_report[
-                   SourceCapability.SCHEMA_METADATA
-               ] = CapabilityReport(capable=True)
+               test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+                   CapabilityReport(capable=True)
+               )
            except Exception as e:
-               test_report.capability_report[
-                   SourceCapability.SCHEMA_METADATA
-               ] = CapabilityReport(capable=False, failure_reason=str(e))
+               test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+                   CapabilityReport(capable=False, failure_reason=str(e))
+               )
 
        except Exception as e:
            test_report.basic_connectivity = CapabilityReport(
@@ -947,9 +947,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
    def get_all_tables(
        self,
    ) -> Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]]:
-       all_tables: Dict[
-           str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]
-       ] = defaultdict(dict)
+       all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]] = (
+           defaultdict(dict)
+       )
        for db in set().union(self.db_tables, self.db_views):
            tables = self.db_tables.get(db, {})
            views = self.db_views.get(db, {})
@@ -967,9 +967,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
        all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]],
    ) -> Iterable[MetadataWorkUnit]:
        with PerfTimer() as timer:
-           redundant_usage_run_skip_handler: Optional[
-               RedundantUsageRunSkipHandler
-           ] = None
+           redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+               None
+           )
            if self.config.enable_stateful_usage_ingestion:
                redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
                    source=self,
datahub/ingestion/source/redshift/usage.py
@@ -199,10 +199,10 @@ class RedshiftUsageExtractor:
            end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
            database=self.config.database,
        )
-       access_events_iterable: Iterable[
-           RedshiftAccessEvent
-       ] = self._gen_access_events_from_history_query(
-           query, connection=self.connection, all_tables=all_tables
+       access_events_iterable: Iterable[RedshiftAccessEvent] = (
+           self._gen_access_events_from_history_query(
+               query, connection=self.connection, all_tables=all_tables
+           )
        )
 
        aggregated_events: AggregatedAccessEvents = self._aggregate_access_events(
@@ -225,10 +225,10 @@ class RedshiftUsageExtractor:
            start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT),
            end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
        )
-       access_events_iterable: Iterable[
-           RedshiftAccessEvent
-       ] = self._gen_access_events_from_history_query(
-           query, connection, all_tables=all_tables
+       access_events_iterable: Iterable[RedshiftAccessEvent] = (
+           self._gen_access_events_from_history_query(
+               query, connection, all_tables=all_tables
+           )
        )
 
        # Generate operation aspect work units from the access events
datahub/ingestion/source/s3/datalake_profiler_config.py
@@ -85,8 +85,8 @@ class DataLakeProfilerConfig(ConfigModel):
            if field_level_metric.startswith("include_field_"):
                values.setdefault(field_level_metric, False)
 
-       assert (
-           max_num_fields_to_profile is None
-       ), f"{max_num_fields_to_profile_key} should be set to None"
+       assert max_num_fields_to_profile is None, (
+           f"{max_num_fields_to_profile_key} should be set to None"
+       )
 
        return values
datahub/ingestion/source/s3/source.py
@@ -1124,7 +1124,7 @@ class S3Source(StatefulIngestionSourceBase):
                table_data.table_path
            ].timestamp = table_data.timestamp
 
-       for guid, table_data in table_dict.items():
+       for _, table_data in table_dict.items():
            yield from self.ingest_table(table_data, path_spec)
 
        if not self.source_config.is_profiling_enabled():
datahub/ingestion/source/salesforce.py
@@ -236,12 +236,12 @@ class SalesforceSource(Source):
        try:
            if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
                logger.debug("Access Token Provided in Config")
-               assert (
-                   self.config.access_token is not None
-               ), "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
-               assert (
-                   self.config.instance_url is not None
-               ), "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+               assert self.config.access_token is not None, (
+                   "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
+               )
+               assert self.config.instance_url is not None, (
+                   "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+               )
 
                self.sf = Salesforce(
                    instance_url=self.config.instance_url,
@@ -250,15 +250,15 @@ class SalesforceSource(Source):
                )
            elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD:
                logger.debug("Username/Password Provided in Config")
-               assert (
-                   self.config.username is not None
-               ), "Config username is required for USERNAME_PASSWORD auth"
-               assert (
-                   self.config.password is not None
-               ), "Config password is required for USERNAME_PASSWORD auth"
-               assert (
-                   self.config.security_token is not None
-               ), "Config security_token is required for USERNAME_PASSWORD auth"
+               assert self.config.username is not None, (
+                   "Config username is required for USERNAME_PASSWORD auth"
+               )
+               assert self.config.password is not None, (
+                   "Config password is required for USERNAME_PASSWORD auth"
+               )
+               assert self.config.security_token is not None, (
+                   "Config security_token is required for USERNAME_PASSWORD auth"
+               )
 
                self.sf = Salesforce(
                    username=self.config.username,
@@ -269,15 +269,15 @@ class SalesforceSource(Source):
 
            elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
                logger.debug("Json Web Token provided in the config")
-               assert (
-                   self.config.username is not None
-               ), "Config username is required for JSON_WEB_TOKEN auth"
-               assert (
-                   self.config.consumer_key is not None
-               ), "Config consumer_key is required for JSON_WEB_TOKEN auth"
-               assert (
-                   self.config.private_key is not None
-               ), "Config private_key is required for JSON_WEB_TOKEN auth"
+               assert self.config.username is not None, (
+                   "Config username is required for JSON_WEB_TOKEN auth"
+               )
+               assert self.config.consumer_key is not None, (
+                   "Config consumer_key is required for JSON_WEB_TOKEN auth"
+               )
+               assert self.config.private_key is not None, (
+                   "Config private_key is required for JSON_WEB_TOKEN auth"
+               )
 
                self.sf = Salesforce(
                    username=self.config.username,
@@ -439,7 +439,8 @@ class SalesforceSource(Source):
        dataPlatformInstance = DataPlatformInstanceClass(
            builder.make_data_platform_urn(self.platform),
            instance=builder.make_dataplatform_instance_urn(
-               self.platform, self.config.platform_instance  # type:ignore
+               self.platform,
+               self.config.platform_instance,  # type:ignore
            ),
        )
 
datahub/ingestion/source/schema/json_schema.py
@@ -354,7 +354,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
        browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"
 
        if os.path.isdir(self.config.path):
-           for root, dirs, files in os.walk(self.config.path, topdown=False):
+           for root, _, files in os.walk(self.config.path, topdown=False):
                for file_name in [f for f in files if f.endswith(".json")]:
                    try:
                        yield from self._load_one_file(
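The final hunk only renames the unused dirs variable to _; the bottom-up walk and the .json filter are untouched. A tiny standalone illustration of the same traversal pattern; the ./schemas directory is a placeholder:

# Walk a directory tree bottom-up and collect .json schema files; the path is a placeholder.
import os

json_files = []
for root, _, files in os.walk("./schemas", topdown=False):
    for file_name in (f for f in files if f.endswith(".json")):
        json_files.append(os.path.join(root, file_name))
print(json_files)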