acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.2rc8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (157)
  1. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/METADATA +2335 -2337
  2. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/RECORD +157 -157
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/assertion/assertion_operator.py +3 -5
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  7. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  8. datahub/api/entities/dataset/dataset.py +2 -1
  9. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  10. datahub/cli/cli_utils.py +1 -1
  11. datahub/cli/docker_cli.py +6 -6
  12. datahub/cli/lite_cli.py +2 -2
  13. datahub/cli/migrate.py +3 -3
  14. datahub/cli/specific/assertions_cli.py +3 -3
  15. datahub/cli/timeline_cli.py +1 -1
  16. datahub/configuration/common.py +1 -2
  17. datahub/configuration/config_loader.py +73 -50
  18. datahub/configuration/git.py +2 -2
  19. datahub/configuration/time_window_config.py +10 -5
  20. datahub/emitter/mce_builder.py +4 -8
  21. datahub/emitter/mcp_patch_builder.py +1 -2
  22. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  23. datahub/ingestion/api/report.py +1 -2
  24. datahub/ingestion/api/source_helpers.py +1 -1
  25. datahub/ingestion/extractor/json_schema_util.py +3 -3
  26. datahub/ingestion/extractor/schema_util.py +3 -5
  27. datahub/ingestion/fs/s3_fs.py +3 -3
  28. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  29. datahub/ingestion/graph/client.py +4 -6
  30. datahub/ingestion/run/pipeline.py +8 -7
  31. datahub/ingestion/run/pipeline_config.py +3 -3
  32. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  33. datahub/ingestion/source/abs/source.py +19 -8
  34. datahub/ingestion/source/aws/glue.py +11 -11
  35. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  36. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  37. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  38. datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
  39. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  40. datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
  41. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  42. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
  43. datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
  44. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  45. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  46. datahub/ingestion/source/bigquery_v2/usage.py +3 -3
  47. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  48. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
  49. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  50. datahub/ingestion/source/csv_enricher.py +29 -29
  51. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  52. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  53. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  54. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  55. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  56. datahub/ingestion/source/elastic_search.py +4 -4
  57. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
  58. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  59. datahub/ingestion/source/ge_data_profiler.py +4 -5
  60. datahub/ingestion/source/ge_profiling_config.py +3 -3
  61. datahub/ingestion/source/iceberg/iceberg.py +3 -3
  62. datahub/ingestion/source/identity/azure_ad.py +3 -3
  63. datahub/ingestion/source/identity/okta.py +3 -3
  64. datahub/ingestion/source/kafka/kafka.py +11 -9
  65. datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
  66. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  67. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  68. datahub/ingestion/source/looker/looker_common.py +19 -19
  69. datahub/ingestion/source/looker/looker_config.py +3 -3
  70. datahub/ingestion/source/looker/looker_source.py +25 -25
  71. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  72. datahub/ingestion/source/looker/looker_usage.py +5 -7
  73. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  74. datahub/ingestion/source/looker/lookml_source.py +13 -15
  75. datahub/ingestion/source/looker/view_upstream.py +5 -5
  76. datahub/ingestion/source/mlflow.py +4 -4
  77. datahub/ingestion/source/mongodb.py +6 -4
  78. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  79. datahub/ingestion/source/nifi.py +24 -26
  80. datahub/ingestion/source/openapi.py +9 -9
  81. datahub/ingestion/source/powerbi/config.py +12 -12
  82. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  83. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  84. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  85. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  86. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  87. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  88. datahub/ingestion/source/redshift/config.py +3 -3
  89. datahub/ingestion/source/redshift/redshift.py +12 -12
  90. datahub/ingestion/source/redshift/usage.py +8 -8
  91. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  92. datahub/ingestion/source/s3/source.py +1 -1
  93. datahub/ingestion/source/salesforce.py +26 -25
  94. datahub/ingestion/source/schema/json_schema.py +1 -1
  95. datahub/ingestion/source/sigma/sigma.py +3 -3
  96. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  97. datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
  98. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  99. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  100. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
  101. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
  102. datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
  103. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
  104. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  105. datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
  106. datahub/ingestion/source/sql/athena.py +1 -3
  107. datahub/ingestion/source/sql/clickhouse.py +8 -14
  108. datahub/ingestion/source/sql/oracle.py +1 -3
  109. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  110. datahub/ingestion/source/sql/teradata.py +16 -3
  111. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  112. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  113. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  114. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  115. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  116. datahub/ingestion/source/tableau/tableau.py +48 -49
  117. datahub/ingestion/source/unity/config.py +3 -1
  118. datahub/ingestion/source/unity/proxy.py +1 -1
  119. datahub/ingestion/source/unity/source.py +3 -3
  120. datahub/ingestion/source/unity/usage.py +3 -1
  121. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  122. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  123. datahub/ingestion/source/usage/usage_common.py +1 -1
  124. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  125. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  126. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  127. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  128. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  129. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  130. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  131. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  132. datahub/lite/duckdb_lite.py +12 -10
  133. datahub/metadata/_schema_classes.py +1 -1
  134. datahub/metadata/schema.avsc +6 -2
  135. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  136. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  137. datahub/secret/secret_common.py +14 -8
  138. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  139. datahub/sql_parsing/schema_resolver.py +5 -10
  140. datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
  141. datahub/sql_parsing/sqlglot_lineage.py +5 -4
  142. datahub/sql_parsing/sqlglot_utils.py +3 -2
  143. datahub/telemetry/stats.py +1 -2
  144. datahub/testing/mcp_diff.py +1 -1
  145. datahub/utilities/file_backed_collections.py +10 -10
  146. datahub/utilities/hive_schema_to_avro.py +2 -2
  147. datahub/utilities/logging_manager.py +2 -2
  148. datahub/utilities/lossy_collections.py +3 -3
  149. datahub/utilities/mapping.py +3 -3
  150. datahub/utilities/serialized_lru_cache.py +3 -1
  151. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  152. datahub/utilities/sqllineage_patch.py +1 -1
  153. datahub/utilities/stats_collections.py +3 -1
  154. datahub/utilities/urns/urn_iter.py +2 -2
  155. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/WHEEL +0 -0
  156. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/entry_points.txt +0 -0
  157. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.2rc8.dist-info}/top_level.txt +0 -0
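
Most of the hunks below are mechanical reformatting rather than behavior changes: multi-line subscript targets, type annotations, and assert conditions are collapsed onto one line, and the right-hand side or assert message is wrapped in parentheses instead. This is consistent with the output of newer auto-formatter releases (the diff itself does not name the tool, so that attribution is an inference). A minimal sketch of the recurring assert pattern, using hypothetical names that do not appear in the package:

    from typing import Optional

    def check_credentials(username: Optional[str], password: Optional[str]) -> None:
        # Old layout: the condition was split inside parentheses and the
        # message followed the closing paren:
        #     assert (
        #         username and password
        #     ), "Username and password are required"
        # New layout: the condition stays on one line and the message is
        # parenthesized instead.
        assert username and password, (
            "Username and password are required"
        )

    check_credentials("reader", "s3cr3t")
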
@@ -506,16 +506,18 @@ class DBTNode:
  materialization: Optional[str] # table, view, ephemeral, incremental, snapshot
  # see https://docs.getdbt.com/reference/artifacts/manifest-json
  catalog_type: Optional[str]
- missing_from_catalog: bool # indicates if the node was missing from the catalog.json
+ missing_from_catalog: (
+ bool # indicates if the node was missing from the catalog.json
+ )

  owner: Optional[str]

  columns: List[DBTColumn] = field(default_factory=list)
  upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name
  upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list)
- raw_sql_parsing_result: Optional[
- SqlParsingResult
- ] = None # only set for nodes that don't depend on ephemeral models
+ raw_sql_parsing_result: Optional[SqlParsingResult] = (
+ None # only set for nodes that don't depend on ephemeral models
+ )
  cll_debug_info: Optional[SqlParsingDebugInfo] = None

  meta: Dict[str, Any] = field(default_factory=dict)
@@ -869,10 +871,10 @@ class DBTSourceBase(StatefulIngestionSourceBase):
  "platform": DBT_PLATFORM,
  "name": node.dbt_name,
  "instance": self.config.platform_instance,
+ # Ideally we'd include the env unconditionally. However, we started out
+ # not including env in the guid, so we need to maintain backwards compatibility
+ # with existing PROD assertions.
  **(
- # Ideally we'd include the env unconditionally. However, we started out
- # not including env in the guid, so we need to maintain backwards compatibility
- # with existing PROD assertions.
  {"env": self.config.env}
  if self.config.env != mce_builder.DEFAULT_ENV
  and self.config.include_env_in_assertion_guid
@@ -181,7 +181,7 @@ class DremioAPIOperations:
  return

  # On-prem Dremio authentication (PAT or Basic Auth)
- for retry in range(1, self._retry_count + 1):
+ for _ in range(1, self._retry_count + 1):
  try:
  if connection_args.authentication_method == "PAT":
  self.session.headers.update(
@@ -191,9 +191,9 @@ class DremioAPIOperations:
  )
  return
  else:
- assert (
- connection_args.username and connection_args.password
- ), "Username and password are required for authentication"
+ assert connection_args.username and connection_args.password, (
+ "Username and password are required for authentication"
+ )
  host = connection_args.hostname
  port = connection_args.port
  protocol = "https" if connection_args.tls else "http"
@@ -101,9 +101,9 @@ class DremioToDataHubSourceTypeMapping:
  Add a new source type if not in the map (e.g., Dremio ARP).
  """
  dremio_source_type = dremio_source_type.upper()
- DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[
- dremio_source_type
- ] = datahub_source_type
+ DremioToDataHubSourceTypeMapping.SOURCE_TYPE_MAPPING[dremio_source_type] = (
+ datahub_source_type
+ )

  if category:
  if category.lower() == "file_object_storage":
@@ -111,10 +111,10 @@ class ElasticToSchemaFieldConverter:

  @staticmethod
  def get_column_type(elastic_column_type: str) -> SchemaFieldDataType:
- type_class: Optional[
- Type
- ] = ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get(
- elastic_column_type
+ type_class: Optional[Type] = (
+ ElasticToSchemaFieldConverter._field_type_to_schema_field_type.get(
+ elastic_column_type
+ )
  )
  if type_class is None:
  logger.warning(
@@ -155,9 +155,9 @@ class SoftDeletedEntitiesCleanup:
  current_count = self.report.num_hard_deleted_by_type.get(entity_type, 0)
  self.report.num_hard_deleted_by_type[entity_type] = current_count + 1
  if entity_type not in self.report.sample_hard_deleted_aspects_by_type:
- self.report.sample_hard_deleted_aspects_by_type[
- entity_type
- ] = LossyList()
+ self.report.sample_hard_deleted_aspects_by_type[entity_type] = (
+ LossyList()
+ )
  self.report.sample_hard_deleted_aspects_by_type[entity_type].append(urn)

  def delete_entity(self, urn: str) -> None:
@@ -141,8 +141,9 @@ class GCSSource(StatefulIngestionSourceBase):
  source.source_config.platform = PLATFORM_GCS

  source.is_s3_platform = lambda: True # type: ignore
- source.create_s3_path = lambda bucket_name, key: unquote(f"s3://{bucket_name}/{key}") # type: ignore
-
+ source.create_s3_path = lambda bucket_name, key: unquote( # type: ignore
+ f"s3://{bucket_name}/{key}"
+ )
  return source

  def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
@@ -1,3 +1,5 @@
+ from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
+
  import collections
  import concurrent.futures
  import contextlib
@@ -10,7 +12,6 @@ import threading
  import traceback
  import unittest.mock
  import uuid
- from datahub.utilities._markupsafe_compat import MARKUPSAFE_PATCHED
  from functools import lru_cache
  from typing import (
  TYPE_CHECKING,
@@ -326,7 +327,7 @@ def _is_single_row_query_method(query: Any) -> bool:


  def _run_with_query_combiner(
- method: Callable[Concatenate["_SingleDatasetProfiler", P], None]
+ method: Callable[Concatenate["_SingleDatasetProfiler", P], None],
  ) -> Callable[Concatenate["_SingleDatasetProfiler", P], None]:
  @functools.wraps(method)
  def inner(
@@ -1536,9 +1537,7 @@ def create_bigquery_temp_table(
  query_job: Optional["google.cloud.bigquery.job.query.QueryJob"] = (
  # In google-cloud-bigquery 3.15.0, the _query_job attribute was
  # made public and renamed to query_job.
- cursor.query_job
- if hasattr(cursor, "query_job")
- else cursor._query_job # type: ignore[attr-defined]
+ cursor.query_job if hasattr(cursor, "query_job") else cursor._query_job # type: ignore[attr-defined]
  )
  assert query_job
  temp_destination_table = query_job.destination
@@ -220,9 +220,9 @@ class GEProfilingConfig(GEProfilingBaseConfig):
  )
  values[field_level_metric] = False

- assert (
- max_num_fields_to_profile is None
- ), f"{max_num_fields_to_profile_key} should be set to None"
+ assert max_num_fields_to_profile is None, (
+ f"{max_num_fields_to_profile_key} should be set to None"
+ )

  # Disable expensive queries.
  if values.get("turn_off_expensive_profiling_metrics"):
@@ -296,9 +296,9 @@ class IcebergSource(StatefulIngestionSourceBase):
  custom_properties["snapshot-id"] = str(
  table.current_snapshot().snapshot_id
  )
- custom_properties[
- "manifest-list"
- ] = table.current_snapshot().manifest_list
+ custom_properties["manifest-list"] = (
+ table.current_snapshot().manifest_list
+ )
  dataset_properties = DatasetPropertiesClass(
  name=table.name()[-1],
  description=table.metadata.properties.get("comment", None),
@@ -354,9 +354,9 @@ class AzureADSource(StatefulIngestionSourceBase):
  yield MetadataWorkUnit(id=group_status_wu_id, mcp=group_status_mcp)

  # Populate GroupMembership Aspects for CorpUsers
- datahub_corp_user_urn_to_group_membership: Dict[
- str, GroupMembershipClass
- ] = defaultdict(lambda: GroupMembershipClass(groups=[]))
+ datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
+ defaultdict(lambda: GroupMembershipClass(groups=[]))
+ )
  if (
  self.config.ingest_group_membership
  and len(self.selected_azure_ad_groups) > 0
@@ -344,9 +344,9 @@ class OktaSource(StatefulIngestionSourceBase):
  ).as_workunit()

  # Step 2: Populate GroupMembership Aspects for CorpUsers
- datahub_corp_user_urn_to_group_membership: Dict[
- str, GroupMembershipClass
- ] = defaultdict(lambda: GroupMembershipClass(groups=[]))
+ datahub_corp_user_urn_to_group_membership: Dict[str, GroupMembershipClass] = (
+ defaultdict(lambda: GroupMembershipClass(groups=[]))
+ )
  if self.config.ingest_group_membership and okta_groups is not None:
  # Fetch membership for each group.
  for okta_group in okta_groups:
@@ -419,10 +419,10 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
  custom_props = self.build_custom_properties(
  topic, topic_detail, extra_topic_config
  )
- schema_name: Optional[
- str
- ] = self.schema_registry_client._get_subject_for_topic(
- topic, is_key_schema=False
+ schema_name: Optional[str] = (
+ self.schema_registry_client._get_subject_for_topic(
+ topic, is_key_schema=False
+ )
  )
  if schema_name is not None:
  custom_props["Schema Name"] = schema_name
@@ -610,11 +610,13 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):

  def fetch_topic_configurations(self, topics: List[str]) -> Dict[str, dict]:
  logger.info("Fetching config details for all topics")
- configs: Dict[
- ConfigResource, concurrent.futures.Future
- ] = self.admin_client.describe_configs(
- resources=[ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics],
- request_timeout=self.source_config.connection.client_timeout_seconds,
+ configs: Dict[ConfigResource, concurrent.futures.Future] = (
+ self.admin_client.describe_configs(
+ resources=[
+ ConfigResource(ConfigResource.Type.TOPIC, t) for t in topics
+ ],
+ request_timeout=self.source_config.connection.client_timeout_seconds,
+ )
  )
  logger.debug("Waiting for config details futures to complete")
  concurrent.futures.wait(configs.values())
@@ -110,9 +110,8 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
  connector_manifest = self._get_connector_manifest(
  connector_name, connector_url
  )
- if (
- connector_manifest is None
- or not self.config.connector_patterns.allowed(connector_manifest.name)
+ if connector_manifest is None or not self.config.connector_patterns.allowed(
+ connector_manifest.name
  ):
  self.report.report_dropped(connector_name)
  continue
@@ -199,9 +199,9 @@ class BigQuerySinkConnector(BaseConnector):
  transforms.append(transform)
  for key in self.connector_manifest.config.keys():
  if key.startswith(f"transforms.{name}."):
- transform[
- key.replace(f"transforms.{name}.", "")
- ] = self.connector_manifest.config[key]
+ transform[key.replace(f"transforms.{name}.", "")] = (
+ self.connector_manifest.config[key]
+ )

  if "defaultDataset" in connector_manifest.config:
  defaultDataset = connector_manifest.config["defaultDataset"]
@@ -123,9 +123,9 @@ class ConfluentJDBCSourceConnector(BaseConnector):
  transforms.append(transform)
  for key in self.connector_manifest.config.keys():
  if key.startswith(f"transforms.{name}."):
- transform[
- key.replace(f"transforms.{name}.", "")
- ] = self.connector_manifest.config[key]
+ transform[key.replace(f"transforms.{name}.", "")] = (
+ self.connector_manifest.config[key]
+ )

  return self.JdbcParser(
  db_connection_url,
@@ -596,9 +596,9 @@ class LookerUtil:

  @staticmethod
  def _extract_view_from_field(field: str) -> str:
- assert (
- field.count(".") == 1
- ), f"Error: A field must be prefixed by a view name, field is: {field}"
+ assert field.count(".") == 1, (
+ f"Error: A field must be prefixed by a view name, field is: {field}"
+ )
  return field.split(".")[0]

  @staticmethod
@@ -815,9 +815,9 @@ class LookerExplore:
  project_name: Optional[str] = None
  label: Optional[str] = None
  description: Optional[str] = None
- upstream_views: Optional[
- List[ProjectInclude]
- ] = None # captures the view name(s) this explore is derived from
+ upstream_views: Optional[List[ProjectInclude]] = (
+ None # captures the view name(s) this explore is derived from
+ )
  upstream_views_file_path: Dict[str, Optional[str]] = dataclasses_field(
  default_factory=dict
  ) # view_name is key and file_path is value. A single file may contains multiple views
@@ -889,7 +889,7 @@ class LookerExplore:
  upstream_views.extend(parsed_explore.upstream_views or [])
  else:
  logger.warning(
- f'Could not find extended explore {extended_explore} for explore {dict["name"]} in model {model_name}'
+ f"Could not find extended explore {extended_explore} for explore {dict['name']} in model {model_name}"
  )
  else:
  # we only fallback to the view_names list if this is not an extended explore
@@ -903,7 +903,7 @@ class LookerExplore:
  )
  if not info:
  logger.warning(
- f'Could not resolve view {view_name} for explore {dict["name"]} in model {model_name}'
+ f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
  )
  else:
  upstream_views.append(
@@ -935,9 +935,9 @@ class LookerExplore:
  try:
  explore = client.lookml_model_explore(model, explore_name)
  views: Set[str] = set()
- lkml_fields: List[
- LookmlModelExploreField
- ] = explore_field_set_to_lkml_fields(explore)
+ lkml_fields: List[LookmlModelExploreField] = (
+ explore_field_set_to_lkml_fields(explore)
+ )

  if explore.view_name is not None and explore.view_name != explore.name:
  # explore is not named after a view and is instead using a from field, which is modeled as view_name.
@@ -1034,9 +1034,9 @@ class LookerExplore:
  if measure_field.name is None:
  continue
  else:
- field_name_vs_raw_explore_field[
- measure_field.name
- ] = measure_field
+ field_name_vs_raw_explore_field[measure_field.name] = (
+ measure_field
+ )

  view_fields.append(
  ViewField(
@@ -1072,11 +1072,11 @@ class LookerExplore:
  if view_project_map:
  logger.debug(f"views and their projects: {view_project_map}")

- upstream_views_file_path: Dict[
- str, Optional[str]
- ] = create_upstream_views_file_path_map(
- lkml_fields=lkml_fields,
- view_names=views,
+ upstream_views_file_path: Dict[str, Optional[str]] = (
+ create_upstream_views_file_path_map(
+ lkml_fields=lkml_fields,
+ view_names=views,
+ )
  )
  if upstream_views_file_path:
  logger.debug(f"views and their file-paths: {upstream_views_file_path}")
@@ -166,9 +166,9 @@ def _get_generic_definition(
  # e.g. spark1 or hive2 or druid_18
  platform = re.sub(r"[0-9]+", "", dialect_name.split("_")[0])

- assert (
- platform is not None
- ), f"Failed to extract a valid platform from connection {looker_connection}"
+ assert platform is not None, (
+ f"Failed to extract a valid platform from connection {looker_connection}"
+ )
  db = looker_connection.database
  schema = looker_connection.schema # ok for this to be None
  return platform, db, schema
@@ -250,9 +250,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):

  @staticmethod
  def _extract_view_from_field(field: str) -> str:
- assert (
- field.count(".") == 1
- ), f"Error: A field must be prefixed by a view name, field is: {field}"
+ assert field.count(".") == 1, (
+ f"Error: A field must be prefixed by a view name, field is: {field}"
+ )
  return field.split(".")[0]

  def _get_views_from_fields(self, fields: List[str]) -> List[str]:
@@ -610,12 +610,12 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  def _create_platform_instance_aspect(
  self,
  ) -> DataPlatformInstance:
- assert (
- self.source_config.platform_name
- ), "Platform name is not set in the configuration."
- assert (
- self.source_config.platform_instance
- ), "Platform instance is not set in the configuration."
+ assert self.source_config.platform_name, (
+ "Platform name is not set in the configuration."
+ )
+ assert self.source_config.platform_instance, (
+ "Platform instance is not set in the configuration."
+ )

  return DataPlatformInstance(
  platform=builder.make_data_platform_urn(self.source_config.platform_name),
@@ -1016,9 +1016,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  yield from chart_events

  # Step 2: Emit metadata events for the Dashboard itself.
- chart_urns: Set[
- str
- ] = set() # Collect the unique child chart urns for dashboard input lineage.
+ chart_urns: Set[str] = (
+ set()
+ ) # Collect the unique child chart urns for dashboard input lineage.
  for chart_event in chart_events:
  chart_event_urn = self._extract_event_urn(chart_event)
  if chart_event_urn:
@@ -1538,20 +1538,20 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  }
  )

- dashboard_element: Optional[
- LookerDashboardElement
- ] = self._get_looker_dashboard_element(
- DashboardElement(
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
- # we add the "looks_" prefix to look.id.
- title=look.title,
- subtitle_text=look.description,
- look_id=look.id,
- dashboard_id=None, # As this is an independent look
- look=LookWithQuery(
- query=query, folder=look.folder, user_id=look.user_id
+ dashboard_element: Optional[LookerDashboardElement] = (
+ self._get_looker_dashboard_element(
+ DashboardElement(
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
+ # we add the "looks_" prefix to look.id.
+ title=look.title,
+ subtitle_text=look.description,
+ look_id=look.id,
+ dashboard_id=None, # As this is an independent look
+ look=LookWithQuery(
+ query=query, folder=look.folder, user_id=look.user_id
+ ),
  ),
- ),
+ )
  )

  if dashboard_element is not None:
@@ -33,9 +33,9 @@ logger = logging.getLogger(__name__)


  class SpecialVariable:
- SPECIAL_VARIABLE_PATTERN: ClassVar[
- str
- ] = r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b"
+ SPECIAL_VARIABLE_PATTERN: ClassVar[str] = (
+ r"\b\w+(\.\w+)*\._(is_selected|in_query|is_filtered)\b"
+ )
  liquid_variable: dict

  def __init__(self, liquid_variable):
@@ -257,9 +257,9 @@ class BaseStatGenerator(ABC):

  for row in rows:
  logger.debug(row)
- entity_stat_aspect[
- self.get_entity_stat_key(row)
- ] = self.to_entity_timeseries_stat_aspect(row)
+ entity_stat_aspect[self.get_entity_stat_key(row)] = (
+ self.to_entity_timeseries_stat_aspect(row)
+ )

  return entity_stat_aspect

@@ -385,10 +385,8 @@ class BaseStatGenerator(ABC):
  entity_rows: List[Dict] = self._execute_query(
  entity_query_with_filters, "entity_query"
  )
- entity_usage_stat: Dict[
- Tuple[str, str], Any
- ] = self._process_entity_timeseries_rows(
- entity_rows
+ entity_usage_stat: Dict[Tuple[str, str], Any] = (
+ self._process_entity_timeseries_rows(entity_rows)
  ) # Any type to pass mypy unbound Aspect type error

  user_wise_query_with_filters: LookerQuery = self._append_filters(
@@ -38,16 +38,16 @@ def merge_parent_and_child_fields(
  # Create a map field-name vs field
  child_field_map: dict = {}
  for field in child_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array

  child_field_map[field[NAME]] = field

  for field in parent_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array

  if field[NAME] in child_field_map:
  # Fields defined in the child view take higher precedence.
@@ -482,14 +482,14 @@ class LookMLSource(StatefulIngestionSourceBase):
  if self.source_config.project_name is not None:
  return self.source_config.project_name

- assert (
- self.looker_client is not None
- ), "Failed to find a configured Looker API client"
+ assert self.looker_client is not None, (
+ "Failed to find a configured Looker API client"
+ )
  try:
  model = self.looker_client.lookml_model(model_name, fields="project_name")
- assert (
- model.project_name is not None
- ), f"Failed to find a project name for model {model_name}"
+ assert model.project_name is not None, (
+ f"Failed to find a project name for model {model_name}"
+ )
  return model.project_name
  except SDKError:
  raise ValueError(
@@ -541,9 +541,9 @@ class LookMLSource(StatefulIngestionSourceBase):
  self.reporter.git_clone_latency = datetime.now() - start_time
  self.source_config.base_folder = checkout_dir.resolve()

- self.base_projects_folder[
- BASE_PROJECT_NAME
- ] = self.source_config.base_folder
+ self.base_projects_folder[BASE_PROJECT_NAME] = (
+ self.source_config.base_folder
+ )

  visited_projects: Set[str] = set()

@@ -641,9 +641,9 @@ class LookMLSource(StatefulIngestionSourceBase):
  repo_url=remote_project.url,
  )

- self.base_projects_folder[
- remote_project.name
- ] = p_checkout_dir.resolve()
+ self.base_projects_folder[remote_project.name] = (
+ p_checkout_dir.resolve()
+ )
  repo = p_cloner.get_last_repo_cloned()
  assert repo
  remote_git_info = GitInfo(
@@ -930,9 +930,7 @@ class LookMLSource(StatefulIngestionSourceBase):
  logger.warning(
  f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables"
  )
- if (
- not self.source_config.emit_reachable_views_only
- ):
+ if not self.source_config.emit_reachable_views_only:
  logger.warning(
  "Consider enabling the `emit_reachable_views_only` flag to handle this case."
  )
@@ -484,11 +484,11 @@ class NativeDerivedViewUpstream(AbstractViewUpstream):
  )

  def __get_upstream_dataset_urn(self) -> List[str]:
- current_view_id: Optional[
- LookerViewId
- ] = self.looker_view_id_cache.get_looker_view_id(
- view_name=self.view_context.name(),
- base_folder_path=self.view_context.base_folder_path,
+ current_view_id: Optional[LookerViewId] = (
+ self.looker_view_id_cache.get_looker_view_id(
+ view_name=self.view_context.name(),
+ base_folder_path=self.view_context.base_folder_path,
+ )
  )

  # Current view will always be present in cache. assert will silence the lint
@@ -172,10 +172,10 @@ class MLflowSource(Source):
  """
  Get all Registered Models in MLflow Model Registry.
  """
- registered_models: Iterable[
- RegisteredModel
- ] = self._traverse_mlflow_search_func(
- search_func=self.client.search_registered_models,
+ registered_models: Iterable[RegisteredModel] = (
+ self._traverse_mlflow_search_func(
+ search_func=self.client.search_registered_models,
+ )
  )
  return registered_models