acryl-datahub 0.15.0.2rc7__py3-none-any.whl → 0.15.0.3rc1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic.

Files changed (161)
  1. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/METADATA +2378 -2380
  2. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/RECORD +161 -161
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/assertion/assertion_operator.py +3 -5
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  7. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  8. datahub/api/entities/dataset/dataset.py +2 -1
  9. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  10. datahub/cli/cli_utils.py +1 -1
  11. datahub/cli/delete_cli.py +16 -2
  12. datahub/cli/docker_cli.py +6 -6
  13. datahub/cli/lite_cli.py +2 -2
  14. datahub/cli/migrate.py +3 -3
  15. datahub/cli/specific/assertions_cli.py +3 -3
  16. datahub/cli/timeline_cli.py +1 -1
  17. datahub/configuration/common.py +1 -2
  18. datahub/configuration/config_loader.py +73 -50
  19. datahub/configuration/git.py +2 -2
  20. datahub/configuration/time_window_config.py +10 -5
  21. datahub/emitter/mce_builder.py +4 -8
  22. datahub/emitter/mcp_patch_builder.py +1 -2
  23. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  24. datahub/ingestion/api/report.py +1 -2
  25. datahub/ingestion/api/source_helpers.py +1 -1
  26. datahub/ingestion/extractor/json_schema_util.py +3 -3
  27. datahub/ingestion/extractor/schema_util.py +3 -5
  28. datahub/ingestion/fs/s3_fs.py +3 -3
  29. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  30. datahub/ingestion/graph/client.py +4 -6
  31. datahub/ingestion/run/pipeline.py +8 -7
  32. datahub/ingestion/run/pipeline_config.py +3 -3
  33. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  34. datahub/ingestion/source/abs/source.py +19 -8
  35. datahub/ingestion/source/aws/glue.py +11 -11
  36. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  37. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  38. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  39. datahub/ingestion/source/bigquery_v2/bigquery.py +3 -3
  40. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  41. datahub/ingestion/source/bigquery_v2/bigquery_config.py +6 -6
  42. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  43. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +15 -9
  44. datahub/ingestion/source/bigquery_v2/lineage.py +9 -9
  45. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  46. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  47. datahub/ingestion/source/bigquery_v2/usage.py +3 -3
  48. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  49. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -4
  50. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  51. datahub/ingestion/source/csv_enricher.py +29 -29
  52. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  53. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  54. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  55. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  56. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  57. datahub/ingestion/source/elastic_search.py +4 -4
  58. datahub/ingestion/source/fivetran/config.py +4 -0
  59. datahub/ingestion/source/fivetran/fivetran.py +15 -5
  60. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +3 -3
  61. datahub/ingestion/source/gcs/gcs_source.py +5 -3
  62. datahub/ingestion/source/ge_data_profiler.py +4 -5
  63. datahub/ingestion/source/ge_profiling_config.py +3 -3
  64. datahub/ingestion/source/iceberg/iceberg.py +3 -3
  65. datahub/ingestion/source/identity/azure_ad.py +3 -3
  66. datahub/ingestion/source/identity/okta.py +3 -3
  67. datahub/ingestion/source/kafka/kafka.py +11 -9
  68. datahub/ingestion/source/kafka_connect/kafka_connect.py +2 -3
  69. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  70. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  71. datahub/ingestion/source/looker/looker_common.py +19 -19
  72. datahub/ingestion/source/looker/looker_config.py +3 -3
  73. datahub/ingestion/source/looker/looker_source.py +25 -25
  74. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  75. datahub/ingestion/source/looker/looker_usage.py +5 -7
  76. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  77. datahub/ingestion/source/looker/lookml_source.py +13 -15
  78. datahub/ingestion/source/looker/view_upstream.py +5 -5
  79. datahub/ingestion/source/mlflow.py +4 -4
  80. datahub/ingestion/source/mongodb.py +6 -4
  81. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  82. datahub/ingestion/source/nifi.py +24 -26
  83. datahub/ingestion/source/openapi.py +9 -9
  84. datahub/ingestion/source/powerbi/config.py +12 -12
  85. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  86. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  87. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  88. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  89. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  90. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  91. datahub/ingestion/source/redshift/config.py +3 -3
  92. datahub/ingestion/source/redshift/query.py +77 -47
  93. datahub/ingestion/source/redshift/redshift.py +12 -12
  94. datahub/ingestion/source/redshift/usage.py +8 -8
  95. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  96. datahub/ingestion/source/s3/source.py +1 -1
  97. datahub/ingestion/source/salesforce.py +26 -25
  98. datahub/ingestion/source/schema/json_schema.py +1 -1
  99. datahub/ingestion/source/sigma/sigma.py +3 -3
  100. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  101. datahub/ingestion/source/snowflake/snowflake_config.py +9 -7
  102. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  103. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  104. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -3
  105. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +6 -6
  106. datahub/ingestion/source/snowflake/snowflake_tag.py +7 -7
  107. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +3 -3
  108. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  109. datahub/ingestion/source/snowflake/snowflake_v2.py +13 -4
  110. datahub/ingestion/source/sql/athena.py +1 -3
  111. datahub/ingestion/source/sql/clickhouse.py +8 -14
  112. datahub/ingestion/source/sql/oracle.py +1 -3
  113. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  114. datahub/ingestion/source/sql/teradata.py +16 -3
  115. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  116. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  117. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  118. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  119. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  120. datahub/ingestion/source/tableau/tableau.py +48 -49
  121. datahub/ingestion/source/unity/config.py +3 -1
  122. datahub/ingestion/source/unity/proxy.py +1 -1
  123. datahub/ingestion/source/unity/source.py +3 -3
  124. datahub/ingestion/source/unity/usage.py +3 -1
  125. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  126. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  127. datahub/ingestion/source/usage/usage_common.py +1 -1
  128. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  129. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  130. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  131. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  132. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  133. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  134. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  135. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  136. datahub/lite/duckdb_lite.py +12 -10
  137. datahub/metadata/_schema_classes.py +1 -1
  138. datahub/metadata/schema.avsc +6 -2
  139. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  140. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  141. datahub/secret/secret_common.py +14 -8
  142. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  143. datahub/sql_parsing/schema_resolver.py +5 -10
  144. datahub/sql_parsing/sql_parsing_aggregator.py +16 -16
  145. datahub/sql_parsing/sqlglot_lineage.py +5 -4
  146. datahub/sql_parsing/sqlglot_utils.py +3 -2
  147. datahub/telemetry/stats.py +1 -2
  148. datahub/testing/mcp_diff.py +1 -1
  149. datahub/utilities/file_backed_collections.py +10 -10
  150. datahub/utilities/hive_schema_to_avro.py +2 -2
  151. datahub/utilities/logging_manager.py +2 -2
  152. datahub/utilities/lossy_collections.py +3 -3
  153. datahub/utilities/mapping.py +3 -3
  154. datahub/utilities/serialized_lru_cache.py +3 -1
  155. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  156. datahub/utilities/sqllineage_patch.py +1 -1
  157. datahub/utilities/stats_collections.py +3 -1
  158. datahub/utilities/urns/urn_iter.py +2 -2
  159. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/WHEEL +0 -0
  160. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/entry_points.txt +0 -0
  161. {acryl_datahub-0.15.0.2rc7.dist-info → acryl_datahub-0.15.0.3rc1.dist-info}/top_level.txt +0 -0
@@ -38,16 +38,16 @@ def merge_parent_and_child_fields(
  # Create a map field-name vs field
  child_field_map: dict = {}
  for field in child_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array
 
  child_field_map[field[NAME]] = field
 
  for field in parent_fields:
- assert (
- NAME in field
- ), "A lookml view must have a name field" # name is required field of lookml field array
+ assert NAME in field, (
+ "A lookml view must have a name field"
+ ) # name is required field of lookml field array
 
  if field[NAME] in child_field_map:
  # Fields defined in the child view take higher precedence.
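Most hunks in this release are mechanical reformatting rather than behavioral changes. The recurring assert pattern above moves the failure message inside parentheses instead of wrapping the condition; this matches the layout newer Python formatter releases (e.g., recent Black/Ruff styles) emit, though the diff itself does not name a tool. A minimal before/after sketch with indentation restored, using identifiers from the hunk above:

    # Old layout: the condition is wrapped and the message trails the closing paren.
    assert (
        NAME in field
    ), "A lookml view must have a name field"

    # New layout: the condition stays on one line and the message is parenthesized.
    assert NAME in field, (
        "A lookml view must have a name field"
    )

Both forms raise the same AssertionError with the same message; only the line wrapping changes.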
@@ -482,14 +482,14 @@ class LookMLSource(StatefulIngestionSourceBase):
  if self.source_config.project_name is not None:
  return self.source_config.project_name
 
- assert (
- self.looker_client is not None
- ), "Failed to find a configured Looker API client"
+ assert self.looker_client is not None, (
+ "Failed to find a configured Looker API client"
+ )
  try:
  model = self.looker_client.lookml_model(model_name, fields="project_name")
- assert (
- model.project_name is not None
- ), f"Failed to find a project name for model {model_name}"
+ assert model.project_name is not None, (
+ f"Failed to find a project name for model {model_name}"
+ )
  return model.project_name
  except SDKError:
  raise ValueError(
@@ -541,9 +541,9 @@ class LookMLSource(StatefulIngestionSourceBase):
  self.reporter.git_clone_latency = datetime.now() - start_time
  self.source_config.base_folder = checkout_dir.resolve()
 
- self.base_projects_folder[
- BASE_PROJECT_NAME
- ] = self.source_config.base_folder
+ self.base_projects_folder[BASE_PROJECT_NAME] = (
+ self.source_config.base_folder
+ )
 
  visited_projects: Set[str] = set()
 
@@ -641,9 +641,9 @@ class LookMLSource(StatefulIngestionSourceBase):
  repo_url=remote_project.url,
  )
 
- self.base_projects_folder[
- remote_project.name
- ] = p_checkout_dir.resolve()
+ self.base_projects_folder[remote_project.name] = (
+ p_checkout_dir.resolve()
+ )
  repo = p_cloner.get_last_repo_cloned()
  assert repo
  remote_git_info = GitInfo(
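The other layout change that repeats throughout the diff affects long assignments: rather than splitting the subscript (or type annotation) on the left-hand side, the right-hand side is wrapped in parentheses. A sketch of the pattern with indentation restored, using names from the hunk above:

    # Old layout: the subscript target is split across lines.
    self.base_projects_folder[
        remote_project.name
    ] = p_checkout_dir.resolve()

    # New layout: the target stays on one line and the value is parenthesized.
    self.base_projects_folder[remote_project.name] = (
        p_checkout_dir.resolve()
    )

The same treatment is applied to annotated assignments later in the diff, e.g. current_view_id: Optional[LookerViewId] = (...).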
@@ -930,9 +930,7 @@ class LookMLSource(StatefulIngestionSourceBase):
  logger.warning(
  f"view {maybe_looker_view.id.view_name} from model {model_name}, connection {model.connection} was previously processed via model {prev_model_name}, connection {prev_model_connection} and will likely lead to incorrect lineage to the underlying tables"
  )
- if (
- not self.source_config.emit_reachable_views_only
- ):
+ if not self.source_config.emit_reachable_views_only:
  logger.warning(
  "Consider enabling the `emit_reachable_views_only` flag to handle this case."
  )
@@ -484,11 +484,11 @@ class NativeDerivedViewUpstream(AbstractViewUpstream):
  )
 
  def __get_upstream_dataset_urn(self) -> List[str]:
- current_view_id: Optional[
- LookerViewId
- ] = self.looker_view_id_cache.get_looker_view_id(
- view_name=self.view_context.name(),
- base_folder_path=self.view_context.base_folder_path,
+ current_view_id: Optional[LookerViewId] = (
+ self.looker_view_id_cache.get_looker_view_id(
+ view_name=self.view_context.name(),
+ base_folder_path=self.view_context.base_folder_path,
+ )
  )
 
  # Current view will always be present in cache. assert will silence the lint
@@ -172,10 +172,10 @@ class MLflowSource(Source):
  """
  Get all Registered Models in MLflow Model Registry.
  """
- registered_models: Iterable[
- RegisteredModel
- ] = self._traverse_mlflow_search_func(
- search_func=self.client.search_registered_models,
+ registered_models: Iterable[RegisteredModel] = (
+ self._traverse_mlflow_search_func(
+ search_func=self.client.search_registered_models,
+ )
  )
  return registered_models
 
@@ -288,7 +288,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
 
  # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
  self.mongo_client = MongoClient(
- self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options # type: ignore
+ self.config.connect_uri,
+ datetime_conversion="DATETIME_AUTO",
+ **options, # type: ignore
  )
 
  # This cheaply tests the connection. For details, see
@@ -470,9 +472,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
  )
  # Add this information to the custom properties so user can know they are looking at downsampled schema
  dataset_properties.customProperties["schema.downsampled"] = "True"
- dataset_properties.customProperties[
- "schema.totalFields"
- ] = f"{collection_schema_size}"
+ dataset_properties.customProperties["schema.totalFields"] = (
+ f"{collection_schema_size}"
+ )
 
  logger.debug(f"Size of collection fields = {len(collection_fields)}")
  # append each schema field (sort so output is consistent)
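For context on the MongoDB hunk above: the pymongo page linked in the comment documents datetime_conversion as the option for handling BSON datetimes that fall outside Python's datetime range. A minimal, self-contained sketch of the same connection pattern (the URI is a placeholder, not taken from the source):

    from pymongo import MongoClient

    # "DATETIME_AUTO" makes pymongo return its DatetimeMS wrapper only for values
    # that cannot be represented as datetime.datetime, instead of raising an error.
    client = MongoClient(
        "mongodb://localhost:27017",  # placeholder URI
        datetime_conversion="DATETIME_AUTO",
    )
    client.admin.command("ping")  # cheap connectivity check, as the source comment describes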
@@ -286,7 +286,7 @@ class Neo4jSource(Source):
  df = self.get_neo4j_metadata(
  "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
  )
- for index, row in df.iterrows():
+ for _, row in df.iterrows():
  try:
  yield MetadataWorkUnit(
  id=row["key"],
@@ -184,9 +184,9 @@ class NifiSourceConfig(EnvConfigMixin):
 
  @validator("site_url")
  def validator_site_url(cls, site_url: str) -> str:
- assert site_url.startswith(
- ("http://", "https://")
- ), "site_url must start with http:// or https://"
+ assert site_url.startswith(("http://", "https://")), (
+ "site_url must start with http:// or https://"
+ )
 
  if not site_url.endswith("/"):
  site_url = site_url + "/"
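The NiFi hunk above reformats a pydantic field validator that both validates and normalizes site_url. A condensed, runnable sketch of the same logic, assuming pydantic v1's validator API as used in the hunk (the config class name is illustrative):

    from pydantic import BaseModel, validator

    class NifiLikeConfig(BaseModel):
        site_url: str

        @validator("site_url")
        def validator_site_url(cls, site_url: str) -> str:
            # Require an explicit http(s) scheme, then normalize to a trailing slash.
            assert site_url.startswith(("http://", "https://")), (
                "site_url must start with http:// or https://"
            )
            if not site_url.endswith("/"):
                site_url = site_url + "/"
            return site_url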
@@ -487,9 +487,7 @@ class NifiSource(Source):
  def get_report(self) -> SourceReport:
  return self.report
 
- def update_flow(
- self, pg_flow_dto: Dict, recursion_level: int = 0
- ) -> None: # noqa: C901
+ def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None: # noqa: C901
  """
  Update self.nifi_flow with contents of the input process group `pg_flow_dto`
  """
@@ -548,16 +546,16 @@ class NifiSource(Source):
  for inputPort in flow_dto.get("inputPorts", []):
  component = inputPort.get("component")
  if inputPort.get("allowRemoteAccess"):
- self.nifi_flow.remotely_accessible_ports[
- component.get("id")
- ] = NifiComponent(
- component.get("id"),
- component.get("name"),
- component.get("type"),
- component.get("parentGroupId"),
- NifiType.INPUT_PORT,
- comments=component.get("comments"),
- status=component.get("status", {}).get("runStatus"),
+ self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+ NifiComponent(
+ component.get("id"),
+ component.get("name"),
+ component.get("type"),
+ component.get("parentGroupId"),
+ NifiType.INPUT_PORT,
+ comments=component.get("comments"),
+ status=component.get("status", {}).get("runStatus"),
+ )
  )
  logger.debug(f"Adding remotely accessible port {component.get('id')}")
  else:
@@ -576,16 +574,16 @@ class NifiSource(Source):
  for outputPort in flow_dto.get("outputPorts", []):
  component = outputPort.get("component")
  if outputPort.get("allowRemoteAccess"):
- self.nifi_flow.remotely_accessible_ports[
- component.get("id")
- ] = NifiComponent(
- component.get("id"),
- component.get("name"),
- component.get("type"),
- component.get("parentGroupId"),
- NifiType.OUTPUT_PORT,
- comments=component.get("comments"),
- status=component.get("status", {}).get("runStatus"),
+ self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+ NifiComponent(
+ component.get("id"),
+ component.get("name"),
+ component.get("type"),
+ component.get("parentGroupId"),
+ NifiType.OUTPUT_PORT,
+ comments=component.get("comments"),
+ status=component.get("status", {}).get("runStatus"),
+ )
  )
  logger.debug(f"Adding remotely accessible port {component.get('id')}")
  else:
@@ -101,16 +101,16 @@ class OpenApiConfig(ConfigModel):
  # details there once, and then use that session for all requests.
  self.token = f"Bearer {self.bearer_token}"
  else:
- assert (
- "url_complement" in self.get_token.keys()
- ), "When 'request_type' is set to 'get', an url_complement is needed for the request."
+ assert "url_complement" in self.get_token.keys(), (
+ "When 'request_type' is set to 'get', an url_complement is needed for the request."
+ )
  if self.get_token["request_type"] == "get":
- assert (
- "{username}" in self.get_token["url_complement"]
- ), "we expect the keyword {username} to be present in the url"
- assert (
- "{password}" in self.get_token["url_complement"]
- ), "we expect the keyword {password} to be present in the url"
+ assert "{username}" in self.get_token["url_complement"], (
+ "we expect the keyword {username} to be present in the url"
+ )
+ assert "{password}" in self.get_token["url_complement"], (
+ "we expect the keyword {password} to be present in the url"
+ )
  url4req = self.get_token["url_complement"].replace(
  "{username}", self.username
  )
@@ -225,9 +225,9 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
  def default_for_dataset_type_mapping() -> Dict[str, str]:
  dict_: dict = {}
  for item in SupportedDataPlatform:
- dict_[
- item.value.powerbi_data_platform_name
- ] = item.value.datahub_data_platform_name
+ dict_[item.value.powerbi_data_platform_name] = (
+ item.value.datahub_data_platform_name
+ )
 
  return dict_
 
@@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig(
  # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
  # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
  # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
- dataset_type_mapping: Union[
- Dict[str, str], Dict[str, PlatformDetail]
- ] = pydantic.Field(
- default_factory=default_for_dataset_type_mapping,
- description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
- "DataHub supported datasources."
- "You can configured platform instance for dataset lineage. "
- "See Quickstart Recipe for mapping",
- hidden_from_docs=True,
+ dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
+ pydantic.Field(
+ default_factory=default_for_dataset_type_mapping,
+ description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+ "DataHub supported datasources."
+ "You can configured platform instance for dataset lineage. "
+ "See Quickstart Recipe for mapping",
+ hidden_from_docs=True,
+ )
  )
  # PowerBI datasource's server to platform instance mapping
  server_to_platform_instance: Dict[
@@ -128,17 +128,17 @@ def get_upstream_tables(
  reporter.m_query_parse_successes += 1
 
  try:
- lineage: List[
- datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
- ] = resolver.MQueryResolver(
- table=table,
- parse_tree=parse_tree,
- reporter=reporter,
- parameters=parameters,
- ).resolve_to_lineage(
- ctx=ctx,
- config=config,
- platform_instance_resolver=platform_instance_resolver,
+ lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+ resolver.MQueryResolver(
+ table=table,
+ parse_tree=parse_tree,
+ reporter=reporter,
+ parameters=parameters,
+ ).resolve_to_lineage(
+ ctx=ctx,
+ config=config,
+ platform_instance_resolver=platform_instance_resolver,
+ )
  )
 
  if lineage:
@@ -170,8 +170,7 @@ class AbstractLineage(ABC):
  logger.debug(f"Processing arguments {arguments}")
 
  if (
- len(arguments)
- >= 4 # [0] is warehouse FQDN.
+ len(arguments) >= 4 # [0] is warehouse FQDN.
  # [1] is endpoint, we are not using it.
  # [2] is "Catalog" key
  # [3] is catalog's value
@@ -215,16 +214,16 @@ class AbstractLineage(ABC):
  native_sql_parser.remove_special_characters(query)
  )
 
- parsed_result: Optional[
- "SqlParsingResult"
- ] = native_sql_parser.parse_custom_sql(
- ctx=self.ctx,
- query=query,
- platform=self.get_platform_pair().datahub_data_platform_name,
- platform_instance=platform_detail.platform_instance,
- env=platform_detail.env,
- database=database,
- schema=schema,
+ parsed_result: Optional["SqlParsingResult"] = (
+ native_sql_parser.parse_custom_sql(
+ ctx=self.ctx,
+ query=query,
+ platform=self.get_platform_pair().datahub_data_platform_name,
+ platform_instance=platform_detail.platform_instance,
+ env=platform_detail.env,
+ database=database,
+ schema=schema,
+ )
  )
 
  if parsed_result is None:
@@ -410,9 +409,9 @@ class DatabricksLineage(AbstractLineage):
  f"Processing Databrick data-access function detail {data_access_func_detail}"
  )
  table_detail: Dict[str, str] = {}
- temp_accessor: Optional[
- IdentifierAccessor
- ] = data_access_func_detail.identifier_accessor
+ temp_accessor: Optional[IdentifierAccessor] = (
+ data_access_func_detail.identifier_accessor
+ )
 
  while temp_accessor:
  # Condition to handle databricks M-query pattern where table, schema and database all are present in
@@ -647,11 +646,13 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
  db_name: str = data_access_func_detail.identifier_accessor.items["Name"] # type: ignore
  # Second is schema name
  schema_name: str = cast(
- IdentifierAccessor, data_access_func_detail.identifier_accessor.next # type: ignore
+ IdentifierAccessor,
+ data_access_func_detail.identifier_accessor.next, # type: ignore
  ).items["Name"]
  # Third is table name
  table_name: str = cast(
- IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next # type: ignore
+ IdentifierAccessor,
+ data_access_func_detail.identifier_accessor.next.next, # type: ignore
  ).items["Name"]
 
  qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}"
@@ -768,10 +769,13 @@ class NativeQueryLineage(AbstractLineage):
  ): # database name is explicitly set
  return database
 
- return get_next_item( # database name is set in Name argument
- data_access_tokens, "Name"
- ) or get_next_item( # If both above arguments are not available, then try Catalog
- data_access_tokens, "Catalog"
+ return (
+ get_next_item( # database name is set in Name argument
+ data_access_tokens, "Name"
+ )
+ or get_next_item( # If both above arguments are not available, then try Catalog
+ data_access_tokens, "Catalog"
+ )
  )
 
  def create_lineage(
@@ -819,9 +823,7 @@ class NativeQueryLineage(AbstractLineage):
  values=tree_function.remove_whitespaces_from_list(
  tree_function.token_values(flat_argument_list[1])
  ),
- )[
- 0
- ] # Remove any whitespaces and double quotes character
+ )[0] # Remove any whitespaces and double quotes character
 
  server = tree_function.strip_char_from_list([data_access_tokens[2]])[0]
 
@@ -188,9 +188,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
  # - The inner function Table.TransformColumnTypes takes #"Removed Columns1"
  # (a table reference) as its first argument
  # - Its result is then passed as the first argument to Table.SplitColumn
- second_invoke_expression: Optional[
- Tree
- ] = tree_function.first_invoke_expression_func(first_argument)
+ second_invoke_expression: Optional[Tree] = (
+ tree_function.first_invoke_expression_func(first_argument)
+ )
  if second_invoke_expression:
  # 1. The First argument is function call
  # 2. That function's first argument references next table variable
@@ -304,14 +304,14 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
  logger.debug(v_statement.pretty())
  return None
 
- invoke_expression: Optional[
- Tree
- ] = tree_function.first_invoke_expression_func(rh_tree)
+ invoke_expression: Optional[Tree] = (
+ tree_function.first_invoke_expression_func(rh_tree)
+ )
 
  if invoke_expression is not None:
- result: Union[
- DataAccessFunctionDetail, List[str], None
- ] = self._process_invoke_expression(invoke_expression)
+ result: Union[DataAccessFunctionDetail, List[str], None] = (
+ self._process_invoke_expression(invoke_expression)
+ )
  if result is None:
  return None # No need to process some un-expected grammar found while processing invoke_expression
  if isinstance(result, DataAccessFunctionDetail):
@@ -368,9 +368,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
  return lineage
 
  # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
- table_links: List[
- DataAccessFunctionDetail
- ] = self.create_data_access_functional_detail(output_variable)
+ table_links: List[DataAccessFunctionDetail] = (
+ self.create_data_access_functional_detail(output_variable)
+ )
 
  # Each item is data-access function
  for f_detail in table_links:
@@ -390,7 +390,7 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
 
  # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
  # & also pass additional information that will be need to generate lineage
- pattern_handler: (AbstractLineage) = supported_resolver.handler()(
+ pattern_handler: AbstractLineage = supported_resolver.handler()(
  ctx=ctx,
  table=self.table,
  config=config,
@@ -945,9 +945,9 @@ class Mapper:
  # Convert tiles to charts
  ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
  # Lets convert dashboard to datahub dashboard
- dashboard_mcps: List[
- MetadataChangeProposalWrapper
- ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+ dashboard_mcps: List[MetadataChangeProposalWrapper] = (
+ self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+ )
 
  # Now add MCPs in sequence
  mcps.extend(ds_mcps)
@@ -1472,9 +1472,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
  def _get_dashboard_patch_work_unit(
  self, work_unit: MetadataWorkUnit
  ) -> Optional[MetadataWorkUnit]:
- dashboard_info_aspect: Optional[
- DashboardInfoClass
- ] = work_unit.get_aspect_of_type(DashboardInfoClass)
+ dashboard_info_aspect: Optional[DashboardInfoClass] = (
+ work_unit.get_aspect_of_type(DashboardInfoClass)
+ )
 
  if dashboard_info_aspect and self.source_config.patch_metadata:
  return convert_dashboard_info_to_patch(
@@ -425,9 +425,9 @@ class DataResolverBase(ABC):
 
  response.raise_for_status()
 
- assert (
- Constant.VALUE in response.json()
- ), "'value' key is not present in paginated response"
+ assert Constant.VALUE in response.json(), (
+ "'value' key is not present in paginated response"
+ )
 
  if not response.json()[Constant.VALUE]: # if it is an empty list then break
  break
@@ -447,13 +447,13 @@ class DataResolverBase(ABC):
  if raw_app is None:
  return None
 
- assert (
- Constant.ID in raw_app
- ), f"{Constant.ID} is required field not present in server response"
+ assert Constant.ID in raw_app, (
+ f"{Constant.ID} is required field not present in server response"
+ )
 
- assert (
- Constant.NAME in raw_app
- ), f"{Constant.NAME} is required field not present in server response"
+ assert Constant.NAME in raw_app, (
+ f"{Constant.NAME} is required field not present in server response"
+ )
 
  return App(
  id=raw_app[Constant.ID],
@@ -156,7 +156,7 @@ class QlikAPI:
  )
  if chart:
  if not chart.title:
- chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
+ chart.title = f"Object {i + 1} of Sheet '{sheet.title}'"
  sheet.charts.append(chart)
  websocket_connection.handle.pop()
  return sheet
@@ -178,9 +178,9 @@ class RedshiftConfig(
  @root_validator(pre=True)
  def check_email_is_set_on_usage(cls, values):
  if values.get("include_usage_statistics"):
- assert (
- "email_domain" in values and values["email_domain"]
- ), "email_domain needs to be set if usage is enabled"
+ assert "email_domain" in values and values["email_domain"], (
+ "email_domain needs to be set if usage is enabled"
+ )
  return values
 
  @root_validator(skip_on_failure=True)