acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (211)
  1. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
  2. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
  3. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
  4. datahub/__init__.py +1 -1
  5. datahub/api/entities/assertion/assertion_operator.py +3 -5
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/datacontract/assertion_operator.py +3 -5
  8. datahub/api/entities/dataproduct/dataproduct.py +4 -4
  9. datahub/api/entities/dataset/dataset.py +2 -1
  10. datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
  11. datahub/cli/cli_utils.py +13 -2
  12. datahub/cli/delete_cli.py +3 -3
  13. datahub/cli/docker_cli.py +6 -6
  14. datahub/cli/ingest_cli.py +25 -15
  15. datahub/cli/lite_cli.py +2 -2
  16. datahub/cli/migrate.py +5 -5
  17. datahub/cli/specific/assertions_cli.py +3 -3
  18. datahub/cli/specific/structuredproperties_cli.py +84 -0
  19. datahub/cli/timeline_cli.py +1 -1
  20. datahub/configuration/common.py +1 -2
  21. datahub/configuration/config_loader.py +73 -50
  22. datahub/configuration/git.py +2 -2
  23. datahub/configuration/time_window_config.py +10 -5
  24. datahub/emitter/mce_builder.py +4 -8
  25. datahub/emitter/mcp_builder.py +27 -0
  26. datahub/emitter/mcp_patch_builder.py +1 -2
  27. datahub/emitter/rest_emitter.py +126 -85
  28. datahub/entrypoints.py +6 -0
  29. datahub/ingestion/api/incremental_lineage_helper.py +2 -8
  30. datahub/ingestion/api/report.py +1 -2
  31. datahub/ingestion/api/source.py +4 -2
  32. datahub/ingestion/api/source_helpers.py +1 -1
  33. datahub/ingestion/extractor/json_schema_util.py +3 -3
  34. datahub/ingestion/extractor/schema_util.py +3 -5
  35. datahub/ingestion/fs/s3_fs.py +3 -3
  36. datahub/ingestion/glossary/datahub_classifier.py +6 -4
  37. datahub/ingestion/graph/client.py +22 -19
  38. datahub/ingestion/graph/config.py +1 -1
  39. datahub/ingestion/run/pipeline.py +8 -7
  40. datahub/ingestion/run/pipeline_config.py +3 -3
  41. datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
  42. datahub/ingestion/source/abs/source.py +19 -8
  43. datahub/ingestion/source/aws/glue.py +77 -47
  44. datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
  45. datahub/ingestion/source/aws/s3_util.py +24 -1
  46. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  47. datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
  48. datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
  49. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
  50. datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
  51. datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
  52. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
  53. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
  54. datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
  55. datahub/ingestion/source/bigquery_v2/queries.py +1 -3
  56. datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
  57. datahub/ingestion/source/bigquery_v2/usage.py +60 -60
  58. datahub/ingestion/source/cassandra/cassandra.py +0 -1
  59. datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
  60. datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
  61. datahub/ingestion/source/confluent_schema_registry.py +6 -6
  62. datahub/ingestion/source/csv_enricher.py +29 -29
  63. datahub/ingestion/source/datahub/config.py +10 -0
  64. datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
  65. datahub/ingestion/source/datahub/datahub_source.py +12 -2
  66. datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
  67. datahub/ingestion/source/dbt/dbt_common.py +9 -7
  68. datahub/ingestion/source/delta_lake/source.py +0 -5
  69. datahub/ingestion/source/demo_data.py +1 -1
  70. datahub/ingestion/source/dremio/dremio_api.py +4 -4
  71. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
  72. datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
  73. datahub/ingestion/source/dremio/dremio_source.py +2 -2
  74. datahub/ingestion/source/elastic_search.py +4 -4
  75. datahub/ingestion/source/fivetran/fivetran.py +1 -6
  76. datahub/ingestion/source/gc/datahub_gc.py +11 -14
  77. datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
  78. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
  79. datahub/ingestion/source/gcs/gcs_source.py +3 -2
  80. datahub/ingestion/source/ge_data_profiler.py +2 -5
  81. datahub/ingestion/source/ge_profiling_config.py +3 -3
  82. datahub/ingestion/source/iceberg/iceberg.py +13 -6
  83. datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
  84. datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
  85. datahub/ingestion/source/identity/azure_ad.py +3 -3
  86. datahub/ingestion/source/identity/okta.py +3 -3
  87. datahub/ingestion/source/kafka/kafka.py +11 -9
  88. datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
  89. datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
  90. datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
  91. datahub/ingestion/source/looker/looker_common.py +19 -19
  92. datahub/ingestion/source/looker/looker_config.py +11 -6
  93. datahub/ingestion/source/looker/looker_source.py +25 -25
  94. datahub/ingestion/source/looker/looker_template_language.py +3 -3
  95. datahub/ingestion/source/looker/looker_usage.py +5 -7
  96. datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
  97. datahub/ingestion/source/looker/lookml_source.py +13 -15
  98. datahub/ingestion/source/looker/view_upstream.py +5 -5
  99. datahub/ingestion/source/metabase.py +1 -6
  100. datahub/ingestion/source/mlflow.py +4 -9
  101. datahub/ingestion/source/mode.py +5 -5
  102. datahub/ingestion/source/mongodb.py +6 -4
  103. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  104. datahub/ingestion/source/nifi.py +24 -31
  105. datahub/ingestion/source/openapi.py +9 -9
  106. datahub/ingestion/source/powerbi/config.py +12 -12
  107. datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
  108. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
  109. datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
  110. datahub/ingestion/source/powerbi/powerbi.py +6 -6
  111. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
  112. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
  113. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  114. datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
  115. datahub/ingestion/source/redash.py +0 -5
  116. datahub/ingestion/source/redshift/config.py +3 -3
  117. datahub/ingestion/source/redshift/redshift.py +45 -46
  118. datahub/ingestion/source/redshift/usage.py +33 -33
  119. datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
  120. datahub/ingestion/source/s3/source.py +11 -15
  121. datahub/ingestion/source/salesforce.py +26 -25
  122. datahub/ingestion/source/schema/json_schema.py +1 -1
  123. datahub/ingestion/source/sigma/sigma.py +3 -3
  124. datahub/ingestion/source/sigma/sigma_api.py +12 -10
  125. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  126. datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
  127. datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
  128. datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
  129. datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
  130. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
  131. datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
  132. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
  133. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
  134. datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
  135. datahub/ingestion/source/sql/athena.py +1 -3
  136. datahub/ingestion/source/sql/clickhouse.py +8 -14
  137. datahub/ingestion/source/sql/oracle.py +1 -3
  138. datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
  139. datahub/ingestion/source/sql/sql_types.py +1 -2
  140. datahub/ingestion/source/sql/sql_utils.py +5 -0
  141. datahub/ingestion/source/sql/teradata.py +18 -5
  142. datahub/ingestion/source/state/profiling_state_handler.py +3 -3
  143. datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
  144. datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
  145. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
  146. datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
  147. datahub/ingestion/source/superset.py +1 -6
  148. datahub/ingestion/source/tableau/tableau.py +343 -117
  149. datahub/ingestion/source/tableau/tableau_common.py +5 -2
  150. datahub/ingestion/source/unity/config.py +3 -1
  151. datahub/ingestion/source/unity/proxy.py +1 -1
  152. datahub/ingestion/source/unity/source.py +74 -74
  153. datahub/ingestion/source/unity/usage.py +3 -1
  154. datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
  155. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
  156. datahub/ingestion/source/usage/usage_common.py +1 -1
  157. datahub/ingestion/source_report/ingestion_stage.py +24 -20
  158. datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
  159. datahub/ingestion/transformer/add_dataset_properties.py +3 -3
  160. datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
  161. datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
  162. datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
  163. datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
  164. datahub/ingestion/transformer/tags_to_terms.py +7 -7
  165. datahub/integrations/assertion/snowflake/compiler.py +10 -10
  166. datahub/lite/duckdb_lite.py +12 -10
  167. datahub/metadata/_schema_classes.py +317 -44
  168. datahub/metadata/_urns/urn_defs.py +69 -15
  169. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  170. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  171. datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
  172. datahub/metadata/schema.avsc +302 -89
  173. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  174. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  175. datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
  176. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
  177. datahub/metadata/schemas/DatasetKey.avsc +2 -1
  178. datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
  179. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
  180. datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
  181. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  182. datahub/metadata/schemas/MLModelProperties.avsc +96 -48
  183. datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
  184. datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
  185. datahub/metadata/schemas/VersionProperties.avsc +216 -0
  186. datahub/metadata/schemas/VersionSetKey.avsc +26 -0
  187. datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
  188. datahub/secret/datahub_secrets_client.py +12 -21
  189. datahub/secret/secret_common.py +14 -8
  190. datahub/specific/aspect_helpers/custom_properties.py +1 -2
  191. datahub/sql_parsing/schema_resolver.py +5 -10
  192. datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
  193. datahub/sql_parsing/sqlglot_lineage.py +3 -3
  194. datahub/sql_parsing/sqlglot_utils.py +1 -1
  195. datahub/telemetry/stats.py +1 -2
  196. datahub/testing/mcp_diff.py +1 -1
  197. datahub/utilities/file_backed_collections.py +11 -11
  198. datahub/utilities/hive_schema_to_avro.py +2 -2
  199. datahub/utilities/logging_manager.py +2 -2
  200. datahub/utilities/lossy_collections.py +3 -3
  201. datahub/utilities/mapping.py +3 -3
  202. datahub/utilities/memory_footprint.py +3 -2
  203. datahub/utilities/perf_timer.py +11 -6
  204. datahub/utilities/serialized_lru_cache.py +3 -1
  205. datahub/utilities/sqlalchemy_query_combiner.py +6 -6
  206. datahub/utilities/sqllineage_patch.py +1 -1
  207. datahub/utilities/stats_collections.py +3 -1
  208. datahub/utilities/urns/_urn_base.py +28 -5
  209. datahub/utilities/urns/urn_iter.py +2 -2
  210. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
  211. {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
@@ -288,7 +288,9 @@ class MongoDBSource(StatefulIngestionSourceBase):

         # See https://pymongo.readthedocs.io/en/stable/examples/datetimes.html#handling-out-of-range-datetimes
         self.mongo_client = MongoClient(
-            self.config.connect_uri, datetime_conversion="DATETIME_AUTO", **options  # type: ignore
+            self.config.connect_uri,
+            datetime_conversion="DATETIME_AUTO",
+            **options,  # type: ignore
         )

         # This cheaply tests the connection. For details, see
@@ -470,9 +472,9 @@ class MongoDBSource(StatefulIngestionSourceBase):
             )
             # Add this information to the custom properties so user can know they are looking at downsampled schema
             dataset_properties.customProperties["schema.downsampled"] = "True"
-            dataset_properties.customProperties[
-                "schema.totalFields"
-            ] = f"{collection_schema_size}"
+            dataset_properties.customProperties["schema.totalFields"] = (
+                f"{collection_schema_size}"
+            )

         logger.debug(f"Size of collection fields = {len(collection_fields)}")
         # append each schema field (sort so output is consistent)
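
The two mongodb.py hunks above are pure re-wrapping; the datetime_conversion="DATETIME_AUTO" argument (available in pymongo 4.3+) is unchanged. It makes out-of-range BSON datetimes decode as DatetimeMS instead of raising, which matters when sampling arbitrary collections for schema inference. A minimal connection sketch of that option, using a placeholder URI:

from pymongo import MongoClient

# Placeholder URI; substitute your own connect_uri.
# DATETIME_AUTO falls back to bson.DatetimeMS for dates that do not fit
# into datetime.datetime, instead of raising during decoding.
client = MongoClient(
    "mongodb://localhost:27017",
    datetime_conversion="DATETIME_AUTO",
)
print(client.list_database_names())  # requires a reachable server
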
@@ -286,7 +286,7 @@ class Neo4jSource(Source):
         df = self.get_neo4j_metadata(
             "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;"
         )
-        for index, row in df.iterrows():
+        for _, row in df.iterrows():
             try:
                 yield MetadataWorkUnit(
                     id=row["key"],
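
The neo4j_source.py change only renames the unused loop variable from index to _, the conventional placeholder for values that DataFrame.iterrows() yields but the loop body never reads. A standalone illustration with toy data (not taken from the package):

import pandas as pd

df = pd.DataFrame({"key": ["Person", "Movie"], "value": [{"type": "node"}, {"type": "node"}]})

# iterrows() yields (index, row) pairs; `_` signals the index is unused.
for _, row in df.iterrows():
    print(row["key"], row["value"])
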
@@ -184,9 +184,9 @@ class NifiSourceConfig(EnvConfigMixin):

     @validator("site_url")
     def validator_site_url(cls, site_url: str) -> str:
-        assert site_url.startswith(
-            ("http://", "https://")
-        ), "site_url must start with http:// or https://"
+        assert site_url.startswith(("http://", "https://")), (
+            "site_url must start with http:// or https://"
+        )

         if not site_url.endswith("/"):
             site_url = site_url + "/"
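
The nifi.py validator above both enforces the URL scheme and normalizes the trailing slash; only the assert's message placement changes. A self-contained sketch of the same pattern, assuming pydantic v1-style validators as used in the hunk (the DemoNifiConfig model and the final return are illustrative, not taken from the package):

from pydantic import BaseModel, validator

class DemoNifiConfig(BaseModel):
    site_url: str

    @validator("site_url")
    def validator_site_url(cls, site_url: str) -> str:
        # Reject URLs without an explicit scheme.
        assert site_url.startswith(("http://", "https://")), (
            "site_url must start with http:// or https://"
        )
        # Normalize so later path concatenation ("nifi-api/", etc.) is safe.
        if not site_url.endswith("/"):
            site_url = site_url + "/"
        return site_url

print(DemoNifiConfig(site_url="https://nifi.example.com").site_url)
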
@@ -484,17 +484,10 @@ class NifiSource(Source):
     def rest_api_base_url(self):
         return self.config.site_url[: -len("nifi/")] + "nifi-api/"

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-        config = NifiSourceConfig.parse_obj(config_dict)
-        return cls(config, ctx)
-
     def get_report(self) -> SourceReport:
         return self.report

-    def update_flow(
-        self, pg_flow_dto: Dict, recursion_level: int = 0
-    ) -> None:  # noqa: C901
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:  # noqa: C901
         """
         Update self.nifi_flow with contents of the input process group `pg_flow_dto`
         """
@@ -553,16 +546,16 @@ class NifiSource(Source):
         for inputPort in flow_dto.get("inputPorts", []):
             component = inputPort.get("component")
             if inputPort.get("allowRemoteAccess"):
-                self.nifi_flow.remotely_accessible_ports[
-                    component.get("id")
-                ] = NifiComponent(
-                    component.get("id"),
-                    component.get("name"),
-                    component.get("type"),
-                    component.get("parentGroupId"),
-                    NifiType.INPUT_PORT,
-                    comments=component.get("comments"),
-                    status=component.get("status", {}).get("runStatus"),
+                self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                    NifiComponent(
+                        component.get("id"),
+                        component.get("name"),
+                        component.get("type"),
+                        component.get("parentGroupId"),
+                        NifiType.INPUT_PORT,
+                        comments=component.get("comments"),
+                        status=component.get("status", {}).get("runStatus"),
+                    )
                 )
                 logger.debug(f"Adding remotely accessible port {component.get('id')}")
             else:
@@ -581,16 +574,16 @@ class NifiSource(Source):
         for outputPort in flow_dto.get("outputPorts", []):
             component = outputPort.get("component")
             if outputPort.get("allowRemoteAccess"):
-                self.nifi_flow.remotely_accessible_ports[
-                    component.get("id")
-                ] = NifiComponent(
-                    component.get("id"),
-                    component.get("name"),
-                    component.get("type"),
-                    component.get("parentGroupId"),
-                    NifiType.OUTPUT_PORT,
-                    comments=component.get("comments"),
-                    status=component.get("status", {}).get("runStatus"),
+                self.nifi_flow.remotely_accessible_ports[component.get("id")] = (
+                    NifiComponent(
+                        component.get("id"),
+                        component.get("name"),
+                        component.get("type"),
+                        component.get("parentGroupId"),
+                        NifiType.OUTPUT_PORT,
+                        comments=component.get("comments"),
+                        status=component.get("status", {}).get("runStatus"),
+                    )
                 )
                 logger.debug(f"Adding remotely accessible port {component.get('id')}")
             else:
@@ -101,16 +101,16 @@ class OpenApiConfig(ConfigModel):
             # details there once, and then use that session for all requests.
             self.token = f"Bearer {self.bearer_token}"
         else:
-            assert (
-                "url_complement" in self.get_token.keys()
-            ), "When 'request_type' is set to 'get', an url_complement is needed for the request."
+            assert "url_complement" in self.get_token.keys(), (
+                "When 'request_type' is set to 'get', an url_complement is needed for the request."
+            )
             if self.get_token["request_type"] == "get":
-                assert (
-                    "{username}" in self.get_token["url_complement"]
-                ), "we expect the keyword {username} to be present in the url"
-                assert (
-                    "{password}" in self.get_token["url_complement"]
-                ), "we expect the keyword {password} to be present in the url"
+                assert "{username}" in self.get_token["url_complement"], (
+                    "we expect the keyword {username} to be present in the url"
+                )
+                assert "{password}" in self.get_token["url_complement"], (
+                    "we expect the keyword {password} to be present in the url"
+                )
                 url4req = self.get_token["url_complement"].replace(
                     "{username}", self.username
                 )
@@ -225,9 +225,9 @@ class PowerBiDashboardSourceReport(StaleEntityRemovalSourceReport):
 def default_for_dataset_type_mapping() -> Dict[str, str]:
     dict_: dict = {}
     for item in SupportedDataPlatform:
-        dict_[
-            item.value.powerbi_data_platform_name
-        ] = item.value.datahub_data_platform_name
+        dict_[item.value.powerbi_data_platform_name] = (
+            item.value.datahub_data_platform_name
+        )

     return dict_

@@ -303,15 +303,15 @@ class PowerBiDashboardSourceConfig(
     # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
     # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
     # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
-    dataset_type_mapping: Union[
-        Dict[str, str], Dict[str, PlatformDetail]
-    ] = pydantic.Field(
-        default_factory=default_for_dataset_type_mapping,
-        description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
-        "DataHub supported datasources."
-        "You can configured platform instance for dataset lineage. "
-        "See Quickstart Recipe for mapping",
-        hidden_from_docs=True,
+    dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
+        pydantic.Field(
+            default_factory=default_for_dataset_type_mapping,
+            description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+            "DataHub supported datasources."
+            "You can configured platform instance for dataset lineage. "
+            "See Quickstart Recipe for mapping",
+            hidden_from_docs=True,
+        )
     )
     # PowerBI datasource's server to platform instance mapping
     server_to_platform_instance: Dict[
@@ -128,17 +128,17 @@ def get_upstream_tables(
         reporter.m_query_parse_successes += 1

     try:
-        lineage: List[
-            datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
-        ] = resolver.MQueryResolver(
-            table=table,
-            parse_tree=parse_tree,
-            reporter=reporter,
-            parameters=parameters,
-        ).resolve_to_lineage(
-            ctx=ctx,
-            config=config,
-            platform_instance_resolver=platform_instance_resolver,
+        lineage: List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage] = (
+            resolver.MQueryResolver(
+                table=table,
+                parse_tree=parse_tree,
+                reporter=reporter,
+                parameters=parameters,
+            ).resolve_to_lineage(
+                ctx=ctx,
+                config=config,
+                platform_instance_resolver=platform_instance_resolver,
+            )
         )

         if lineage:
@@ -170,8 +170,7 @@ class AbstractLineage(ABC):
         logger.debug(f"Processing arguments {arguments}")

         if (
-            len(arguments)
-            >= 4  # [0] is warehouse FQDN.
+            len(arguments) >= 4  # [0] is warehouse FQDN.
             # [1] is endpoint, we are not using it.
             # [2] is "Catalog" key
             # [3] is catalog's value
@@ -215,16 +214,16 @@ class AbstractLineage(ABC):
             native_sql_parser.remove_special_characters(query)
         )

-        parsed_result: Optional[
-            "SqlParsingResult"
-        ] = native_sql_parser.parse_custom_sql(
-            ctx=self.ctx,
-            query=query,
-            platform=self.get_platform_pair().datahub_data_platform_name,
-            platform_instance=platform_detail.platform_instance,
-            env=platform_detail.env,
-            database=database,
-            schema=schema,
+        parsed_result: Optional["SqlParsingResult"] = (
+            native_sql_parser.parse_custom_sql(
+                ctx=self.ctx,
+                query=query,
+                platform=self.get_platform_pair().datahub_data_platform_name,
+                platform_instance=platform_detail.platform_instance,
+                env=platform_detail.env,
+                database=database,
+                schema=schema,
+            )
         )

         if parsed_result is None:
@@ -410,9 +409,9 @@ class DatabricksLineage(AbstractLineage):
             f"Processing Databrick data-access function detail {data_access_func_detail}"
         )
         table_detail: Dict[str, str] = {}
-        temp_accessor: Optional[
-            IdentifierAccessor
-        ] = data_access_func_detail.identifier_accessor
+        temp_accessor: Optional[IdentifierAccessor] = (
+            data_access_func_detail.identifier_accessor
+        )

         while temp_accessor:
             # Condition to handle databricks M-query pattern where table, schema and database all are present in
@@ -647,11 +646,13 @@ class ThreeStepDataAccessPattern(AbstractLineage, ABC):
         db_name: str = data_access_func_detail.identifier_accessor.items["Name"]  # type: ignore
         # Second is schema name
         schema_name: str = cast(
-            IdentifierAccessor, data_access_func_detail.identifier_accessor.next  # type: ignore
+            IdentifierAccessor,
+            data_access_func_detail.identifier_accessor.next,  # type: ignore
         ).items["Name"]
         # Third is table name
         table_name: str = cast(
-            IdentifierAccessor, data_access_func_detail.identifier_accessor.next.next  # type: ignore
+            IdentifierAccessor,
+            data_access_func_detail.identifier_accessor.next.next,  # type: ignore
         ).items["Name"]

         qualified_table_name: str = f"{db_name}.{schema_name}.{table_name}"
@@ -768,10 +769,13 @@ class NativeQueryLineage(AbstractLineage):
         ):  # database name is explicitly set
             return database

-        return get_next_item(  # database name is set in Name argument
-            data_access_tokens, "Name"
-        ) or get_next_item(  # If both above arguments are not available, then try Catalog
-            data_access_tokens, "Catalog"
+        return (
+            get_next_item(  # database name is set in Name argument
+                data_access_tokens, "Name"
+            )
+            or get_next_item(  # If both above arguments are not available, then try Catalog
+                data_access_tokens, "Catalog"
+            )
         )

     def create_lineage(
@@ -819,9 +823,7 @@ class NativeQueryLineage(AbstractLineage):
             values=tree_function.remove_whitespaces_from_list(
                 tree_function.token_values(flat_argument_list[1])
            ),
-        )[
-            0
-        ]  # Remove any whitespaces and double quotes character
+        )[0]  # Remove any whitespaces and double quotes character

         server = tree_function.strip_char_from_list([data_access_tokens[2]])[0]

@@ -188,9 +188,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
         # - The inner function Table.TransformColumnTypes takes #"Removed Columns1"
         # (a table reference) as its first argument
         # - Its result is then passed as the first argument to Table.SplitColumn
-        second_invoke_expression: Optional[
-            Tree
-        ] = tree_function.first_invoke_expression_func(first_argument)
+        second_invoke_expression: Optional[Tree] = (
+            tree_function.first_invoke_expression_func(first_argument)
+        )
         if second_invoke_expression:
             # 1. The First argument is function call
             # 2. That function's first argument references next table variable
@@ -304,14 +304,14 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
             logger.debug(v_statement.pretty())
             return None

-        invoke_expression: Optional[
-            Tree
-        ] = tree_function.first_invoke_expression_func(rh_tree)
+        invoke_expression: Optional[Tree] = (
+            tree_function.first_invoke_expression_func(rh_tree)
+        )

         if invoke_expression is not None:
-            result: Union[
-                DataAccessFunctionDetail, List[str], None
-            ] = self._process_invoke_expression(invoke_expression)
+            result: Union[DataAccessFunctionDetail, List[str], None] = (
+                self._process_invoke_expression(invoke_expression)
+            )
             if result is None:
                 return None  # No need to process some un-expected grammar found while processing invoke_expression
             if isinstance(result, DataAccessFunctionDetail):
@@ -368,9 +368,9 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):
             return lineage

         # Parse M-Query and use output_variable as root of tree and create instance of DataAccessFunctionDetail
-        table_links: List[
-            DataAccessFunctionDetail
-        ] = self.create_data_access_functional_detail(output_variable)
+        table_links: List[DataAccessFunctionDetail] = (
+            self.create_data_access_functional_detail(output_variable)
+        )

         # Each item is data-access function
         for f_detail in table_links:
@@ -390,7 +390,7 @@ class MQueryResolver(AbstractDataAccessMQueryResolver, ABC):

             # From supported_resolver enum get respective handler like AmazonRedshift or Snowflake or Oracle or NativeQuery and create instance of it
             # & also pass additional information that will be need to generate lineage
-            pattern_handler: (AbstractLineage) = supported_resolver.handler()(
+            pattern_handler: AbstractLineage = supported_resolver.handler()(
                 ctx=ctx,
                 table=self.table,
                 config=config,
@@ -945,9 +945,9 @@ class Mapper:
         # Convert tiles to charts
         ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
         # Lets convert dashboard to datahub dashboard
-        dashboard_mcps: List[
-            MetadataChangeProposalWrapper
-        ] = self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+        dashboard_mcps: List[MetadataChangeProposalWrapper] = (
+            self.to_datahub_dashboard_mcp(dashboard, workspace, chart_mcps, user_mcps)
+        )

         # Now add MCPs in sequence
         mcps.extend(ds_mcps)
@@ -1472,9 +1472,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     def _get_dashboard_patch_work_unit(
         self, work_unit: MetadataWorkUnit
     ) -> Optional[MetadataWorkUnit]:
-        dashboard_info_aspect: Optional[
-            DashboardInfoClass
-        ] = work_unit.get_aspect_of_type(DashboardInfoClass)
+        dashboard_info_aspect: Optional[DashboardInfoClass] = (
+            work_unit.get_aspect_of_type(DashboardInfoClass)
+        )

         if dashboard_info_aspect and self.source_config.patch_metadata:
             return convert_dashboard_info_to_patch(
@@ -425,9 +425,9 @@ class DataResolverBase(ABC):

             response.raise_for_status()

-            assert (
-                Constant.VALUE in response.json()
-            ), "'value' key is not present in paginated response"
+            assert Constant.VALUE in response.json(), (
+                "'value' key is not present in paginated response"
+            )

             if not response.json()[Constant.VALUE]:  # if it is an empty list then break
                 break
@@ -447,13 +447,13 @@ class DataResolverBase(ABC):
         if raw_app is None:
             return None

-        assert (
-            Constant.ID in raw_app
-        ), f"{Constant.ID} is required field not present in server response"
+        assert Constant.ID in raw_app, (
+            f"{Constant.ID} is required field not present in server response"
+        )

-        assert (
-            Constant.NAME in raw_app
-        ), f"{Constant.NAME} is required field not present in server response"
+        assert Constant.NAME in raw_app, (
+            f"{Constant.NAME} is required field not present in server response"
+        )

         return App(
             id=raw_app[Constant.ID],
@@ -96,7 +96,7 @@ class PowerBiAPI:
             url: str = e.request.url if e.request else "URL not available"
             self.reporter.warning(
                 title="Metadata API Timeout",
-                message=f"Metadata endpoints are not reachable. Check network connectivity to PowerBI Service.",
+                message="Metadata endpoints are not reachable. Check network connectivity to PowerBI Service.",
                 context=f"url={url}",
             )

@@ -173,7 +173,7 @@ class PowerBiAPI:
                 entity=entity_name,
                 entity_id=entity_id,
             )
-        except:  # It will catch all type of exception
+        except Exception:
             e = self.log_http_error(
                 message=f"Unable to fetch users for {entity_name}({entity_id})."
             )
@@ -210,7 +210,7 @@ class PowerBiAPI:
                     message="A cross-workspace reference that failed to be resolved. Please ensure that no global workspace is being filtered out due to the workspace_id_pattern.",
                     context=f"report-name: {report.name} and dataset-id: {report.dataset_id}",
                 )
-        except:
+        except Exception:
             self.log_http_error(
                 message=f"Unable to fetch reports for workspace {workspace.name}"
             )
@@ -260,7 +260,7 @@ class PowerBiAPI:

             groups = self._get_resolver().get_groups(filter_=filter_)

-        except:
+        except Exception:
             self.log_http_error(message="Unable to fetch list of workspaces")
             # raise # we want this exception to bubble up

@@ -292,7 +292,7 @@ class PowerBiAPI:
             modified_workspace_ids = self.__admin_api_resolver.get_modified_workspaces(
                 self.__config.modified_since
             )
-        except:
+        except Exception:
             self.log_http_error(message="Unable to fetch list of modified workspaces.")

         return modified_workspace_ids
@@ -303,8 +303,8 @@ class PowerBiAPI:
             scan_id = self.__admin_api_resolver.create_scan_job(
                 workspace_ids=workspace_ids
             )
-        except:
-            e = self.log_http_error(message=f"Unable to fetch get scan result.")
+        except Exception:
+            e = self.log_http_error(message="Unable to fetch get scan result.")
         if data_resolver.is_permission_error(cast(Exception, e)):
             logger.warning(
                 "Dataset lineage can not be ingestion because this user does not have access to the PowerBI Admin "
@@ -485,7 +485,7 @@ class PowerBiReportServerDashboardSourceReport(SourceReport):
         self.filtered_reports.append(view)


-@platform_name("PowerBI")
+@platform_name("PowerBI Report Server")
 @config_class(PowerBiReportServerDashboardSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
@@ -156,7 +156,7 @@ class QlikAPI:
             )
             if chart:
                 if not chart.title:
-                    chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
+                    chart.title = f"Object {i + 1} of Sheet '{sheet.title}'"
                 sheet.charts.append(chart)
         websocket_connection.handle.pop()
         return sheet
@@ -369,11 +369,6 @@ class RedashSource(Source):
         else:
             raise ValueError(f"Failed to connect to {self.config.connect_uri}/api")

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = RedashConfig.parse_obj(config_dict)
-        return cls(ctx, config)
-
     def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict:
         url = f"/api/data_sources/{data_source_id}"
         resp = self.client._get(url).json()
@@ -178,9 +178,9 @@ class RedshiftConfig(
     @root_validator(pre=True)
     def check_email_is_set_on_usage(cls, values):
         if values.get("include_usage_statistics"):
-            assert (
-                "email_domain" in values and values["email_domain"]
-            ), "email_domain needs to be set if usage is enabled"
+            assert "email_domain" in values and values["email_domain"], (
+                "email_domain needs to be set if usage is enabled"
+            )
         return values

     @root_validator(skip_on_failure=True)
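
Taken together, most of the hunks in this release are mechanical reformatting rather than behavior changes: long assert statements, annotated assignments, and subscript targets are re-wrapped so the condition or target stays on one line and the message or value moves into a parenthesized continuation, consistent with a code-formatter update. A before/after sketch of the recurring assert pattern, mirroring the redshift/config.py hunk above:

def check_email_is_set_on_usage(values: dict) -> dict:
    if values.get("include_usage_statistics"):
        # Old layout split the condition across lines:
        #     assert (
        #         "email_domain" in values and values["email_domain"]
        #     ), "email_domain needs to be set if usage is enabled"
        # New layout keeps the condition on one line and parenthesizes the message:
        assert "email_domain" in values and values["email_domain"], (
            "email_domain needs to be set if usage is enabled"
        )
    return values

print(check_email_is_set_on_usage({"include_usage_statistics": False}))
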