acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the package registry's advisory page for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
 import datetime
 import json
 import logging
+from dataclasses import dataclass
 from json import JSONDecodeError
 from typing import (
     Any,
@@ -18,7 +19,7 @@ from typing import (
 from looker_sdk.error import SDKError
 from looker_sdk.rtl.serialize import DeserializeError
 from looker_sdk.sdk.api40.models import (
-    Dashboard,
+    Dashboard as LookerAPIDashboard,
     DashboardElement,
     Folder,
     FolderBase,
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
 
 import datahub.emitter.mce_builder as builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import create_embed_mcp, gen_containers
+from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -51,6 +52,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     BIAssetSubTypes,
     BIContainerSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.looker import looker_usage
 from datahub.ingestion.source.looker.looker_common import (
@@ -79,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
79
81
  StatefulIngestionSourceBase,
80
82
  )
81
83
  from datahub.metadata.com.linkedin.pegasus2avro.common import (
82
- AuditStamp,
83
- ChangeAuditStamps,
84
- DataPlatformInstance,
85
84
  Status,
86
85
  )
87
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
88
- ChartSnapshot,
89
- DashboardSnapshot,
90
- )
91
- from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
92
86
  from datahub.metadata.schema_classes import (
93
- BrowsePathEntryClass,
94
- BrowsePathsClass,
95
- BrowsePathsV2Class,
96
- ChartInfoClass,
97
87
  ChartTypeClass,
98
- ContainerClass,
99
- DashboardInfoClass,
88
+ EmbedClass,
100
89
  InputFieldClass,
101
90
  InputFieldsClass,
102
91
  OwnerClass,
103
- OwnershipClass,
104
92
  OwnershipTypeClass,
105
- SubTypesClass,
106
93
  )
94
+ from datahub.sdk.chart import Chart
95
+ from datahub.sdk.container import Container
96
+ from datahub.sdk.dashboard import Dashboard
97
+ from datahub.sdk.dataset import Dataset
98
+ from datahub.sdk.entity import Entity
107
99
  from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
100
+ from datahub.utilities.sentinels import Unset, unset
108
101
 
109
102
  logger = logging.getLogger(__name__)
110
103
 
111
104
 
105
+ @dataclass
106
+ class DashboardProcessingResult:
107
+ """Result of processing a single dashboard."""
108
+
109
+ entities: List[Entity]
110
+ dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
111
+ dashboard_id: str
112
+ start_time: datetime.datetime
113
+ end_time: datetime.datetime
114
+
115
+
112
116
  @platform_name("Looker")
113
117
  @support_status(SupportStatus.CERTIFIED)
114
118
  @config_class(LookerDashboardSourceConfig)
@@ -126,6 +130,15 @@ logger = logging.getLogger(__name__)
126
130
  SourceCapability.USAGE_STATS,
127
131
  "Enabled by default, configured using `extract_usage_history`",
128
132
  )
133
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
134
+ @capability(
135
+ SourceCapability.CONTAINERS,
136
+ "Enabled by default",
137
+ subtype_modifier=[
138
+ SourceCapabilityModifier.LOOKML_MODEL,
139
+ SourceCapabilityModifier.LOOKER_FOLDER,
140
+ ],
141
+ )
129
142
  class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
130
143
  """
131
144
  This plugin extracts the following:
@@ -279,6 +292,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
279
292
  return []
280
293
  result = []
281
294
 
295
+ if query is not None:
296
+ logger.debug(
297
+ f"Processing query: model={query.model}, view={query.view}, input_fields_count={len(query.fields) if query.fields else 0}"
298
+ )
299
+
282
300
  # query.dynamic_fields can contain:
283
301
  # - looker table calculations: https://docs.looker.com/exploring-data/using-table-calculations
284
302
  # - looker custom measures: https://docs.looker.com/de/exploring-data/adding-fields/custom-measure
@@ -363,7 +381,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
363
381
  filters: MutableMapping[str, Any] = (
364
382
  query.filters if query.filters is not None else {}
365
383
  )
366
- for field in filters.keys():
384
+ for field in filters:
367
385
  if field is None:
368
386
  continue
369
387
 
@@ -399,9 +417,12 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
399
417
  # Get the explore from the view directly
400
418
  explores = [element.query.view] if element.query.view is not None else []
401
419
  logger.debug(
402
- f"Element {element.title}: Explores added via query: {explores}"
420
+ f"Dashboard element {element.title} (ID: {element.id}): Upstream explores added via query={explores} with model={element.query.model}, explore={element.query.view}"
403
421
  )
404
422
  for exp in explores:
423
+ logger.debug(
424
+ f"Adding reachable explore: model={element.query.model}, explore={exp}, element_id={element.id}, title={element.title}"
425
+ )
405
426
  self.add_reachable_explore(
406
427
  model=element.query.model,
407
428
  explore=exp,
@@ -477,12 +498,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
477
498
 
478
499
  # Failing the above two approaches, pick out details from result_maker
479
500
  elif element.result_maker is not None:
480
- model: str = ""
481
501
  input_fields = []
482
502
 
483
503
  explores = []
484
504
  if element.result_maker.query is not None:
485
- model = element.result_maker.query.model
486
505
  if element.result_maker.query.view is not None:
487
506
  explores.append(element.result_maker.query.view)
488
507
  input_fields = self._get_input_fields_from_query(
@@ -502,9 +521,15 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
502
521
 
503
522
  # In addition to the query, filters can point to fields as well
504
523
  assert element.result_maker.filterables is not None
524
+
525
+ # Different dashboard elements my reference explores from different models
526
+ # so we need to create a mapping of explore names to their models to maintain correct associations
527
+ explore_to_model_map = {}
528
+
505
529
  for filterable in element.result_maker.filterables:
506
530
  if filterable.view is not None and filterable.model is not None:
507
- model = filterable.model
531
+ # Store the model for this view/explore in our mapping
532
+ explore_to_model_map[filterable.view] = filterable.model
508
533
  explores.append(filterable.view)
509
534
  self.add_reachable_explore(
510
535
  model=filterable.model,
@@ -527,6 +552,18 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
527
552
 
528
553
  explores = sorted(list(set(explores))) # dedup the list of views
529
554
 
555
+ logger.debug(
556
+ f"Dashboard element {element.id} and their explores with the corresponding model: {explore_to_model_map}"
557
+ )
558
+
559
+ # If we have a query, use its model as the default for any explores that don't have a model in our mapping
560
+ default_model = ""
561
+ if (
562
+ element.result_maker.query is not None
563
+ and element.result_maker.query.model is not None
564
+ ):
565
+ default_model = element.result_maker.query.model
566
+
530
567
  return LookerDashboardElement(
531
568
  id=element.id,
532
569
  title=element.title if element.title is not None else "",
@@ -540,7 +577,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
540
577
  else ""
541
578
  ),
542
579
  upstream_explores=[
543
- LookerExplore(model_name=model, name=exp) for exp in explores
580
+ LookerExplore(
581
+ model_name=explore_to_model_map.get(exp, default_model),
582
+ name=exp,
583
+ )
584
+ for exp in explores
544
585
  ],
545
586
  input_fields=input_fields,
546
587
  owner=None,
@@ -595,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
595
636
 
596
637
  return chart_type
597
638
 
598
- def _get_folder_browse_path_v2_entries(
639
+ def _get_folder_ancestors_urn_entries(
599
640
  self, folder: LookerFolder, include_current_folder: bool = True
600
- ) -> Iterable[BrowsePathEntryClass]:
641
+ ) -> Iterable[str]:
601
642
  for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
602
- assert ancestor.id
643
+ assert ancestor.id # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
603
644
  urn = self._gen_folder_key(ancestor.id).as_urn()
604
- yield BrowsePathEntryClass(id=urn, urn=urn)
645
+ yield urn
605
646
 
606
647
  urn = self._gen_folder_key(folder.id).as_urn()
607
648
  if include_current_folder:
608
- yield BrowsePathEntryClass(id=urn, urn=urn)
609
-
610
- def _create_platform_instance_aspect(
611
- self,
612
- ) -> DataPlatformInstance:
613
- assert self.source_config.platform_name, (
614
- "Platform name is not set in the configuration."
615
- )
616
- assert self.source_config.platform_instance, (
617
- "Platform instance is not set in the configuration."
618
- )
619
-
620
- return DataPlatformInstance(
621
- platform=builder.make_data_platform_urn(self.source_config.platform_name),
622
- instance=builder.make_dataplatform_instance_urn(
623
- platform=self.source_config.platform_name,
624
- instance=self.source_config.platform_instance,
625
- ),
626
- )
649
+ yield urn
627
650
 
628
651
  def _make_chart_urn(self, element_id: str) -> str:
629
652
  platform_instance: Optional[str] = None
@@ -636,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
636
659
  platform_instance=platform_instance,
637
660
  )
638
661
 
639
- def _make_chart_metadata_events(
662
+ def _make_chart_entities(
640
663
  self,
641
664
  dashboard_element: LookerDashboardElement,
642
665
  dashboard: Optional[
643
666
  LookerDashboard
644
667
  ], # dashboard will be None if this is a standalone look
645
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
646
- chart_urn = self._make_chart_urn(
647
- element_id=dashboard_element.get_urn_element_id()
648
- )
649
- self.chart_urns.add(chart_urn)
650
- chart_snapshot = ChartSnapshot(
651
- urn=chart_urn,
652
- aspects=[Status(removed=False)],
653
- )
654
- browse_path_v2: Optional[BrowsePathsV2Class] = None
655
-
656
- chart_type = self._get_chart_type(dashboard_element)
657
- chart_info = ChartInfoClass(
658
- type=chart_type,
659
- description=dashboard_element.description or "",
660
- title=dashboard_element.title or "",
661
- lastModified=ChangeAuditStamps(),
662
- chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
663
- inputs=dashboard_element.get_view_urns(self.source_config),
664
- customProperties={
665
- "upstream_fields": (
666
- ",".join(
667
- sorted({field.name for field in dashboard_element.input_fields})
668
- )
669
- if dashboard_element.input_fields
670
- else ""
671
- )
672
- },
673
- )
674
- chart_snapshot.aspects.append(chart_info)
675
-
668
+ ) -> List[Chart]:
669
+ chart_parent_container: Union[List[str], Unset] = unset
676
670
  if (
677
671
  dashboard
678
672
  and dashboard.folder_path is not None
679
673
  and dashboard.folder is not None
680
674
  ):
681
- browse_path = BrowsePathsClass(
682
- paths=[f"/Folders/{dashboard.folder_path}/{dashboard.title}"]
683
- )
684
- chart_snapshot.aspects.append(browse_path)
685
-
686
- dashboard_urn = self.make_dashboard_urn(dashboard)
687
- browse_path_v2 = BrowsePathsV2Class(
688
- path=[
689
- BrowsePathEntryClass("Folders"),
690
- *self._get_folder_browse_path_v2_entries(dashboard.folder),
691
- BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
692
- ],
693
- )
675
+ chart_parent_container = [
676
+ "Folders",
677
+ *self._get_folder_ancestors_urn_entries(dashboard.folder),
678
+ self.make_dashboard_urn(dashboard),
679
+ ]
694
680
  elif (
695
681
  dashboard is None
696
682
  and dashboard_element.folder_path is not None
697
683
  and dashboard_element.folder is not None
698
- ): # independent look
699
- browse_path = BrowsePathsClass(
700
- paths=[f"/Folders/{dashboard_element.folder_path}"]
701
- )
702
- chart_snapshot.aspects.append(browse_path)
703
- browse_path_v2 = BrowsePathsV2Class(
704
- path=[
705
- BrowsePathEntryClass("Folders"),
706
- *self._get_folder_browse_path_v2_entries(dashboard_element.folder),
707
- ],
708
- )
684
+ ): # Independent look
685
+ chart_parent_container = [
686
+ "Folders",
687
+ *self._get_folder_ancestors_urn_entries(dashboard_element.folder),
688
+ ]
709
689
 
690
+ # Determine chart ownership
691
+ chart_ownership: Optional[List[OwnerClass]] = None
710
692
  if dashboard is not None:
711
693
  ownership = self.get_ownership(dashboard)
712
694
  if ownership is not None:
713
- chart_snapshot.aspects.append(ownership)
695
+ chart_ownership = [ownership]
714
696
  elif dashboard is None and dashboard_element is not None:
715
697
  ownership = self.get_ownership(dashboard_element)
716
698
  if ownership is not None:
717
- chart_snapshot.aspects.append(ownership)
718
-
719
- chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
720
-
721
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
722
- chart_mce,
723
- MetadataChangeProposalWrapper(
724
- entityUrn=chart_urn,
725
- aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
726
- ),
727
- ]
728
-
729
- if self.source_config.include_platform_instance_in_urns:
730
- proposals.append(
731
- MetadataChangeProposalWrapper(
732
- entityUrn=chart_urn,
733
- aspect=self._create_platform_instance_aspect(),
734
- ),
735
- )
699
+ chart_ownership = [ownership]
736
700
 
701
+ chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
737
702
  # If extracting embeds is enabled, produce an MCP for embed URL.
738
703
  if (
739
704
  self.source_config.extract_embed_urls
@@ -743,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
743
708
  self.source_config.external_base_url
744
709
  )
745
710
  if maybe_embed_url:
746
- proposals.append(
747
- create_embed_mcp(
748
- chart_snapshot.urn,
749
- maybe_embed_url,
750
- )
751
- )
752
-
753
- if dashboard is None and dashboard_element.folder:
754
- container = ContainerClass(
755
- container=self._gen_folder_key(dashboard_element.folder.id).as_urn(),
756
- )
757
- proposals.append(
758
- MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
759
- )
711
+ chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
760
712
 
761
- if browse_path_v2:
762
- proposals.append(
763
- MetadataChangeProposalWrapper(
764
- entityUrn=chart_urn, aspect=browse_path_v2
765
- )
713
+ chart_extra_aspects.append(
714
+ InputFieldsClass(
715
+ fields=self._input_fields_from_dashboard_element(dashboard_element)
766
716
  )
767
-
768
- return proposals
769
-
770
- def _make_dashboard_metadata_events(
771
- self, looker_dashboard: LookerDashboard, chart_urns: List[str]
772
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
773
- dashboard_urn = self.make_dashboard_urn(looker_dashboard)
774
- dashboard_snapshot = DashboardSnapshot(
775
- urn=dashboard_urn,
776
- aspects=[],
777
717
  )
778
- browse_path_v2: Optional[BrowsePathsV2Class] = None
779
- dashboard_info = DashboardInfoClass(
780
- description=looker_dashboard.description or "",
781
- title=looker_dashboard.title,
782
- charts=chart_urns,
783
- lastModified=self._get_change_audit_stamps(looker_dashboard),
784
- dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
785
- )
786
-
787
- dashboard_snapshot.aspects.append(dashboard_info)
788
- if (
789
- looker_dashboard.folder_path is not None
790
- and looker_dashboard.folder is not None
791
- ):
792
- browse_path = BrowsePathsClass(
793
- paths=[f"/Folders/{looker_dashboard.folder_path}"]
794
- )
795
- browse_path_v2 = BrowsePathsV2Class(
796
- path=[
797
- BrowsePathEntryClass("Folders"),
798
- *self._get_folder_browse_path_v2_entries(looker_dashboard.folder),
799
- ],
718
+ return [
719
+ Chart(
720
+ chart_type=self._get_chart_type(dashboard_element),
721
+ chart_url=dashboard_element.url(
722
+ self.source_config.external_base_url or ""
723
+ ),
724
+ custom_properties={
725
+ "upstream_fields": (
726
+ ",".join(
727
+ sorted(
728
+ {field.name for field in dashboard_element.input_fields}
729
+ )
730
+ )
731
+ if dashboard_element.input_fields
732
+ else ""
733
+ )
734
+ },
735
+ description=dashboard_element.description or "",
736
+ display_name=dashboard_element.title, # title is (deprecated) using display_name
737
+ extra_aspects=chart_extra_aspects,
738
+ input_datasets=dashboard_element.get_view_urns(self.source_config),
739
+ last_modified=self._get_last_modified_time(
740
+ dashboard
741
+ ), # Inherited from Dashboard
742
+ last_modified_by=self._get_last_modified_by(
743
+ dashboard
744
+ ), # Inherited from Dashboard
745
+ created_at=self._get_created_at(dashboard), # Inherited from Dashboard
746
+ created_by=self._get_created_by(dashboard), # Inherited from Dashboard
747
+ deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard
748
+ deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard
749
+ name=dashboard_element.get_urn_element_id(),
750
+ owners=chart_ownership,
751
+ parent_container=chart_parent_container,
752
+ platform=self.source_config.platform_name,
753
+ platform_instance=self.source_config.platform_instance
754
+ if self.source_config.include_platform_instance_in_urns
755
+ else None,
756
+ subtype=BIAssetSubTypes.LOOKER_LOOK,
800
757
  )
801
- dashboard_snapshot.aspects.append(browse_path)
802
-
803
- ownership = self.get_ownership(looker_dashboard)
804
- if ownership is not None:
805
- dashboard_snapshot.aspects.append(ownership)
806
-
807
- dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
808
-
809
- dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
810
-
811
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
812
- dashboard_mce
813
758
  ]
814
759
 
815
- if looker_dashboard.folder is not None:
816
- container = ContainerClass(
817
- container=self._gen_folder_key(looker_dashboard.folder.id).as_urn(),
818
- )
819
- proposals.append(
820
- MetadataChangeProposalWrapper(entityUrn=dashboard_urn, aspect=container)
821
- )
760
+ def _make_dashboard_entities(
761
+ self, looker_dashboard: LookerDashboard, charts: List[Chart]
762
+ ) -> List[Dashboard]:
763
+ dashboard_ownership: Optional[List[OwnerClass]] = None
764
+ ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
765
+ if ownership is not None:
766
+ dashboard_ownership = [ownership]
822
767
 
823
- if browse_path_v2:
824
- proposals.append(
825
- MetadataChangeProposalWrapper(
826
- entityUrn=dashboard_urn, aspect=browse_path_v2
827
- )
828
- )
768
+ # Extra Aspects not yet supported in the Dashboard entity class SDKv2
769
+ dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
829
770
 
830
- # If extracting embeds is enabled, produce an MCP for embed URL.
771
+ # Embed URL aspect
831
772
  if (
832
773
  self.source_config.extract_embed_urls
833
774
  and self.source_config.external_base_url
834
775
  ):
835
- proposals.append(
836
- create_embed_mcp(
837
- dashboard_snapshot.urn,
838
- looker_dashboard.embed_url(self.source_config.external_base_url),
776
+ dashboard_extra_aspects.append(
777
+ EmbedClass(
778
+ renderUrl=looker_dashboard.embed_url(
779
+ self.source_config.external_base_url
780
+ )
839
781
  )
840
782
  )
841
783
 
842
- if self.source_config.include_platform_instance_in_urns:
843
- proposals.append(
844
- MetadataChangeProposalWrapper(
845
- entityUrn=dashboard_urn,
846
- aspect=self._create_platform_instance_aspect(),
847
- )
784
+ # Input fields aspect
785
+ # Populate input fields from all the dashboard elements
786
+ all_fields: List[InputFieldClass] = []
787
+ for dashboard_element in looker_dashboard.dashboard_elements:
788
+ all_fields.extend(
789
+ self._input_fields_from_dashboard_element(dashboard_element)
848
790
  )
791
+ dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
792
+ # Status aspect
793
+ dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
849
794
 
850
- return proposals
795
+ dashboard_parent_container: Union[List[str], Unset] = unset
796
+ if (
797
+ looker_dashboard.folder_path is not None
798
+ and looker_dashboard.folder is not None
799
+ ):
800
+ dashboard_parent_container = [
801
+ "Folders",
802
+ *self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
803
+ ]
804
+
805
+ return [
806
+ Dashboard(
807
+ charts=charts,
808
+ dashboard_url=looker_dashboard.url(
809
+ self.source_config.external_base_url
810
+ ),
811
+ description=looker_dashboard.description or "",
812
+ display_name=looker_dashboard.title, # title is (deprecated) using display_name
813
+ extra_aspects=dashboard_extra_aspects,
814
+ last_modified=self._get_last_modified_time(looker_dashboard),
815
+ last_modified_by=self._get_last_modified_by(looker_dashboard),
816
+ created_at=self._get_created_at(looker_dashboard),
817
+ created_by=self._get_created_by(looker_dashboard),
818
+ deleted_on=self._get_deleted_on(looker_dashboard),
819
+ deleted_by=self._get_deleted_by(looker_dashboard),
820
+ name=looker_dashboard.get_urn_dashboard_id(),
821
+ owners=dashboard_ownership,
822
+ parent_container=dashboard_parent_container,
823
+ platform=self.source_config.platform_name,
824
+ platform_instance=self.source_config.platform_instance
825
+ if self.source_config.include_platform_instance_in_urns
826
+ else None,
827
+ )
828
+ ]
851
829
 
852
830
  def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
853
831
  # Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
@@ -864,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
864
842
  def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
865
843
  return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
866
844
 
867
- def _make_explore_metadata_events(
845
+ def _make_explore_containers(
868
846
  self,
869
- ) -> Iterable[
870
- Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
871
- ]:
847
+ ) -> Iterable[Union[Container, Dataset]]:
872
848
  if not self.source_config.emit_used_explores_only:
873
849
  explores_to_fetch = list(self.list_all_explores())
874
850
  else:
@@ -877,8 +853,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
877
853
  # fine to set them to None.
878
854
  # TODO: Track project names for each explore.
879
855
  explores_to_fetch = [
880
- (None, model, explore)
881
- for (model, explore) in self.reachable_explores.keys()
856
+ (None, model, explore) for (model, explore) in self.reachable_explores
882
857
  ]
883
858
  explores_to_fetch.sort()
884
859
 
@@ -887,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
887
862
  for project_name, model, _ in explores_to_fetch:
888
863
  if model not in processed_models:
889
864
  model_key = gen_model_key(self.source_config, model)
890
- yield from gen_containers(
865
+ yield Container(
891
866
  container_key=model_key,
892
- name=model,
893
- sub_types=[BIContainerSubTypes.LOOKML_MODEL],
867
+ display_name=model,
868
+ subtype=BIContainerSubTypes.LOOKML_MODEL,
894
869
  extra_properties=(
895
870
  {"project": project_name} if project_name is not None else None
896
871
  ),
897
- )
898
- yield MetadataChangeProposalWrapper(
899
- entityUrn=model_key.as_urn(),
900
- aspect=BrowsePathsV2Class(
901
- path=[BrowsePathEntryClass("Explore")],
902
- ),
872
+ parent_container=["Explore"],
903
873
  )
904
874
 
905
875
  processed_models.append(model)
@@ -910,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
910
880
  ((model, explore) for (_project, model, explore) in explores_to_fetch),
911
881
  max_workers=self.source_config.max_threads,
912
882
  ):
913
- events, explore_id, start_time, end_time = future.result()
883
+ explore_dataset_entity, explore_id, start_time, end_time = future.result()
914
884
  self.reporter.explores_scanned += 1
915
- yield from events
885
+ if explore_dataset_entity:
886
+ yield explore_dataset_entity
916
887
  self.reporter.report_upstream_latency(start_time, end_time)
917
888
  logger.debug(
918
889
  f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
@@ -932,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
932
903
  def fetch_one_explore(
933
904
  self, model: str, explore: str
934
905
  ) -> Tuple[
935
- List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]],
906
+ Optional[Dataset],
936
907
  str,
937
908
  datetime.datetime,
938
909
  datetime.datetime,
939
910
  ]:
940
911
  start_time = datetime.datetime.now()
941
- events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
942
912
  looker_explore = self.explore_registry.get_explore(model, explore)
913
+ explore_dataset_entity: Optional[Dataset] = None
943
914
  if looker_explore is not None:
944
- events = (
945
- looker_explore._to_metadata_events(
946
- self.source_config,
947
- self.reporter,
948
- self.source_config.external_base_url or self.source_config.base_url,
949
- self.source_config.extract_embed_urls,
950
- )
951
- or events
915
+ explore_dataset_entity = looker_explore._to_metadata_events(
916
+ self.source_config,
917
+ self.reporter,
918
+ self.source_config.external_base_url or self.source_config.base_url,
919
+ self.source_config.extract_embed_urls,
952
920
  )
953
921
 
954
- return events, f"{model}:{explore}", start_time, datetime.datetime.now()
955
-
956
- def _extract_event_urn(
957
- self, event: Union[MetadataChangeEvent, MetadataChangeProposalWrapper]
958
- ) -> Optional[str]:
959
- if isinstance(event, MetadataChangeEvent):
960
- return event.proposedSnapshot.urn
961
- else:
962
- return event.entityUrn
922
+ return (
923
+ explore_dataset_entity,
924
+ f"{model}:{explore}",
925
+ start_time,
926
+ datetime.datetime.now(),
927
+ )
963
928
 
964
- def _emit_folder_as_container(
965
- self, folder: LookerFolder
966
- ) -> Iterable[MetadataWorkUnit]:
929
+ def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
967
930
  if folder.id not in self.processed_folders:
968
- yield from gen_containers(
969
- container_key=self._gen_folder_key(folder.id),
970
- name=folder.name,
971
- sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
972
- parent_container_key=(
973
- self._gen_folder_key(folder.parent_id) if folder.parent_id else None
974
- ),
975
- )
976
931
  if folder.parent_id is None:
977
- yield MetadataChangeProposalWrapper(
978
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
979
- aspect=BrowsePathsV2Class(
980
- path=[BrowsePathEntryClass("Folders")],
981
- ),
982
- ).as_workunit()
932
+ yield Container(
933
+ container_key=self._gen_folder_key(folder.id),
934
+ display_name=folder.name,
935
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
936
+ parent_container=["Folders"],
937
+ )
983
938
  else:
984
- yield MetadataChangeProposalWrapper(
985
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
986
- aspect=BrowsePathsV2Class(
987
- path=[
988
- BrowsePathEntryClass("Folders"),
989
- *self._get_folder_browse_path_v2_entries(
990
- folder, include_current_folder=False
991
- ),
992
- ],
993
- ),
994
- ).as_workunit()
939
+ yield Container(
940
+ container_key=self._gen_folder_key(folder.id),
941
+ display_name=folder.name,
942
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
943
+ parent_container=[
944
+ "Folders",
945
+ *self._get_folder_ancestors_urn_entries(
946
+ folder, include_current_folder=False
947
+ ),
948
+ ],
949
+ )
995
950
  self.processed_folders.append(folder.id)
996
951
 
997
952
  def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
@@ -1002,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1002
957
  instance=self.source_config.platform_instance,
1003
958
  )
1004
959
 
1005
- def _make_dashboard_and_chart_mces(
960
+ def _make_dashboard_and_chart_entities(
1006
961
  self, looker_dashboard: LookerDashboard
1007
- ) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
962
+ ) -> Iterable[Union[Chart, Dashboard]]:
1008
963
  # Step 1: Emit metadata for each Chart inside the Dashboard.
1009
- chart_events = []
964
+ chart_events: List[Chart] = []
1010
965
  for element in looker_dashboard.dashboard_elements:
1011
966
  if element.type == "vis":
1012
967
  chart_events.extend(
1013
- self._make_chart_metadata_events(element, looker_dashboard)
968
+ self._make_chart_entities(element, looker_dashboard)
1014
969
  )
1015
970
 
1016
971
  yield from chart_events
1017
972
 
1018
- # Step 2: Emit metadata events for the Dashboard itself.
1019
- chart_urns: Set[str] = (
1020
- set()
1021
- ) # Collect the unique child chart urns for dashboard input lineage.
973
+ # # Step 2: Emit metadata events for the Dashboard itself.
974
+ # Create a set of unique chart entities for dashboard input lineage based in chart.urn
975
+ unique_chart_entities: List[Chart] = []
1022
976
  for chart_event in chart_events:
1023
- chart_event_urn = self._extract_event_urn(chart_event)
1024
- if chart_event_urn:
1025
- chart_urns.add(chart_event_urn)
1026
-
1027
- dashboard_events = self._make_dashboard_metadata_events(
1028
- looker_dashboard, list(chart_urns)
977
+ # Use chart.urn to ensure uniqueness based on the chart's URN property
978
+ # Also, update the set of processed chart urns
979
+ if str(chart_event.urn) not in self.chart_urns:
980
+ self.chart_urns.add(str(chart_event.urn))
981
+ unique_chart_entities.append(chart_event)
982
+
983
+ dashboard_events = self._make_dashboard_entities(
984
+ looker_dashboard, unique_chart_entities
1029
985
  )
1030
986
  yield from dashboard_events
1031
987
 
1032
988
  def get_ownership(
1033
989
  self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
1034
- ) -> Optional[OwnershipClass]:
990
+ ) -> Optional[OwnerClass]:
1035
991
  if looker_dashboard_look.owner is not None:
1036
992
  owner_urn = looker_dashboard_look.owner.get_urn(
1037
993
  self.source_config.strip_user_ids_from_email
1038
994
  )
1039
995
  if owner_urn is not None:
1040
- ownership: OwnershipClass = OwnershipClass(
1041
- owners=[
1042
- OwnerClass(
1043
- owner=owner_urn,
1044
- type=OwnershipTypeClass.DATAOWNER,
1045
- )
1046
- ]
996
+ return OwnerClass(
997
+ owner=owner_urn,
998
+ type=OwnershipTypeClass.DATAOWNER,
1047
999
  )
1048
- return ownership
1049
1000
  return None
1050
1001
 
1051
- def _get_change_audit_stamps(
1052
- self, looker_dashboard: LookerDashboard
1053
- ) -> ChangeAuditStamps:
1054
- change_audit_stamp: ChangeAuditStamps = ChangeAuditStamps()
1055
- if looker_dashboard.created_at is not None:
1056
- change_audit_stamp.created.time = round(
1057
- looker_dashboard.created_at.timestamp() * 1000
1058
- )
1059
- if looker_dashboard.owner is not None:
1060
- owner_urn = looker_dashboard.owner.get_urn(
1061
- self.source_config.strip_user_ids_from_email
1062
- )
1063
- if owner_urn:
1064
- change_audit_stamp.created.actor = owner_urn
1065
- if looker_dashboard.last_updated_at is not None:
1066
- change_audit_stamp.lastModified.time = round(
1067
- looker_dashboard.last_updated_at.timestamp() * 1000
1068
- )
1069
- if looker_dashboard.last_updated_by is not None:
1070
- updated_by_urn = looker_dashboard.last_updated_by.get_urn(
1071
- self.source_config.strip_user_ids_from_email
1072
- )
1073
- if updated_by_urn:
1074
- change_audit_stamp.lastModified.actor = updated_by_urn
1075
- if (
1076
- looker_dashboard.is_deleted
1077
- and looker_dashboard.deleted_by is not None
1078
- and looker_dashboard.deleted_at is not None
1079
- ):
1080
- deleter_urn = looker_dashboard.deleted_by.get_urn(
1081
- self.source_config.strip_user_ids_from_email
1082
- )
1083
- if deleter_urn:
1084
- change_audit_stamp.deleted = AuditStamp(
1085
- actor=deleter_urn,
1086
- time=round(looker_dashboard.deleted_at.timestamp() * 1000),
1087
- )
1002
+ def _get_last_modified_time(
1003
+ self, looker_dashboard: Optional[LookerDashboard]
1004
+ ) -> Optional[datetime.datetime]:
1005
+ return looker_dashboard.last_updated_at if looker_dashboard else None
1088
1006
 
1089
- return change_audit_stamp
1007
+ def _get_last_modified_by(
1008
+ self, looker_dashboard: Optional[LookerDashboard]
1009
+ ) -> Optional[str]:
1010
+ if not looker_dashboard or not looker_dashboard.last_updated_by:
1011
+ return None
1012
+ return looker_dashboard.last_updated_by.get_urn(
1013
+ self.source_config.strip_user_ids_from_email
1014
+ )
1015
+
1016
+ def _get_created_at(
1017
+ self, looker_dashboard: Optional[LookerDashboard]
1018
+ ) -> Optional[datetime.datetime]:
1019
+ return looker_dashboard.created_at if looker_dashboard else None
1020
+
1021
+ def _get_created_by(
1022
+ self, looker_dashboard: Optional[LookerDashboard]
1023
+ ) -> Optional[str]:
1024
+ if not looker_dashboard or not looker_dashboard.owner:
1025
+ return None
1026
+ return looker_dashboard.owner.get_urn(
1027
+ self.source_config.strip_user_ids_from_email
1028
+ )
1029
+
1030
+ def _get_deleted_on(
1031
+ self, looker_dashboard: Optional[LookerDashboard]
1032
+ ) -> Optional[datetime.datetime]:
1033
+ return looker_dashboard.deleted_at if looker_dashboard else None
1034
+
1035
+ def _get_deleted_by(
1036
+ self, looker_dashboard: Optional[LookerDashboard]
1037
+ ) -> Optional[str]:
1038
+ if not looker_dashboard or not looker_dashboard.deleted_by:
1039
+ return None
1040
+ return looker_dashboard.deleted_by.get_urn(
1041
+ self.source_config.strip_user_ids_from_email
1042
+ )
1090
1043
 
1091
1044
  def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
1092
1045
  assert folder.id
@@ -1099,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1099
1052
  ]
1100
1053
  return "/".join(ancestors + [folder.name])
1101
1054
 
1102
- def _get_looker_dashboard(self, dashboard: Dashboard) -> LookerDashboard:
1055
+ def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
1103
1056
  self.reporter.accessed_dashboards += 1
1104
1057
  if dashboard.folder is None:
1105
1058
  logger.debug(f"{dashboard.id} has no folder")
@@ -1173,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1173
1126
 
1174
1127
  return user
1175
1128
 
1176
- def process_metrics_dimensions_and_fields_for_dashboard(
1177
- self, dashboard: LookerDashboard
1178
- ) -> List[MetadataWorkUnit]:
1179
- chart_mcps = [
1180
- self._make_metrics_dimensions_chart_mcp(element)
1181
- for element in dashboard.dashboard_elements
1182
- ]
1183
- dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
1184
-
1185
- mcps = chart_mcps
1186
- mcps.append(dashboard_mcp)
1187
-
1188
- workunits = [mcp.as_workunit() for mcp in mcps]
1189
-
1190
- return workunits
1191
-
1192
1129
  def _input_fields_from_dashboard_element(
1193
1130
  self, dashboard_element: LookerDashboardElement
1194
1131
  ) -> List[InputFieldClass]:
@@ -1271,6 +1208,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1271
1208
  chart_urn = self._make_chart_urn(
1272
1209
  element_id=dashboard_element.get_urn_element_id()
1273
1210
  )
1211
+
1274
1212
  input_fields_aspect = InputFieldsClass(
1275
1213
  fields=self._input_fields_from_dashboard_element(dashboard_element)
1276
1214
  )
@@ -1280,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1280
1218
  aspect=input_fields_aspect,
1281
1219
  )
1282
1220
 
1283
- def process_dashboard(
1221
+ def _should_skip_personal_folder_dashboard(
1222
+ self, dashboard_object: LookerAPIDashboard
1223
+ ) -> bool:
1224
+ """Check if dashboard should be skipped due to being in personal folder."""
1225
+ if not self.source_config.skip_personal_folders:
1226
+ return False
1227
+
1228
+ if dashboard_object.folder is not None and (
1229
+ dashboard_object.folder.is_personal
1230
+ or dashboard_object.folder.is_personal_descendant
1231
+ ):
1232
+ self.reporter.info(
1233
+ title="Dropped Dashboard",
1234
+ message="Dropped due to being a personal folder",
1235
+ context=f"Dashboard ID: {dashboard_object.id}",
1236
+ )
1237
+ assert dashboard_object.id is not None
1238
+ self.reporter.report_dashboards_dropped(dashboard_object.id)
1239
+ return True
1240
+ return False
1241
+
1242
+ def _should_skip_dashboard_by_folder_path(
1243
+ self, looker_dashboard: LookerDashboard
1244
+ ) -> bool:
1245
+ """Check if dashboard should be skipped based on folder path pattern."""
1246
+ if (
1247
+ looker_dashboard.folder_path is not None
1248
+ and not self.source_config.folder_path_pattern.allowed(
1249
+ looker_dashboard.folder_path
1250
+ )
1251
+ ):
1252
+ logger.debug(
1253
+ f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
1254
+ )
1255
+ self.reporter.report_dashboards_dropped(looker_dashboard.id)
1256
+ return True
1257
+ return False
1258
+
1259
+ def _fetch_dashboard_from_api(
1284
1260
  self, dashboard_id: str, fields: List[str]
1285
- ) -> Tuple[
1286
- List[MetadataWorkUnit],
1287
- Optional[looker_usage.LookerDashboardForUsage],
1288
- str,
1289
- datetime.datetime,
1290
- datetime.datetime,
1291
- ]:
1292
- start_time = datetime.datetime.now()
1293
- assert dashboard_id is not None
1294
- if not self.source_config.dashboard_pattern.allowed(dashboard_id):
1295
- self.reporter.report_dashboards_dropped(dashboard_id)
1296
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1261
+ ) -> Optional[LookerAPIDashboard]:
1262
+ """Fetch dashboard object from Looker API with error handling."""
1297
1263
  try:
1298
- dashboard_object: Dashboard = self.looker_api.dashboard(
1264
+ return self.looker_api.dashboard(
1299
1265
  dashboard_id=dashboard_id,
1300
1266
  fields=fields,
1301
1267
  )
1302
1268
  except (SDKError, DeserializeError) as e:
1303
- # A looker dashboard could be deleted in between the list and the get
1304
1269
  self.reporter.report_warning(
1305
1270
  title="Failed to fetch dashboard from the Looker API",
1306
1271
  message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
1307
1272
  context=f"Dashboard ID: {dashboard_id}",
1308
1273
  exc=e,
1309
1274
  )
1310
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1275
+ return None
1311
1276
 
1312
- if self.source_config.skip_personal_folders:
1313
- if dashboard_object.folder is not None and (
1314
- dashboard_object.folder.is_personal
1315
- or dashboard_object.folder.is_personal_descendant
1316
- ):
1317
- self.reporter.info(
1318
- title="Dropped Dashboard",
1319
- message="Dropped due to being a personal folder",
1320
- context=f"Dashboard ID: {dashboard_id}",
1321
- )
1322
- self.reporter.report_dashboards_dropped(dashboard_id)
1323
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1277
+ def _create_empty_result(
1278
+ self, dashboard_id: str, start_time: datetime.datetime
1279
+ ) -> DashboardProcessingResult:
1280
+ """Create an empty result for skipped or failed dashboard processing."""
1281
+ return DashboardProcessingResult(
1282
+ entities=[],
1283
+ dashboard_usage=None,
1284
+ dashboard_id=dashboard_id,
1285
+ start_time=start_time,
1286
+ end_time=datetime.datetime.now(),
1287
+ )
1324
1288
 
1325
- looker_dashboard = self._get_looker_dashboard(dashboard_object)
1289
+ def process_dashboard(
1290
+ self, dashboard_id: str, fields: List[str]
1291
+ ) -> DashboardProcessingResult:
1292
+ """
1293
+ Process a single dashboard and return the metadata workunits.
1326
1294
 
1327
- workunits = []
1328
- if (
1329
- looker_dashboard.folder_path is not None
1330
- and not self.source_config.folder_path_pattern.allowed(
1331
- looker_dashboard.folder_path
1332
- )
1333
- ):
1334
- logger.debug(
1335
- f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
1336
- )
1337
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1295
+ Args:
1296
+ dashboard_id: The ID of the dashboard to process
1297
+ fields: List of fields to fetch from the Looker API
1338
1298
 
1339
- if looker_dashboard.folder:
1340
- workunits += list(
1341
- self._get_folder_and_ancestors_workunits(looker_dashboard.folder)
1342
- )
1299
+ Returns:
1300
+ DashboardProcessingResult containing entities, usage data, and timing information
1301
+ """
1302
+ start_time = datetime.datetime.now()
1343
1303
 
1344
- mces = self._make_dashboard_and_chart_mces(looker_dashboard)
1345
- workunits += [
1346
- (
1347
- MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
1348
- if isinstance(mce, MetadataChangeEvent)
1349
- else MetadataWorkUnit(
1350
- id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
1351
- )
1352
- )
1353
- for mce in mces
1354
- ]
1304
+ if dashboard_id is None:
1305
+ raise ValueError("Dashboard ID cannot be None")
1355
1306
 
1356
- # add on metrics, dimensions, fields events
1357
- metric_dim_workunits = self.process_metrics_dimensions_and_fields_for_dashboard(
1358
- looker_dashboard
1307
+ # Fetch dashboard from API
1308
+ dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
1309
+ dashboard_id, fields
1359
1310
  )
1311
+ if dashboard_object is None:
1312
+ return self._create_empty_result(dashboard_id, start_time)
1313
+
1314
+ # Check if dashboard should be skipped due to personal folder
1315
+ if self._should_skip_personal_folder_dashboard(dashboard_object):
1316
+ return self._create_empty_result(dashboard_id, start_time)
1360
1317
 
1361
- workunits.extend(metric_dim_workunits)
1318
+ # Convert to internal representation
1319
+ looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
1362
1320
 
1321
+ # Check folder path pattern
1322
+ if self._should_skip_dashboard_by_folder_path(looker_dashboard):
1323
+ return self._create_empty_result(dashboard_id, start_time)
1324
+
1325
+ # Build entities list
1326
+ entities: List[Entity] = []
1327
+
1328
+ # Add folder containers if dashboard has a folder
1329
+ if looker_dashboard.folder:
1330
+ entities.extend(
1331
+ list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
1332
+ )
1333
+
1334
+ # Add dashboard and chart entities
1335
+ entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
1336
+
1337
+ # Report successful processing
1363
1338
  self.reporter.report_dashboards_scanned()
1364
1339
 
1365
- # generate usage tracking object
1340
+ # Generate usage tracking object
1366
1341
  dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
1367
1342
  dashboard_object
1368
1343
  )
1369
1344
 
1370
- return (
1371
- workunits,
1372
- dashboard_usage,
1373
- dashboard_id,
1374
- start_time,
1375
- datetime.datetime.now(),
1345
+ return DashboardProcessingResult(
1346
+ entities=entities,
1347
+ dashboard_usage=dashboard_usage,
1348
+ dashboard_id=dashboard_id,
1349
+ start_time=start_time,
1350
+ end_time=datetime.datetime.now(),
1376
1351
  )
1377
1352
 
1378
- def _get_folder_and_ancestors_workunits(
1353
+ def _get_folder_and_ancestors_containers(
1379
1354
  self, folder: LookerFolder
1380
- ) -> Iterable[MetadataWorkUnit]:
1355
+ ) -> Iterable[Container]:
1381
1356
  for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
1382
1357
  yield from self._emit_folder_as_container(
1383
1358
  self._get_looker_folder(ancestor_folder)
@@ -1448,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1448
1423
  ).workunit_processor,
1449
1424
  ]
1450
1425
 
1451
- def emit_independent_looks_mcp(
1426
+ def emit_independent_looks_entities(
1452
1427
  self, dashboard_element: LookerDashboardElement
1453
- ) -> Iterable[MetadataWorkUnit]:
1428
+ ) -> Iterable[Union[Container, Chart]]:
1454
1429
  if dashboard_element.folder: # independent look
1455
- yield from self._get_folder_and_ancestors_workunits(
1430
+ yield from self._get_folder_and_ancestors_containers(
1456
1431
  dashboard_element.folder
1457
1432
  )
1458
1433
 
1459
- yield from auto_workunit(
1460
- stream=self._make_chart_metadata_events(
1461
- dashboard_element=dashboard_element,
1462
- dashboard=None,
1463
- )
1464
- )
1465
-
1466
- yield from auto_workunit(
1467
- [
1468
- self._make_metrics_dimensions_chart_mcp(
1469
- dashboard_element,
1470
- )
1471
- ]
1434
+ yield from self._make_chart_entities(
1435
+ dashboard_element=dashboard_element,
1436
+ dashboard=None,
1472
1437
  )
1473
1438
 
1474
- def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
1439
+ def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
1475
1440
  """
1476
- Emit MetadataWorkUnit for looks which are not part of any Dashboard
1477
- """
1478
- if self.source_config.extract_independent_looks is False:
1479
- return
1441
+ Emit entities for Looks which are not part of any Dashboard.
1480
1442
 
1481
- self.reporter.report_stage_start("extract_independent_looks")
1443
+ Returns: Containers for the folders and ancestors folders and Charts for the looks
1444
+ """
1445
+ logger.debug("Extracting Looks not part of any Dashboard")
1482
1446
 
1483
- logger.debug("Extracting looks not part of Dashboard")
1484
1447
  look_fields: List[str] = [
1485
1448
  "id",
1486
1449
  "title",
@@ -1502,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1502
1465
  all_looks: List[Look] = self.looker_api.all_looks(
1503
1466
  fields=look_fields, soft_deleted=self.source_config.include_deleted
1504
1467
  )
1468
+
1505
1469
  for look in all_looks:
1470
+ # Skip looks that are already referenced from a dashboard
1471
+ if look.id is None:
1472
+ logger.warning("Encountered Look with no ID, skipping.")
1473
+ continue
1474
+
1506
1475
  if look.id in self.reachable_look_registry:
1507
- # This look is reachable from the Dashboard
1508
1476
  continue
1509
1477
 
1510
1478
  if look.query_id is None:
1511
1479
  logger.info(f"query_id is None for look {look.title}({look.id})")
1512
1480
  continue
1513
1481
 
1482
+ # Skip looks in personal folders if configured
1514
1483
  if self.source_config.skip_personal_folders:
1515
1484
  if look.folder is not None and (
1516
1485
  look.folder.is_personal or look.folder.is_personal_descendant
@@ -1521,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1521
1490
  context=f"Look ID: {look.id}",
1522
1491
  )
1523
1492
 
1524
- assert look.id, "Looker id is null"
1525
1493
  self.reporter.report_charts_dropped(look.id)
1526
1494
  continue
1527
1495
 
1528
- if look.id is not None:
1529
- query: Optional[Query] = self.looker_api.get_look(
1530
- look.id, fields=["query"]
1531
- ).query
1532
- # Only include fields that are in the query_fields list
1533
- query = Query(
1534
- **{
1535
- key: getattr(query, key)
1536
- for key in query_fields
1537
- if hasattr(query, key)
1538
- }
1539
- )
1496
+ # Fetch the Look's query and filter to allowed fields
1497
+ query: Optional[Query] = None
1498
+ try:
1499
+ look_with_query = self.looker_api.get_look(look.id, fields=["query"])
1500
+ query_obj = look_with_query.query
1501
+ if query_obj:
1502
+ query = Query(
1503
+ **{
1504
+ key: getattr(query_obj, key)
1505
+ for key in query_fields
1506
+ if hasattr(query_obj, key)
1507
+ }
1508
+ )
1509
+ except Exception as exc:
1510
+ logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
1511
+ continue
1540
1512
 
1541
- dashboard_element: Optional[LookerDashboardElement] = (
1542
- self._get_looker_dashboard_element(
1543
- DashboardElement(
1544
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
1545
- # we add the "looks_" prefix to look.id.
1546
- title=look.title,
1547
- subtitle_text=look.description,
1548
- look_id=look.id,
1549
- dashboard_id=None, # As this is an independent look
1550
- look=LookWithQuery(
1551
- query=query, folder=look.folder, user_id=look.user_id
1552
- ),
1513
+ dashboard_element = self._get_looker_dashboard_element(
1514
+ DashboardElement(
1515
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
1516
+ # we add the "looks_" prefix to look.id.
1517
+ title=look.title,
1518
+ subtitle_text=look.description,
1519
+ look_id=look.id,
1520
+ dashboard_id=None, # As this is an independent look
1521
+ look=LookWithQuery(
1522
+ query=query,
1523
+ folder=getattr(look, "folder", None),
1524
+ user_id=getattr(look, "user_id", None),
1553
1525
  ),
1554
1526
  )
1555
1527
  )
1556
1528
 
1557
1529
  if dashboard_element is not None:
1558
- logger.debug(f"Emitting MCPS for look {look.title}({look.id})")
1559
- yield from self.emit_independent_looks_mcp(
1530
+ logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
1531
+ yield from self.emit_independent_looks_entities(
1560
1532
  dashboard_element=dashboard_element
1561
1533
  )
1562
1534
 
1563
- self.reporter.report_stage_end("extract_independent_looks")
1535
+ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
1536
+ """
1537
+ Note: Returns Entities from SDKv2 where possible else MCPs only.
1564
1538
 
1565
- def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
1566
- self.reporter.report_stage_start("list_dashboards")
1567
- dashboards = self.looker_api.all_dashboards(fields="id")
1568
- deleted_dashboards = (
1569
- self.looker_api.search_dashboards(fields="id", deleted="true")
1570
- if self.source_config.include_deleted
1571
- else []
1572
- )
1573
- if deleted_dashboards != []:
1574
- logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
1539
+ Using SDKv2: Containers, Datasets, Dashboards and Charts
1540
+ Using MCPW: Tags, DashboardUsageStats and UserResourceMapping
1575
1541
 
1576
- dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
1577
- dashboard_ids.extend(
1578
- [deleted_dashboard.id for deleted_dashboard in deleted_dashboards]
1579
- )
1580
- selected_dashboard_ids: List[Optional[str]] = []
1581
- for id in dashboard_ids:
1582
- if id is None:
1583
- continue
1584
- if not self.source_config.dashboard_pattern.allowed(id):
1585
- self.reporter.report_dashboards_dropped(id)
1542
+ TODO: Convert MCPWs to use SDKv2 entities
1543
+ """
1544
+ with self.reporter.report_stage("list_dashboards"):
1545
+ # Fetch all dashboards (not deleted)
1546
+ dashboards = self.looker_api.all_dashboards(fields="id")
1547
+
1548
+ # Optionally fetch deleted dashboards if configured
1549
+ if self.source_config.include_deleted:
1550
+ deleted_dashboards = self.looker_api.search_dashboards(
1551
+ fields="id", deleted="true"
1552
+ )
1586
1553
  else:
1587
- selected_dashboard_ids.append(id)
1588
- dashboard_ids = selected_dashboard_ids
1589
- self.reporter.report_stage_end("list_dashboards")
1590
- self.reporter.report_total_dashboards(len(dashboard_ids))
1554
+ deleted_dashboards = []
1555
+
1556
+ if deleted_dashboards:
1557
+ logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
1591
1558
 
1592
- # List dashboard fields to extract for processing
1593
- fields = [
1559
+ # Collect all dashboard IDs (including deleted if applicable)
1560
+ all_dashboard_ids: List[Optional[str]] = [
1561
+ dashboard.id for dashboard in dashboards
1562
+ ]
1563
+ all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
1564
+
1565
+ # Filter dashboard IDs based on the allowed pattern
1566
+ filtered_dashboard_ids: List[str] = []
1567
+ for dashboard_id in all_dashboard_ids:
1568
+ if dashboard_id is None:
1569
+ continue
1570
+ if not self.source_config.dashboard_pattern.allowed(dashboard_id):
1571
+ self.reporter.report_dashboards_dropped(dashboard_id)
1572
+ else:
1573
+ filtered_dashboard_ids.append(dashboard_id)
1574
+
1575
+ # Use the filtered list for further processing
1576
+ dashboard_ids: List[str] = filtered_dashboard_ids
1577
+
1578
+ # Report the total number of dashboards to be processed
1579
+ self.reporter.report_total_dashboards(len(dashboard_ids))
1580
+
1581
+ # Define the fields to extract for each dashboard
1582
+ dashboard_fields = [
1594
1583
  "id",
1595
1584
  "title",
1596
1585
  "dashboard_elements",
@@ -1606,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1606
1595
  "deleted_at",
1607
1596
  "deleter_id",
1608
1597
  ]
1598
+
1599
+ # Add usage-related fields if usage history extraction is enabled
1609
1600
  if self.source_config.extract_usage_history:
1610
- fields += [
1611
- "favorite_count",
1612
- "view_count",
1613
- "last_viewed_at",
1614
- ]
1601
+ dashboard_fields.extend(
1602
+ [
1603
+ "favorite_count",
1604
+ "view_count",
1605
+ "last_viewed_at",
1606
+ ]
1607
+ )
1615
1608
 
1609
+ # Store dashboards for which usage stats will be extracted
1616
1610
  looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
1617
1611
 
1612
+ # Process dashboard and chart metadata
1618
1613
  with self.reporter.report_stage("dashboard_chart_metadata"):
1614
+ dashboard_jobs = (
1615
+ (dashboard_id, dashboard_fields)
1616
+ for dashboard_id in dashboard_ids
1617
+ if dashboard_id is not None
1618
+ )
1619
1619
  for job in BackpressureAwareExecutor.map(
1620
1620
  self.process_dashboard,
1621
- (
1622
- (dashboard_id, fields)
1623
- for dashboard_id in dashboard_ids
1624
- if dashboard_id is not None
1625
- ),
1621
+ dashboard_jobs,
1626
1622
  max_workers=self.source_config.max_threads,
1627
1623
  ):
1628
- (
1629
- work_units,
1630
- dashboard_usage,
1631
- dashboard_id,
1632
- start_time,
1633
- end_time,
1634
- ) = job.result()
1624
+ result: DashboardProcessingResult = job.result()
1625
+
1635
1626
  logger.debug(
1636
- f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
1627
+ f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
1628
+ )
1629
+ self.reporter.report_upstream_latency(
1630
+ result.start_time, result.end_time
1637
1631
  )
1638
- self.reporter.report_upstream_latency(start_time, end_time)
1639
1632
 
1640
- yield from work_units
1641
- if dashboard_usage is not None:
1642
- looker_dashboards_for_usage.append(dashboard_usage)
1633
+ yield from result.entities
1643
1634
 
1635
+ if result.dashboard_usage is not None:
1636
+ looker_dashboards_for_usage.append(result.dashboard_usage)
1637
+
1638
+ # Warn if owner extraction was enabled but no emails could be found
1644
1639
  if (
1645
1640
  self.source_config.extract_owners
1646
1641
  and self.reporter.resolved_user_ids > 0
@@ -1652,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1652
1647
  "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
1653
1648
  )
1654
1649
 
1655
- # Extract independent look here, so that explore of this look would get consider in _make_explore_metadata_events
1656
- yield from self.extract_independent_looks()
1657
-
1658
- self.reporter.report_stage_start("explore_metadata")
1650
+ # Extract independent looks first, so their explores are considered in _make_explore_containers.
1651
+ if self.source_config.extract_independent_looks:
1652
+ with self.reporter.report_stage("extract_independent_looks"):
1653
+ yield from self.extract_independent_looks()
1659
1654
 
1660
- for event in self._make_explore_metadata_events():
1661
- if isinstance(event, MetadataChangeEvent):
1662
- yield MetadataWorkUnit(
1663
- id=f"looker-{event.proposedSnapshot.urn}", mce=event
1664
- )
1665
- elif isinstance(event, MetadataChangeProposalWrapper):
1666
- yield event.as_workunit()
1667
- elif isinstance(event, MetadataWorkUnit):
1668
- yield event
1669
- else:
1670
- raise Exception(f"Unexpected type of event {event}")
1671
- self.reporter.report_stage_end("explore_metadata")
1655
+ # Process explore containers and yield them.
1656
+ with self.reporter.report_stage("explore_metadata"):
1657
+ yield from self._make_explore_containers()
1672
1658
 
1673
1659
  if (
1674
1660
  self.source_config.tag_measures_and_dimensions
1675
1661
  and self.reporter.explores_scanned > 0
1676
1662
  ):
1677
- # Emit tag MCEs for measures and dimensions if we produced any explores:
1663
+ # Emit tag MCPs for measures and dimensions if we produced any explores:
1664
+ # Tags MCEs are converted to MCPs
1678
1665
  for tag_mce in LookerUtil.get_tag_mces():
1679
- yield MetadataWorkUnit(
1680
- id=f"tag-{tag_mce.proposedSnapshot.urn}",
1681
- mce=tag_mce,
1682
- )
1666
+ yield from auto_workunit(mcps_from_mce(tag_mce))
1683
1667
 
1684
1668
  # Extract usage history is enabled
1685
1669
  if self.source_config.extract_usage_history:
1686
- self.reporter.report_stage_start("usage_extraction")
1687
- usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
1688
- looker_dashboards_for_usage, self.chart_urns
1689
- )
1690
- for usage_mcp in usage_mcps:
1691
- yield usage_mcp.as_workunit()
1692
- self.reporter.report_stage_end("usage_extraction")
1670
+ with self.reporter.report_stage("usage_extraction"):
1671
+ usage_mcps: List[MetadataChangeProposalWrapper] = (
1672
+ self.extract_usage_stat(
1673
+ looker_dashboards_for_usage, self.chart_urns
1674
+ )
1675
+ )
1676
+ yield from auto_workunit(usage_mcps)
1693
1677
 
1694
- # Dump looker user resource mappings.
1678
+ # Ingest looker user resource mapping workunits.
1695
1679
  logger.info("Ingesting looker user resource mapping workunits")
1696
- self.reporter.report_stage_start("user_resource_extraction")
1697
- yield from auto_workunit(
1698
- self.user_registry.to_platform_resource(
1699
- self.source_config.platform_instance
1680
+ with self.reporter.report_stage("user_resource_extraction"):
1681
+ yield from auto_workunit(
1682
+ self.user_registry.to_platform_resource(
1683
+ self.source_config.platform_instance
1684
+ )
1700
1685
  )
1701
- )
1702
1686
 
1703
1687
  def get_report(self) -> SourceReport:
1704
1688
  return self.reporter