acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between those versions as published.

Potentially problematic release: this version of acryl-datahub has been flagged as possibly problematic.
Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
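
The hunks that follow come from datahub/ingestion/source/looker/looker_source.py (item 173 above): the Looker source drops its hand-assembled MCE/snapshot emission (ChartSnapshot, DashboardSnapshot, BrowsePathsV2, per-aspect MetadataChangeProposalWrapper calls) in favor of the SDK v2 entity classes Chart, Dashboard, Container, and Dataset from datahub.sdk. A minimal, hypothetical sketch of that construction pattern is shown below; the keyword arguments mirror the ones visible in the diff, the identifiers and URLs are made up, and the released SDK may differ, so treat this as illustrative rather than a definitive API reference:

from datahub.sdk.chart import Chart
from datahub.sdk.dashboard import Dashboard

# Hypothetical ids/URLs for illustration only; kwargs mirror those in the diff below.
look = Chart(
    platform="looker",
    name="dashboard_elements.123",
    display_name="Orders by week",
    description="",
    chart_url="https://looker.example.com/looks/123",
)

dashboard = Dashboard(
    platform="looker",
    name="dashboards.456",
    display_name="Orders overview",
    description="",
    dashboard_url="https://looker.example.com/dashboards/456",
    charts=[look],  # the source passes the Chart entities it just built
)

Entities built this way are yielded by the source directly, replacing the per-aspect proposal plumbing that the removed lines in the diff implement.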
@@ -1,6 +1,7 @@
 import datetime
 import json
 import logging
+from dataclasses import dataclass
 from json import JSONDecodeError
 from typing import (
     Any,
@@ -18,7 +19,7 @@ from typing import (
 from looker_sdk.error import SDKError
 from looker_sdk.rtl.serialize import DeserializeError
 from looker_sdk.sdk.api40.models import (
-    Dashboard,
+    Dashboard as LookerAPIDashboard,
     DashboardElement,
     Folder,
     FolderBase,
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
 
 import datahub.emitter.mce_builder as builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import create_embed_mcp, gen_containers
+from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -51,6 +52,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     BIAssetSubTypes,
     BIContainerSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.looker import looker_usage
 from datahub.ingestion.source.looker.looker_common import (
@@ -79,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
-    AuditStamp,
-    ChangeAuditStamps,
-    DataPlatformInstance,
     Status,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
-    ChartSnapshot,
-    DashboardSnapshot,
-)
-from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
-    BrowsePathEntryClass,
-    BrowsePathsClass,
-    BrowsePathsV2Class,
-    ChartInfoClass,
     ChartTypeClass,
-    ContainerClass,
-    DashboardInfoClass,
+    EmbedClass,
     InputFieldClass,
     InputFieldsClass,
     OwnerClass,
-    OwnershipClass,
     OwnershipTypeClass,
-    SubTypesClass,
 )
+from datahub.sdk.chart import Chart
+from datahub.sdk.container import Container
+from datahub.sdk.dashboard import Dashboard
+from datahub.sdk.dataset import Dataset
+from datahub.sdk.entity import Entity
 from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
+from datahub.utilities.sentinels import Unset, unset
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class DashboardProcessingResult:
+    """Result of processing a single dashboard."""
+
+    entities: List[Entity]
+    dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
+    dashboard_id: str
+    start_time: datetime.datetime
+    end_time: datetime.datetime
+
+
 @platform_name("Looker")
 @support_status(SupportStatus.CERTIFIED)
 @config_class(LookerDashboardSourceConfig)
@@ -126,6 +130,15 @@ logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, configured using `extract_usage_history`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.LOOKML_MODEL,
+        SourceCapabilityModifier.LOOKER_FOLDER,
+    ],
+)
 class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
@@ -623,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
 
         return chart_type
 
-    def _get_folder_browse_path_v2_entries(
+    def _get_folder_ancestors_urn_entries(
         self, folder: LookerFolder, include_current_folder: bool = True
-    ) -> Iterable[BrowsePathEntryClass]:
+    ) -> Iterable[str]:
         for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
-            assert ancestor.id
+            assert ancestor.id  # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
             urn = self._gen_folder_key(ancestor.id).as_urn()
-            yield BrowsePathEntryClass(id=urn, urn=urn)
+            yield urn
 
         urn = self._gen_folder_key(folder.id).as_urn()
         if include_current_folder:
-            yield BrowsePathEntryClass(id=urn, urn=urn)
-
-    def _create_platform_instance_aspect(
-        self,
-    ) -> DataPlatformInstance:
-        assert self.source_config.platform_name, (
-            "Platform name is not set in the configuration."
-        )
-        assert self.source_config.platform_instance, (
-            "Platform instance is not set in the configuration."
-        )
-
-        return DataPlatformInstance(
-            platform=builder.make_data_platform_urn(self.source_config.platform_name),
-            instance=builder.make_dataplatform_instance_urn(
-                platform=self.source_config.platform_name,
-                instance=self.source_config.platform_instance,
-            ),
-        )
+            yield urn
 
     def _make_chart_urn(self, element_id: str) -> str:
         platform_instance: Optional[str] = None
@@ -664,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             platform_instance=platform_instance,
         )
 
-    def _make_chart_metadata_events(
+    def _make_chart_entities(
         self,
         dashboard_element: LookerDashboardElement,
         dashboard: Optional[
             LookerDashboard
         ],  # dashboard will be None if this is a standalone look
-    ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
-        chart_urn = self._make_chart_urn(
-            element_id=dashboard_element.get_urn_element_id()
-        )
-        self.chart_urns.add(chart_urn)
-        chart_snapshot = ChartSnapshot(
-            urn=chart_urn,
-            aspects=[Status(removed=False)],
-        )
-        browse_path_v2: Optional[BrowsePathsV2Class] = None
-
-        chart_type = self._get_chart_type(dashboard_element)
-        chart_info = ChartInfoClass(
-            type=chart_type,
-            description=dashboard_element.description or "",
-            title=dashboard_element.title or "",
-            lastModified=ChangeAuditStamps(),
-            chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
-            inputs=dashboard_element.get_view_urns(self.source_config),
-            customProperties={
-                "upstream_fields": (
-                    ",".join(
-                        sorted({field.name for field in dashboard_element.input_fields})
-                    )
-                    if dashboard_element.input_fields
-                    else ""
-                )
-            },
-        )
-        chart_snapshot.aspects.append(chart_info)
-
+    ) -> List[Chart]:
+        chart_parent_container: Union[List[str], Unset] = unset
         if (
             dashboard
             and dashboard.folder_path is not None
             and dashboard.folder is not None
         ):
-            browse_path = BrowsePathsClass(
-                paths=[f"/Folders/{dashboard.folder_path}/{dashboard.title}"]
-            )
-            chart_snapshot.aspects.append(browse_path)
-
-            dashboard_urn = self.make_dashboard_urn(dashboard)
-            browse_path_v2 = BrowsePathsV2Class(
-                path=[
-                    BrowsePathEntryClass("Folders"),
-                    *self._get_folder_browse_path_v2_entries(dashboard.folder),
-                    BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
-                ],
-            )
+            chart_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(dashboard.folder),
+                self.make_dashboard_urn(dashboard),
+            ]
         elif (
             dashboard is None
             and dashboard_element.folder_path is not None
             and dashboard_element.folder is not None
-        ):  # independent look
-            browse_path = BrowsePathsClass(
-                paths=[f"/Folders/{dashboard_element.folder_path}"]
-            )
-            chart_snapshot.aspects.append(browse_path)
-            browse_path_v2 = BrowsePathsV2Class(
-                path=[
-                    BrowsePathEntryClass("Folders"),
-                    *self._get_folder_browse_path_v2_entries(dashboard_element.folder),
-                ],
-            )
+        ):  # Independent look
+            chart_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(dashboard_element.folder),
+            ]
 
+        # Determine chart ownership
+        chart_ownership: Optional[List[OwnerClass]] = None
         if dashboard is not None:
             ownership = self.get_ownership(dashboard)
             if ownership is not None:
-                chart_snapshot.aspects.append(ownership)
+                chart_ownership = [ownership]
         elif dashboard is None and dashboard_element is not None:
             ownership = self.get_ownership(dashboard_element)
             if ownership is not None:
-                chart_snapshot.aspects.append(ownership)
-
-        chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
-
-        proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
-            chart_mce,
-            MetadataChangeProposalWrapper(
-                entityUrn=chart_urn,
-                aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
-            ),
-        ]
-
-        if self.source_config.include_platform_instance_in_urns:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=chart_urn,
-                    aspect=self._create_platform_instance_aspect(),
-                ),
-            )
+                chart_ownership = [ownership]
 
+        chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
         # If extracting embeds is enabled, produce an MCP for embed URL.
         if (
             self.source_config.extract_embed_urls
@@ -771,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
                 self.source_config.external_base_url
             )
             if maybe_embed_url:
-                proposals.append(
-                    create_embed_mcp(
-                        chart_snapshot.urn,
-                        maybe_embed_url,
-                    )
-                )
+                chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
 
-        if dashboard is None and dashboard_element.folder:
-            container = ContainerClass(
-                container=self._gen_folder_key(dashboard_element.folder.id).as_urn(),
+        chart_extra_aspects.append(
+            InputFieldsClass(
+                fields=self._input_fields_from_dashboard_element(dashboard_element)
             )
-            proposals.append(
-                MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
-            )
-
-        if browse_path_v2:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=chart_urn, aspect=browse_path_v2
-                )
-            )
-
-        return proposals
-
-    def _make_dashboard_metadata_events(
-        self, looker_dashboard: LookerDashboard, chart_urns: List[str]
-    ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
-        dashboard_urn = self.make_dashboard_urn(looker_dashboard)
-        dashboard_snapshot = DashboardSnapshot(
-            urn=dashboard_urn,
-            aspects=[],
-        )
-        browse_path_v2: Optional[BrowsePathsV2Class] = None
-        dashboard_info = DashboardInfoClass(
-            description=looker_dashboard.description or "",
-            title=looker_dashboard.title,
-            charts=chart_urns,
-            lastModified=self._get_change_audit_stamps(looker_dashboard),
-            dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
         )
-
-        dashboard_snapshot.aspects.append(dashboard_info)
-        if (
-            looker_dashboard.folder_path is not None
-            and looker_dashboard.folder is not None
-        ):
-            browse_path = BrowsePathsClass(
-                paths=[f"/Folders/{looker_dashboard.folder_path}"]
-            )
-            browse_path_v2 = BrowsePathsV2Class(
-                path=[
-                    BrowsePathEntryClass("Folders"),
-                    *self._get_folder_browse_path_v2_entries(looker_dashboard.folder),
-                ],
+        return [
+            Chart(
+                chart_type=self._get_chart_type(dashboard_element),
+                chart_url=dashboard_element.url(
+                    self.source_config.external_base_url or ""
+                ),
+                custom_properties={
+                    "upstream_fields": (
+                        ",".join(
+                            sorted(
+                                {field.name for field in dashboard_element.input_fields}
+                            )
+                        )
+                        if dashboard_element.input_fields
+                        else ""
+                    )
+                },
+                description=dashboard_element.description or "",
+                display_name=dashboard_element.title,  # title is (deprecated) using display_name
+                extra_aspects=chart_extra_aspects,
+                input_datasets=dashboard_element.get_view_urns(self.source_config),
+                last_modified=self._get_last_modified_time(
+                    dashboard
+                ),  # Inherited from Dashboard
+                last_modified_by=self._get_last_modified_by(
+                    dashboard
+                ),  # Inherited from Dashboard
+                created_at=self._get_created_at(dashboard),  # Inherited from Dashboard
+                created_by=self._get_created_by(dashboard),  # Inherited from Dashboard
+                deleted_on=self._get_deleted_on(dashboard),  # Inherited from Dashboard
+                deleted_by=self._get_deleted_by(dashboard),  # Inherited from Dashboard
+                name=dashboard_element.get_urn_element_id(),
+                owners=chart_ownership,
+                parent_container=chart_parent_container,
+                platform=self.source_config.platform_name,
+                platform_instance=self.source_config.platform_instance
+                if self.source_config.include_platform_instance_in_urns
+                else None,
+                subtype=BIAssetSubTypes.LOOKER_LOOK,
             )
-            dashboard_snapshot.aspects.append(browse_path)
-
-        ownership = self.get_ownership(looker_dashboard)
-        if ownership is not None:
-            dashboard_snapshot.aspects.append(ownership)
-
-        dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
-
-        dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
-
-        proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
-            dashboard_mce
         ]
 
-        if looker_dashboard.folder is not None:
-            container = ContainerClass(
-                container=self._gen_folder_key(looker_dashboard.folder.id).as_urn(),
-            )
-            proposals.append(
-                MetadataChangeProposalWrapper(entityUrn=dashboard_urn, aspect=container)
-            )
+    def _make_dashboard_entities(
+        self, looker_dashboard: LookerDashboard, charts: List[Chart]
+    ) -> List[Dashboard]:
+        dashboard_ownership: Optional[List[OwnerClass]] = None
+        ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
+        if ownership is not None:
+            dashboard_ownership = [ownership]
 
-        if browse_path_v2:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=dashboard_urn, aspect=browse_path_v2
-                )
-            )
+        # Extra Aspects not yet supported in the Dashboard entity class SDKv2
+        dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
 
-        # If extracting embeds is enabled, produce an MCP for embed URL.
+        # Embed URL aspect
         if (
             self.source_config.extract_embed_urls
             and self.source_config.external_base_url
         ):
-            proposals.append(
-                create_embed_mcp(
-                    dashboard_snapshot.urn,
-                    looker_dashboard.embed_url(self.source_config.external_base_url),
+            dashboard_extra_aspects.append(
+                EmbedClass(
+                    renderUrl=looker_dashboard.embed_url(
+                        self.source_config.external_base_url
+                    )
                 )
             )
 
-        if self.source_config.include_platform_instance_in_urns:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=dashboard_urn,
-                    aspect=self._create_platform_instance_aspect(),
-                )
+        # Input fields aspect
+        # Populate input fields from all the dashboard elements
+        all_fields: List[InputFieldClass] = []
+        for dashboard_element in looker_dashboard.dashboard_elements:
+            all_fields.extend(
+                self._input_fields_from_dashboard_element(dashboard_element)
             )
+        dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
+        # Status aspect
+        dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
 
-        return proposals
+        dashboard_parent_container: Union[List[str], Unset] = unset
+        if (
+            looker_dashboard.folder_path is not None
+            and looker_dashboard.folder is not None
+        ):
+            dashboard_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
+            ]
+
+        return [
+            Dashboard(
+                charts=charts,
+                dashboard_url=looker_dashboard.url(
+                    self.source_config.external_base_url
+                ),
+                description=looker_dashboard.description or "",
+                display_name=looker_dashboard.title,  # title is (deprecated) using display_name
+                extra_aspects=dashboard_extra_aspects,
+                last_modified=self._get_last_modified_time(looker_dashboard),
+                last_modified_by=self._get_last_modified_by(looker_dashboard),
+                created_at=self._get_created_at(looker_dashboard),
+                created_by=self._get_created_by(looker_dashboard),
+                deleted_on=self._get_deleted_on(looker_dashboard),
+                deleted_by=self._get_deleted_by(looker_dashboard),
+                name=looker_dashboard.get_urn_dashboard_id(),
+                owners=dashboard_ownership,
+                parent_container=dashboard_parent_container,
+                platform=self.source_config.platform_name,
+                platform_instance=self.source_config.platform_instance
+                if self.source_config.include_platform_instance_in_urns
+                else None,
+            )
+        ]
 
     def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
         # Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
@@ -892,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
         return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
 
-    def _make_explore_metadata_events(
+    def _make_explore_containers(
         self,
-    ) -> Iterable[
-        Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
-    ]:
+    ) -> Iterable[Union[Container, Dataset]]:
         if not self.source_config.emit_used_explores_only:
             explores_to_fetch = list(self.list_all_explores())
         else:
@@ -914,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         for project_name, model, _ in explores_to_fetch:
             if model not in processed_models:
                 model_key = gen_model_key(self.source_config, model)
-                yield from gen_containers(
+                yield Container(
                     container_key=model_key,
-                    name=model,
-                    sub_types=[BIContainerSubTypes.LOOKML_MODEL],
+                    display_name=model,
+                    subtype=BIContainerSubTypes.LOOKML_MODEL,
                     extra_properties=(
                         {"project": project_name} if project_name is not None else None
                     ),
-                )
-                yield MetadataChangeProposalWrapper(
-                    entityUrn=model_key.as_urn(),
-                    aspect=BrowsePathsV2Class(
-                        path=[BrowsePathEntryClass("Explore")],
-                    ),
+                    parent_container=["Explore"],
                 )
 
                 processed_models.append(model)
@@ -937,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             ((model, explore) for (_project, model, explore) in explores_to_fetch),
             max_workers=self.source_config.max_threads,
         ):
-            events, explore_id, start_time, end_time = future.result()
+            explore_dataset_entity, explore_id, start_time, end_time = future.result()
             self.reporter.explores_scanned += 1
-            yield from events
+            if explore_dataset_entity:
+                yield explore_dataset_entity
             self.reporter.report_upstream_latency(start_time, end_time)
             logger.debug(
                 f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
@@ -959,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     def fetch_one_explore(
         self, model: str, explore: str
     ) -> Tuple[
-        List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]],
+        Optional[Dataset],
         str,
         datetime.datetime,
         datetime.datetime,
     ]:
         start_time = datetime.datetime.now()
-        events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
         looker_explore = self.explore_registry.get_explore(model, explore)
+        explore_dataset_entity: Optional[Dataset] = None
         if looker_explore is not None:
-            events = (
-                looker_explore._to_metadata_events(
-                    self.source_config,
-                    self.reporter,
-                    self.source_config.external_base_url or self.source_config.base_url,
-                    self.source_config.extract_embed_urls,
-                )
-                or events
+            explore_dataset_entity = looker_explore._to_metadata_events(
+                self.source_config,
+                self.reporter,
+                self.source_config.external_base_url or self.source_config.base_url,
+                self.source_config.extract_embed_urls,
             )
 
-        return events, f"{model}:{explore}", start_time, datetime.datetime.now()
-
-    def _extract_event_urn(
-        self, event: Union[MetadataChangeEvent, MetadataChangeProposalWrapper]
-    ) -> Optional[str]:
-        if isinstance(event, MetadataChangeEvent):
-            return event.proposedSnapshot.urn
-        else:
-            return event.entityUrn
+        return (
+            explore_dataset_entity,
+            f"{model}:{explore}",
+            start_time,
+            datetime.datetime.now(),
+        )
 
-    def _emit_folder_as_container(
-        self, folder: LookerFolder
-    ) -> Iterable[MetadataWorkUnit]:
+    def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
         if folder.id not in self.processed_folders:
-            yield from gen_containers(
-                container_key=self._gen_folder_key(folder.id),
-                name=folder.name,
-                sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
-                parent_container_key=(
-                    self._gen_folder_key(folder.parent_id) if folder.parent_id else None
-                ),
-            )
             if folder.parent_id is None:
-                yield MetadataChangeProposalWrapper(
-                    entityUrn=self._gen_folder_key(folder.id).as_urn(),
-                    aspect=BrowsePathsV2Class(
-                        path=[BrowsePathEntryClass("Folders")],
-                    ),
-                ).as_workunit()
+                yield Container(
+                    container_key=self._gen_folder_key(folder.id),
+                    display_name=folder.name,
+                    subtype=BIContainerSubTypes.LOOKER_FOLDER,
+                    parent_container=["Folders"],
+                )
             else:
-                yield MetadataChangeProposalWrapper(
-                    entityUrn=self._gen_folder_key(folder.id).as_urn(),
-                    aspect=BrowsePathsV2Class(
-                        path=[
-                            BrowsePathEntryClass("Folders"),
-                            *self._get_folder_browse_path_v2_entries(
-                                folder, include_current_folder=False
-                            ),
-                        ],
-                    ),
-                ).as_workunit()
+                yield Container(
+                    container_key=self._gen_folder_key(folder.id),
+                    display_name=folder.name,
+                    subtype=BIContainerSubTypes.LOOKER_FOLDER,
+                    parent_container=[
+                        "Folders",
+                        *self._get_folder_ancestors_urn_entries(
+                            folder, include_current_folder=False
+                        ),
+                    ],
+                )
         self.processed_folders.append(folder.id)
 
     def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
@@ -1029,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             instance=self.source_config.platform_instance,
         )
 
-    def _make_dashboard_and_chart_mces(
+    def _make_dashboard_and_chart_entities(
         self, looker_dashboard: LookerDashboard
-    ) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
+    ) -> Iterable[Union[Chart, Dashboard]]:
         # Step 1: Emit metadata for each Chart inside the Dashboard.
-        chart_events = []
+        chart_events: List[Chart] = []
         for element in looker_dashboard.dashboard_elements:
             if element.type == "vis":
                 chart_events.extend(
-                    self._make_chart_metadata_events(element, looker_dashboard)
+                    self._make_chart_entities(element, looker_dashboard)
                 )
 
         yield from chart_events
 
-        # Step 2: Emit metadata events for the Dashboard itself.
-        chart_urns: Set[str] = (
-            set()
-        )  # Collect the unique child chart urns for dashboard input lineage.
+        # # Step 2: Emit metadata events for the Dashboard itself.
+        # Create a set of unique chart entities for dashboard input lineage based in chart.urn
+        unique_chart_entities: List[Chart] = []
         for chart_event in chart_events:
-            chart_event_urn = self._extract_event_urn(chart_event)
-            if chart_event_urn:
-                chart_urns.add(chart_event_urn)
-
-        dashboard_events = self._make_dashboard_metadata_events(
-            looker_dashboard, list(chart_urns)
+            # Use chart.urn to ensure uniqueness based on the chart's URN property
+            # Also, update the set of processed chart urns
+            if str(chart_event.urn) not in self.chart_urns:
+                self.chart_urns.add(str(chart_event.urn))
+                unique_chart_entities.append(chart_event)
+
+        dashboard_events = self._make_dashboard_entities(
+            looker_dashboard, unique_chart_entities
         )
         yield from dashboard_events
 
     def get_ownership(
         self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
-    ) -> Optional[OwnershipClass]:
+    ) -> Optional[OwnerClass]:
         if looker_dashboard_look.owner is not None:
             owner_urn = looker_dashboard_look.owner.get_urn(
                 self.source_config.strip_user_ids_from_email
             )
             if owner_urn is not None:
-                ownership: OwnershipClass = OwnershipClass(
-                    owners=[
-                        OwnerClass(
-                            owner=owner_urn,
-                            type=OwnershipTypeClass.DATAOWNER,
-                        )
-                    ]
+                return OwnerClass(
+                    owner=owner_urn,
+                    type=OwnershipTypeClass.DATAOWNER,
                 )
-                return ownership
         return None
 
-    def _get_change_audit_stamps(
-        self, looker_dashboard: LookerDashboard
-    ) -> ChangeAuditStamps:
-        change_audit_stamp: ChangeAuditStamps = ChangeAuditStamps()
-        if looker_dashboard.created_at is not None:
-            change_audit_stamp.created.time = round(
-                looker_dashboard.created_at.timestamp() * 1000
-            )
-        if looker_dashboard.owner is not None:
-            owner_urn = looker_dashboard.owner.get_urn(
-                self.source_config.strip_user_ids_from_email
-            )
-            if owner_urn:
-                change_audit_stamp.created.actor = owner_urn
-        if looker_dashboard.last_updated_at is not None:
-            change_audit_stamp.lastModified.time = round(
-                looker_dashboard.last_updated_at.timestamp() * 1000
-            )
-        if looker_dashboard.last_updated_by is not None:
-            updated_by_urn = looker_dashboard.last_updated_by.get_urn(
-                self.source_config.strip_user_ids_from_email
-            )
-            if updated_by_urn:
-                change_audit_stamp.lastModified.actor = updated_by_urn
-        if (
-            looker_dashboard.is_deleted
-            and looker_dashboard.deleted_by is not None
-            and looker_dashboard.deleted_at is not None
-        ):
-            deleter_urn = looker_dashboard.deleted_by.get_urn(
-                self.source_config.strip_user_ids_from_email
-            )
-            if deleter_urn:
-                change_audit_stamp.deleted = AuditStamp(
-                    actor=deleter_urn,
-                    time=round(looker_dashboard.deleted_at.timestamp() * 1000),
-                )
+    def _get_last_modified_time(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.last_updated_at if looker_dashboard else None
 
-        return change_audit_stamp
+    def _get_last_modified_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.last_updated_by:
+            return None
+        return looker_dashboard.last_updated_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
+
+    def _get_created_at(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.created_at if looker_dashboard else None
+
+    def _get_created_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.owner:
+            return None
+        return looker_dashboard.owner.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
+
+    def _get_deleted_on(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.deleted_at if looker_dashboard else None
+
+    def _get_deleted_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.deleted_by:
+            return None
+        return looker_dashboard.deleted_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
 
     def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
         assert folder.id
@@ -1126,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         ]
         return "/".join(ancestors + [folder.name])
 
-    def _get_looker_dashboard(self, dashboard: Dashboard) -> LookerDashboard:
+    def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
         self.reporter.accessed_dashboards += 1
         if dashboard.folder is None:
             logger.debug(f"{dashboard.id} has no folder")
@@ -1200,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
 
         return user
 
-    def process_metrics_dimensions_and_fields_for_dashboard(
-        self, dashboard: LookerDashboard
-    ) -> List[MetadataWorkUnit]:
-        chart_mcps = [
-            self._make_metrics_dimensions_chart_mcp(element)
-            for element in dashboard.dashboard_elements
-        ]
-        dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
-
-        mcps = chart_mcps
-        mcps.append(dashboard_mcp)
-
-        workunits = [mcp.as_workunit() for mcp in mcps]
-
-        return workunits
-
     def _input_fields_from_dashboard_element(
         self, dashboard_element: LookerDashboardElement
     ) -> List[InputFieldClass]:
@@ -1308,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             aspect=input_fields_aspect,
         )
 
-    def process_dashboard(
+    def _should_skip_personal_folder_dashboard(
+        self, dashboard_object: LookerAPIDashboard
+    ) -> bool:
+        """Check if dashboard should be skipped due to being in personal folder."""
+        if not self.source_config.skip_personal_folders:
+            return False
+
+        if dashboard_object.folder is not None and (
+            dashboard_object.folder.is_personal
+            or dashboard_object.folder.is_personal_descendant
+        ):
+            self.reporter.info(
+                title="Dropped Dashboard",
+                message="Dropped due to being a personal folder",
+                context=f"Dashboard ID: {dashboard_object.id}",
+            )
+            assert dashboard_object.id is not None
+            self.reporter.report_dashboards_dropped(dashboard_object.id)
+            return True
+        return False
+
+    def _should_skip_dashboard_by_folder_path(
+        self, looker_dashboard: LookerDashboard
+    ) -> bool:
+        """Check if dashboard should be skipped based on folder path pattern."""
+        if (
+            looker_dashboard.folder_path is not None
+            and not self.source_config.folder_path_pattern.allowed(
+                looker_dashboard.folder_path
+            )
+        ):
+            logger.debug(
+                f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
+            )
+            self.reporter.report_dashboards_dropped(looker_dashboard.id)
+            return True
+        return False
+
+    def _fetch_dashboard_from_api(
         self, dashboard_id: str, fields: List[str]
-    ) -> Tuple[
-        List[MetadataWorkUnit],
-        Optional[looker_usage.LookerDashboardForUsage],
-        str,
-        datetime.datetime,
-        datetime.datetime,
-    ]:
-        start_time = datetime.datetime.now()
-        assert dashboard_id is not None
-        if not self.source_config.dashboard_pattern.allowed(dashboard_id):
-            self.reporter.report_dashboards_dropped(dashboard_id)
-            return [], None, dashboard_id, start_time, datetime.datetime.now()
+    ) -> Optional[LookerAPIDashboard]:
+        """Fetch dashboard object from Looker API with error handling."""
         try:
-            dashboard_object: Dashboard = self.looker_api.dashboard(
+            return self.looker_api.dashboard(
                 dashboard_id=dashboard_id,
                 fields=fields,
             )
         except (SDKError, DeserializeError) as e:
-            # A looker dashboard could be deleted in between the list and the get
             self.reporter.report_warning(
                 title="Failed to fetch dashboard from the Looker API",
                 message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
                 context=f"Dashboard ID: {dashboard_id}",
                 exc=e,
             )
-            return [], None, dashboard_id, start_time, datetime.datetime.now()
+            return None
 
-        if self.source_config.skip_personal_folders:
-            if dashboard_object.folder is not None and (
-                dashboard_object.folder.is_personal
-                or dashboard_object.folder.is_personal_descendant
-            ):
-                self.reporter.info(
-                    title="Dropped Dashboard",
-                    message="Dropped due to being a personal folder",
-                    context=f"Dashboard ID: {dashboard_id}",
-                )
-                self.reporter.report_dashboards_dropped(dashboard_id)
-                return [], None, dashboard_id, start_time, datetime.datetime.now()
+    def _create_empty_result(
+        self, dashboard_id: str, start_time: datetime.datetime
+    ) -> DashboardProcessingResult:
+        """Create an empty result for skipped or failed dashboard processing."""
+        return DashboardProcessingResult(
+            entities=[],
+            dashboard_usage=None,
+            dashboard_id=dashboard_id,
+            start_time=start_time,
+            end_time=datetime.datetime.now(),
+        )
 
-        looker_dashboard = self._get_looker_dashboard(dashboard_object)
+    def process_dashboard(
+        self, dashboard_id: str, fields: List[str]
+    ) -> DashboardProcessingResult:
+        """
+        Process a single dashboard and return the metadata workunits.
 
-        workunits = []
-        if (
-            looker_dashboard.folder_path is not None
-            and not self.source_config.folder_path_pattern.allowed(
-                looker_dashboard.folder_path
-            )
1361
- ):
1362
- logger.debug(
1363
- f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
1364
- )
1365
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1295
+ Args:
1296
+ dashboard_id: The ID of the dashboard to process
1297
+ fields: List of fields to fetch from the Looker API
1366
1298
 
1367
- if looker_dashboard.folder:
1368
- workunits += list(
1369
- self._get_folder_and_ancestors_workunits(looker_dashboard.folder)
1370
- )
1299
+ Returns:
1300
+ DashboardProcessingResult containing entities, usage data, and timing information
1301
+ """
1302
+ start_time = datetime.datetime.now()
1371
1303
 
1372
- mces = self._make_dashboard_and_chart_mces(looker_dashboard)
1373
- workunits += [
1374
- (
1375
- MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
1376
- if isinstance(mce, MetadataChangeEvent)
1377
- else MetadataWorkUnit(
1378
- id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
1379
- )
1380
- )
1381
- for mce in mces
1382
- ]
1304
+ if dashboard_id is None:
1305
+ raise ValueError("Dashboard ID cannot be None")
1383
1306
 
1384
- # add on metrics, dimensions, fields events
1385
- metric_dim_workunits = self.process_metrics_dimensions_and_fields_for_dashboard(
1386
- looker_dashboard
1307
+ # Fetch dashboard from API
1308
+ dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
1309
+ dashboard_id, fields
1387
1310
  )
1311
+ if dashboard_object is None:
1312
+ return self._create_empty_result(dashboard_id, start_time)
1313
+
1314
+ # Check if dashboard should be skipped due to personal folder
1315
+ if self._should_skip_personal_folder_dashboard(dashboard_object):
1316
+ return self._create_empty_result(dashboard_id, start_time)
1317
+
1318
+ # Convert to internal representation
1319
+ looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
1320
+
1321
+ # Check folder path pattern
1322
+ if self._should_skip_dashboard_by_folder_path(looker_dashboard):
1323
+ return self._create_empty_result(dashboard_id, start_time)
1324
+
1325
+ # Build entities list
1326
+ entities: List[Entity] = []
1388
1327
 
1389
- workunits.extend(metric_dim_workunits)
1328
+ # Add folder containers if dashboard has a folder
1329
+ if looker_dashboard.folder:
1330
+ entities.extend(
1331
+ list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
1332
+ )
1390
1333
 
1334
+ # Add dashboard and chart entities
1335
+ entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
1336
+
1337
+ # Report successful processing
1391
1338
  self.reporter.report_dashboards_scanned()
1392
1339
 
1393
- # generate usage tracking object
1340
+ # Generate usage tracking object
1394
1341
  dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
1395
1342
  dashboard_object
1396
1343
  )
1397
1344
 
1398
- return (
1399
- workunits,
1400
- dashboard_usage,
1401
- dashboard_id,
1402
- start_time,
1403
- datetime.datetime.now(),
1345
+ return DashboardProcessingResult(
1346
+ entities=entities,
1347
+ dashboard_usage=dashboard_usage,
1348
+ dashboard_id=dashboard_id,
1349
+ start_time=start_time,
1350
+ end_time=datetime.datetime.now(),
1404
1351
  )
1405
1352
 
1406
- def _get_folder_and_ancestors_workunits(
1353
+ def _get_folder_and_ancestors_containers(
1407
1354
  self, folder: LookerFolder
1408
- ) -> Iterable[MetadataWorkUnit]:
1355
+ ) -> Iterable[Container]:
1409
1356
  for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
1410
1357
  yield from self._emit_folder_as_container(
1411
1358
  self._get_looker_folder(ancestor_folder)
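process_dashboard now returns a single DashboardProcessingResult instead of a five-element tuple, and the skip and fetch logic is split into small helpers that all funnel into _create_empty_result. The class definition sits outside this hunk; a minimal sketch consistent with the constructor calls above (field names taken from the hunk, types inferred rather than confirmed) could look like:

import datetime
from dataclasses import dataclass
from typing import Any, List, Optional


@dataclass
class DashboardProcessingResult:
    """Outcome of processing one dashboard; field names taken from the hunk above."""

    entities: List[Any]             # dashboard, chart, and folder-container entities
    dashboard_usage: Optional[Any]  # looker_usage.LookerDashboardForUsage, when produced
    dashboard_id: str
    start_time: datetime.datetime
    end_time: datetime.datetime

Returning one object keeps the consuming loop in get_workunits_internal free of positional unpacking.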
@@ -1476,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  ).workunit_processor,
  ]

- def emit_independent_looks_mcp(
+ def emit_independent_looks_entities(
  self, dashboard_element: LookerDashboardElement
- ) -> Iterable[MetadataWorkUnit]:
+ ) -> Iterable[Union[Container, Chart]]:
  if dashboard_element.folder: # independent look
- yield from self._get_folder_and_ancestors_workunits(
+ yield from self._get_folder_and_ancestors_containers(
  dashboard_element.folder
  )

- yield from auto_workunit(
- stream=self._make_chart_metadata_events(
- dashboard_element=dashboard_element,
- dashboard=None,
- )
+ yield from self._make_chart_entities(
+ dashboard_element=dashboard_element,
+ dashboard=None,
  )

- yield from auto_workunit(
- [
- self._make_metrics_dimensions_chart_mcp(
- dashboard_element,
- )
- ]
- )
-
- def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
- """
- Emit MetadataWorkUnit for looks which are not part of any Dashboard
+ def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
  """
- if self.source_config.extract_independent_looks is False:
- return
+ Emit entities for Looks which are not part of any Dashboard.

- self.reporter.report_stage_start("extract_independent_looks")
+ Returns: Containers for the folders and ancestors folders and Charts for the looks
+ """
+ logger.debug("Extracting Looks not part of any Dashboard")

- logger.debug("Extracting looks not part of Dashboard")
  look_fields: List[str] = [
  "id",
  "title",
@@ -1530,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  all_looks: List[Look] = self.looker_api.all_looks(
  fields=look_fields, soft_deleted=self.source_config.include_deleted
  )
+
  for look in all_looks:
+ # Skip looks that are already referenced from a dashboard
+ if look.id is None:
+ logger.warning("Encountered Look with no ID, skipping.")
+ continue
+
  if look.id in self.reachable_look_registry:
- # This look is reachable from the Dashboard
  continue

  if look.query_id is None:
  logger.info(f"query_id is None for look {look.title}({look.id})")
  continue

+ # Skip looks in personal folders if configured
  if self.source_config.skip_personal_folders:
  if look.folder is not None and (
  look.folder.is_personal or look.folder.is_personal_descendant
@@ -1549,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  context=f"Look ID: {look.id}",
  )

- assert look.id, "Looker id is null"
  self.reporter.report_charts_dropped(look.id)
  continue

- if look.id is not None:
- query: Optional[Query] = self.looker_api.get_look(
- look.id, fields=["query"]
- ).query
- # Only include fields that are in the query_fields list
- query = Query(
- **{
- key: getattr(query, key)
- for key in query_fields
- if hasattr(query, key)
- }
- )
+ # Fetch the Look's query and filter to allowed fields
+ query: Optional[Query] = None
+ try:
+ look_with_query = self.looker_api.get_look(look.id, fields=["query"])
+ query_obj = look_with_query.query
+ if query_obj:
+ query = Query(
+ **{
+ key: getattr(query_obj, key)
+ for key in query_fields
+ if hasattr(query_obj, key)
+ }
+ )
+ except Exception as exc:
+ logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
+ continue

- dashboard_element: Optional[LookerDashboardElement] = (
- self._get_looker_dashboard_element(
- DashboardElement(
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
- # we add the "looks_" prefix to look.id.
- title=look.title,
- subtitle_text=look.description,
- look_id=look.id,
- dashboard_id=None, # As this is an independent look
- look=LookWithQuery(
- query=query, folder=look.folder, user_id=look.user_id
- ),
+ dashboard_element = self._get_looker_dashboard_element(
+ DashboardElement(
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
+ # we add the "looks_" prefix to look.id.
+ title=look.title,
+ subtitle_text=look.description,
+ look_id=look.id,
+ dashboard_id=None, # As this is an independent look
+ look=LookWithQuery(
+ query=query,
+ folder=getattr(look, "folder", None),
+ user_id=getattr(look, "user_id", None),
  ),
  )
  )

  if dashboard_element is not None:
- logger.debug(f"Emitting MCPS for look {look.title}({look.id})")
- yield from self.emit_independent_looks_mcp(
+ logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
+ yield from self.emit_independent_looks_entities(
  dashboard_element=dashboard_element
  )

- self.reporter.report_stage_end("extract_independent_looks")
+ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+ """
+ Note: Returns Entities from SDKv2 where possible else MCPs only.

- def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
- self.reporter.report_stage_start("list_dashboards")
- dashboards = self.looker_api.all_dashboards(fields="id")
- deleted_dashboards = (
- self.looker_api.search_dashboards(fields="id", deleted="true")
- if self.source_config.include_deleted
- else []
- )
- if deleted_dashboards != []:
- logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+ Using SDKv2: Containers, Datasets, Dashboards and Charts
+ Using MCPW: Tags, DashboardUsageStats and UserResourceMapping

- dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
- dashboard_ids.extend(
- [deleted_dashboard.id for deleted_dashboard in deleted_dashboards]
- )
- selected_dashboard_ids: List[Optional[str]] = []
- for id in dashboard_ids:
- if id is None:
- continue
- if not self.source_config.dashboard_pattern.allowed(id):
- self.reporter.report_dashboards_dropped(id)
+ TODO: Convert MCPWs to use SDKv2 entities
+ """
+ with self.reporter.report_stage("list_dashboards"):
+ # Fetch all dashboards (not deleted)
+ dashboards = self.looker_api.all_dashboards(fields="id")
+
+ # Optionally fetch deleted dashboards if configured
+ if self.source_config.include_deleted:
+ deleted_dashboards = self.looker_api.search_dashboards(
+ fields="id", deleted="true"
+ )
  else:
- selected_dashboard_ids.append(id)
- dashboard_ids = selected_dashboard_ids
- self.reporter.report_stage_end("list_dashboards")
- self.reporter.report_total_dashboards(len(dashboard_ids))
+ deleted_dashboards = []
+
+ if deleted_dashboards:
+ logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+
+ # Collect all dashboard IDs (including deleted if applicable)
+ all_dashboard_ids: List[Optional[str]] = [
+ dashboard.id for dashboard in dashboards
+ ]
+ all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])

- # List dashboard fields to extract for processing
- fields = [
+ # Filter dashboard IDs based on the allowed pattern
+ filtered_dashboard_ids: List[str] = []
+ for dashboard_id in all_dashboard_ids:
+ if dashboard_id is None:
+ continue
+ if not self.source_config.dashboard_pattern.allowed(dashboard_id):
+ self.reporter.report_dashboards_dropped(dashboard_id)
+ else:
+ filtered_dashboard_ids.append(dashboard_id)
+
+ # Use the filtered list for further processing
+ dashboard_ids: List[str] = filtered_dashboard_ids
+
+ # Report the total number of dashboards to be processed
+ self.reporter.report_total_dashboards(len(dashboard_ids))
+
+ # Define the fields to extract for each dashboard
+ dashboard_fields = [
  "id",
  "title",
  "dashboard_elements",
@@ -1634,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  "deleted_at",
  "deleter_id",
  ]
+
+ # Add usage-related fields if usage history extraction is enabled
  if self.source_config.extract_usage_history:
- fields += [
- "favorite_count",
- "view_count",
- "last_viewed_at",
- ]
+ dashboard_fields.extend(
+ [
+ "favorite_count",
+ "view_count",
+ "last_viewed_at",
+ ]
+ )

+ # Store dashboards for which usage stats will be extracted
  looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []

+ # Process dashboard and chart metadata
  with self.reporter.report_stage("dashboard_chart_metadata"):
+ dashboard_jobs = (
+ (dashboard_id, dashboard_fields)
+ for dashboard_id in dashboard_ids
+ if dashboard_id is not None
+ )
  for job in BackpressureAwareExecutor.map(
  self.process_dashboard,
- (
- (dashboard_id, fields)
- for dashboard_id in dashboard_ids
- if dashboard_id is not None
- ),
+ dashboard_jobs,
  max_workers=self.source_config.max_threads,
  ):
- (
- work_units,
- dashboard_usage,
- dashboard_id,
- start_time,
- end_time,
- ) = job.result()
+ result: DashboardProcessingResult = job.result()
+
  logger.debug(
- f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
+ f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
  )
- self.reporter.report_upstream_latency(start_time, end_time)
+ self.reporter.report_upstream_latency(
+ result.start_time, result.end_time
+ )
+
+ yield from result.entities

- yield from work_units
- if dashboard_usage is not None:
- looker_dashboards_for_usage.append(dashboard_usage)
+ if result.dashboard_usage is not None:
+ looker_dashboards_for_usage.append(result.dashboard_usage)

+ # Warn if owner extraction was enabled but no emails could be found
  if (
  self.source_config.extract_owners
  and self.reporter.resolved_user_ids > 0
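The dashboard_chart_metadata stage fans dashboard IDs out through BackpressureAwareExecutor.map and consumes one DashboardProcessingResult per completed job. A rough standard-library analogue of that consume-as-completed pattern, using hypothetical process and run_all names rather than DataHub's executor, looks like this:

import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Iterable, List, Tuple


def process(dashboard_id: str, fields: List[str]) -> Tuple[str, datetime.datetime, datetime.datetime]:
    # Stand-in for fetching and converting a single dashboard.
    start = datetime.datetime.now()
    return dashboard_id, start, datetime.datetime.now()


def run_all(dashboard_ids: Iterable[str], fields: List[str], max_workers: int = 10) -> None:
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(process, dashboard_id, fields) for dashboard_id in dashboard_ids]
        for future in as_completed(futures):
            # Consume results as they finish, mirroring the per-job latency logging above.
            dashboard_id, start, end = future.result()
            print(f"{dashboard_id}: {(end - start).total_seconds():.2f}s")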
@@ -1680,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
  )

- # Extract independent look here, so that explore of this look would get consider in _make_explore_metadata_events
- yield from self.extract_independent_looks()
+ # Extract independent looks first, so their explores are considered in _make_explore_containers.
+ if self.source_config.extract_independent_looks:
+ with self.reporter.report_stage("extract_independent_looks"):
+ yield from self.extract_independent_looks()

- self.reporter.report_stage_start("explore_metadata")
-
- for event in self._make_explore_metadata_events():
- if isinstance(event, MetadataChangeEvent):
- yield MetadataWorkUnit(
- id=f"looker-{event.proposedSnapshot.urn}", mce=event
- )
- elif isinstance(event, MetadataChangeProposalWrapper):
- yield event.as_workunit()
- elif isinstance(event, MetadataWorkUnit):
- yield event
- else:
- raise Exception(f"Unexpected type of event {event}")
- self.reporter.report_stage_end("explore_metadata")
+ # Process explore containers and yield them.
+ with self.reporter.report_stage("explore_metadata"):
+ yield from self._make_explore_containers()

  if (
  self.source_config.tag_measures_and_dimensions
  and self.reporter.explores_scanned > 0
  ):
- # Emit tag MCEs for measures and dimensions if we produced any explores:
+ # Emit tag MCPs for measures and dimensions if we produced any explores:
+ # Tags MCEs are converted to MCPs
  for tag_mce in LookerUtil.get_tag_mces():
- yield MetadataWorkUnit(
- id=f"tag-{tag_mce.proposedSnapshot.urn}",
- mce=tag_mce,
- )
+ yield from auto_workunit(mcps_from_mce(tag_mce))

  # Extract usage history is enabled
  if self.source_config.extract_usage_history:
- self.reporter.report_stage_start("usage_extraction")
- usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
- looker_dashboards_for_usage, self.chart_urns
- )
- for usage_mcp in usage_mcps:
- yield usage_mcp.as_workunit()
- self.reporter.report_stage_end("usage_extraction")
+ with self.reporter.report_stage("usage_extraction"):
+ usage_mcps: List[MetadataChangeProposalWrapper] = (
+ self.extract_usage_stat(
+ looker_dashboards_for_usage, self.chart_urns
+ )
+ )
+ yield from auto_workunit(usage_mcps)

- # Dump looker user resource mappings.
+ # Ingest looker user resource mapping workunits.
  logger.info("Ingesting looker user resource mapping workunits")
- self.reporter.report_stage_start("user_resource_extraction")
- yield from auto_workunit(
- self.user_registry.to_platform_resource(
- self.source_config.platform_instance
+ with self.reporter.report_stage("user_resource_extraction"):
+ yield from auto_workunit(
+ self.user_registry.to_platform_resource(
+ self.source_config.platform_instance
+ )
  )
- )

  def get_report(self) -> SourceReport:
  return self.reporter
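Throughout get_workunits_internal the paired report_stage_start/report_stage_end calls are replaced by with self.reporter.report_stage(...) blocks, so a stage is closed out even when its body raises. The reporter implementation is not part of this diff; a minimal sketch of how such a context manager could be built, using a hypothetical StageTimingReporter, is:

import contextlib
import datetime
from typing import Dict, Iterator


class StageTimingReporter:
    """Hypothetical reporter; the real Looker source report class is not shown here."""

    def __init__(self) -> None:
        self.stage_seconds: Dict[str, float] = {}

    @contextlib.contextmanager
    def report_stage(self, stage: str) -> Iterator[None]:
        # Time the enclosed block and record the duration even if it raises.
        start = datetime.datetime.now()
        try:
            yield
        finally:
            self.stage_seconds[stage] = (datetime.datetime.now() - start).total_seconds()


reporter = StageTimingReporter()
with reporter.report_stage("list_dashboards"):
    pass  # ... list dashboards here ...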