acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the registry's advisory page for more details.

Files changed (414):
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,33 @@
1
1
  import logging
2
2
  import re
3
3
  from abc import ABC, abstractmethod
4
+ from datetime import datetime
4
5
  from functools import lru_cache
5
6
  from typing import Dict, List, Optional
6
7
 
8
+ from looker_sdk.sdk.api40.models import (
9
+ WriteQuery,
10
+ )
11
+
7
12
  from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
8
13
  from datahub.ingestion.api.common import PipelineContext
9
14
  from datahub.ingestion.source.looker.looker_common import (
10
15
  LookerExplore,
11
16
  LookerViewId,
12
17
  ViewField,
18
+ ViewFieldDimensionGroupType,
13
19
  ViewFieldType,
14
20
  )
15
21
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
22
+ from datahub.ingestion.source.looker.looker_constant import (
23
+ NAME,
24
+ VIEW_FIELD_INTERVALS_ATTRIBUTE,
25
+ VIEW_FIELD_TIMEFRAMES_ATTRIBUTE,
26
+ VIEW_FIELD_TYPE_ATTRIBUTE,
27
+ )
28
+ from datahub.ingestion.source.looker.looker_lib_wrapper import (
29
+ LookerAPI,
30
+ )
16
31
  from datahub.ingestion.source.looker.looker_view_id_cache import LookerViewIdCache
17
32
  from datahub.ingestion.source.looker.lookml_concept_context import (
18
33
  LookerFieldContext,
@@ -20,7 +35,6 @@ from datahub.ingestion.source.looker.lookml_concept_context import (
20
35
  )
21
36
  from datahub.ingestion.source.looker.lookml_config import (
22
37
  DERIVED_VIEW_SUFFIX,
23
- NAME,
24
38
  LookMLSourceConfig,
25
39
  LookMLSourceReport,
26
40
  )
@@ -280,6 +294,447 @@ class AbstractViewUpstream(ABC):
280
294
  return upstream_column_refs
281
295
 
282
296
 
297
+ class LookerQueryAPIBasedViewUpstream(AbstractViewUpstream):
298
+ """
299
+ Implements Looker view upstream lineage extraction using the Looker Query API.
300
+
301
+ This class leverages the Looker API to generate the fully resolved SQL for a Looker view by constructing a WriteQuery
302
+ that includes all dimensions, dimension groups and measures. The SQL is then parsed to extract column-level lineage.
303
+ The Looker client is required for this class, as it is used to execute the WriteQuery and retrieve the SQL.
304
+
305
+ Other view upstream implementations use string parsing to extract lineage information from the SQL, which does not cover all the edge cases.
306
+ Limitations of string based lineage extraction: Ref: https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions
307
+
308
+ Key Features:
309
+ - Requires a Looker client (`looker_client`) to execute queries and retrieve SQL for the view.
310
+ - Requires a `view_to_explore_map` to map view names to their corresponding explore name
311
+ - Field name translation is handled: Looker API field names are constructed as `<view_name>.<field_name>`, and helper
312
+ methods are provided to convert between Looker API field names and raw field names.
313
+ - SQL parsing is cached for efficiency, and the class is designed to gracefully fall back if the Looker Query API fails.
314
+ - All lineage extraction is based on the SQL returned by the Looker API, ensuring accurate and up-to-date lineage.
315
+
316
+ Why view_to_explore_map is required:
317
+ The Looker Query API expects the explore name (not the view name) as the "view" parameter in the WriteQuery.
318
+ In Looker, a view can be referenced by multiple explores, but the API needs any one of the
319
+ explores to access the view's fields
320
+
321
+ Example WriteQuery request (see `_execute_query` for details):
322
+ {
323
+ "model": "test_model",
324
+ "view": "users_explore", # This is the explore name, not the view name
325
+ "fields": [
326
+ "users.email", "users.lifetime_purchase_count"
327
+ ],
328
+ "limit": "1",
329
+ "cache": true
330
+ }
331
+ The SQL response is then parsed to extract upstream tables and column-level lineage.
332
+
333
+ For further details, see the method-level docstrings, especially:
334
+ - `__get_spr`: SQL parsing and lineage extraction workflow
335
+ - `_get_sql_write_query`: WriteQuery construction and field enumeration
336
+ - `_execute_query`: Looker API invocation and SQL retrieval - this only generates the SQL query, does not execute it
337
+ - Field name translation: `_get_looker_api_field_name` and `_get_field_name_from_looker_api_field_name`
338
+
339
+ Note: This class is intended to be robust and raise exceptions if SQL parsing or API calls fail, and will fall back to
340
+ other implementations - custom regex-based parsing if necessary.
341
+ """
342
+
343
+ def __init__(
344
+ self,
345
+ view_context: LookerViewContext,
346
+ looker_view_id_cache: LookerViewIdCache,
347
+ config: LookMLSourceConfig,
348
+ reporter: LookMLSourceReport,
349
+ ctx: PipelineContext,
350
+ looker_client: LookerAPI,
351
+ view_to_explore_map: Dict[str, str],
352
+ ):
353
+ super().__init__(view_context, looker_view_id_cache, config, reporter, ctx)
354
+ self.looker_client = looker_client
355
+ self.view_to_explore_map = view_to_explore_map
356
+ # Cache the SQL parsing results
357
+ # We use maxsize=1 because a new class instance is created for each view, Ref: view_upstream.create_view_upstream
358
+ self._get_spr = lru_cache(maxsize=1)(self.__get_spr)
359
+ self._get_upstream_dataset_urn = lru_cache(maxsize=1)(
360
+ self.__get_upstream_dataset_urn
361
+ )
362
+
363
+ # Initialize the cache
364
+ # Done to fallback to other implementations if the Looker Query API fails
365
+ self._get_spr()
366
+
367
+ def __get_spr(self) -> SqlParsingResult:
368
+ """
369
+ Retrieves the SQL parsing result for the current Looker view by:
370
+ 1. Building a WriteQuery for the view.
371
+ 2. Executing the query via the Looker API to get the SQL.
372
+ 3. Parsing the SQL to extract lineage information.
373
+
374
+ Returns:
375
+ SqlParsingResult if successful, otherwise None.
376
+ Raises:
377
+ ValueError: If no SQL is found in the response.
378
+ ValueError: If no fields are found for the view.
379
+ ValueError: If explore name is not found for the view.
380
+ ValueError: If error in parsing SQL for upstream tables.
381
+ ValueError: If error in parsing SQL for column lineage.
382
+ """
383
+ try:
384
+ # Build the WriteQuery for the current view.
385
+ sql_query: WriteQuery = self._get_sql_write_query()
386
+
387
+ # Execute the query to get the SQL representation from Looker.
388
+ sql_response = self._execute_query(sql_query)
389
+
390
+ # Parse the SQL to extract lineage information.
391
+ spr = create_lineage_sql_parsed_result(
392
+ query=sql_response,
393
+ default_schema=self.view_context.view_connection.default_schema,
394
+ default_db=self.view_context.view_connection.default_db,
395
+ platform=self.view_context.view_connection.platform,
396
+ platform_instance=self.view_context.view_connection.platform_instance,
397
+ env=self.view_context.view_connection.platform_env or self.config.env,
398
+ graph=self.ctx.graph,
399
+ )
400
+
401
+ # Check for errors encountered during table extraction.
402
+ table_error = spr.debug_info.table_error
403
+ if table_error is not None:
404
+ self.reporter.report_warning(
405
+ title="Table Level Lineage Extraction Failed",
406
+ message="Error in parsing derived sql",
407
+ context=f"View-name: {self.view_context.name()}",
408
+ exc=table_error,
409
+ )
410
+ raise ValueError(
411
+ f"Error in parsing SQL for upstream tables: {table_error}"
412
+ )
413
+
414
+ column_error = spr.debug_info.column_error
415
+ if column_error is not None:
416
+ self.reporter.report_warning(
417
+ title="Column Level Lineage Extraction Failed",
418
+ message="Error in parsing derived sql",
419
+ context=f"View-name: {self.view_context.name()}",
420
+ exc=column_error,
421
+ )
422
+ raise ValueError(
423
+ f"Error in parsing SQL for column lineage: {column_error}"
424
+ )
425
+
426
+ return spr
427
+ except Exception:
428
+ # Reraise the exception to allow higher-level handling.
429
+ raise
430
+
431
+ def _get_time_dim_group_field_name(self, dim_group: dict) -> str:
432
+ """
433
+ Time dimension groups must be referenced by their individual timeframes suffix.
434
+ Example:
435
+ dimension_group: created {
436
+ type: time
437
+ timeframes: [date, week, month]
438
+ sql: ${TABLE}.created_at ;;
439
+ }
440
+ Used as: {view_name.date_created}
441
+
442
+ created -> created_date, created_week, created_month
443
+ # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group#dimension_groups_must_be_referenced_by_their_individual_dimensions
444
+ """
445
+ dim_group_name = dim_group.get(NAME)
446
+ timeframes = dim_group.get(VIEW_FIELD_TIMEFRAMES_ATTRIBUTE)
447
+
448
+ # If timeframes is not included (rare case), the dimension group will include all possible timeframes.
449
+ # We will pick to use "raw"
450
+ suffix = timeframes[0] if timeframes else "raw"
451
+ return f"{dim_group_name}_{suffix}"
452
+
453
+ def _get_duration_dim_group_field_name(self, dim_group: dict) -> str:
454
+ """
455
+ Duration dimension groups must be referenced by their plural version of the interval value as prefix
456
+ Example:
457
+ dimension_group: since_event {
458
+ type: duration
459
+ intervals: [hour, day, week, month, quarter, year]
460
+ sql_start: ${faa_event_date_raw} ;;
461
+ sql_end: CURRENT_TIMESTAMP();;
462
+ }
463
+ Used as: {view_name.hours_since_event}
464
+
465
+ since_event -> hours_since_event, days_since_event, weeks_since_event, months_since_event, quarters_since_event, years_since_event
466
+ # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group#referencing_intervals_from_another_lookml_field
467
+ """
468
+ dim_group_name = dim_group.get(NAME)
469
+ intervals = dim_group.get(VIEW_FIELD_INTERVALS_ATTRIBUTE)
470
+
471
+ # If intervals is not included (rare case), the dimension group will include all possible intervals.
472
+ # We will pick to use "day" -> "days"
473
+ prefix = f"{intervals[0]}s" if intervals else "days"
474
+ return f"{prefix}_{dim_group_name}"
475
+
476
+ def _get_sql_write_query(self) -> WriteQuery:
477
+ """
478
+ Constructs a WriteQuery object to obtain the SQL representation of the current Looker view.
479
+
480
+ We need to list all the fields for the view to get the SQL representation of the view - this fully resolved SQL for view dimensions and measures.
481
+
482
+ The method uses the view_to_explore_map to determine the correct explore name to use in the WriteQuery.
483
+ This is crucial because the Looker Query API expects the explore name (not the view name) as the "view" parameter.
484
+
485
+ Ref: https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions
486
+
487
+ Returns:
488
+ WriteQuery: The WriteQuery object if fields are found and explore name is available, otherwise None.
489
+
490
+ Raises:
491
+ ValueError: If the explore name is not found in the view_to_explore_map for the current view.
492
+ ValueError: If no fields are found for the view.
493
+ """
494
+
495
+ # Collect all dimension and measure fields for the view.
496
+ view_fields: List[str] = []
497
+ # Add dimension fields in the format: <view_name>.<dimension_name> or <view_name>.<measure_name>
498
+ for field in self.view_context.dimensions() + self.view_context.measures():
499
+ field_name = field.get(NAME)
500
+ assert field_name # Happy linter
501
+ view_fields.append(self._get_looker_api_field_name(field_name))
502
+
503
+ for dim_group in self.view_context.dimension_groups():
504
+ dim_group_type: ViewFieldDimensionGroupType = ViewFieldDimensionGroupType(
505
+ dim_group.get(VIEW_FIELD_TYPE_ATTRIBUTE)
506
+ )
507
+
508
+ if dim_group_type == ViewFieldDimensionGroupType.TIME:
509
+ view_fields.append(
510
+ self._get_looker_api_field_name(
511
+ self._get_time_dim_group_field_name(dim_group)
512
+ )
513
+ )
514
+ elif dim_group_type == ViewFieldDimensionGroupType.DURATION:
515
+ view_fields.append(
516
+ self._get_looker_api_field_name(
517
+ self._get_duration_dim_group_field_name(dim_group)
518
+ )
519
+ )
520
+
521
+ # Use explore name from view_to_explore_map if available
522
+ # explore_name is always present in the view_to_explore_map because of the check in view_upstream.create_view_upstream
523
+ explore_name = self.view_to_explore_map.get(self.view_context.name())
524
+ assert explore_name # Happy linter
525
+
526
+ if not view_fields:
527
+ raise ValueError(
528
+ f"No fields found for view '{self.view_context.name()}'. Cannot proceed with Looker API for view lineage."
529
+ )
530
+
531
+ # Construct and return the WriteQuery object.
532
+ # The 'limit' is set to "1" as the query is only used to obtain SQL, not to fetch data.
533
+ return WriteQuery(
534
+ model=self.looker_view_id_cache.model_name,
535
+ view=explore_name,
536
+ fields=view_fields,
537
+ filters={},
538
+ limit="1",
539
+ )
540
+
541
+ def _execute_query(self, query: WriteQuery) -> str:
542
+ """
543
+ Executes a Looker SQL query using the Looker API and returns the SQL string.
544
+
545
+ Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
546
+
547
+ Example Request:
548
+ WriteQuery:
549
+ {
550
+ "model": "test_model",
551
+ "view": "users",
552
+ "fields": [
553
+ "users.email", "users.lifetime_purchase_count"
554
+ ],
555
+ "limit": "1",
556
+ "cache": true
557
+ }
558
+
559
+ Response:
560
+ "
561
+ SELECT
562
+ users."EMAIL" AS "users.email",
563
+ COUNT(DISTINCT ( purchases."PK" ) ) AS "users.lifetime_purchase_count"
564
+ FROM "ECOMMERCE"."USERS" AS users
565
+ LEFT JOIN "ECOMMERCE"."PURCHASES" AS purchases ON (users."PK") = (purchases."USER_FK")
566
+ GROUP BY
567
+ 1
568
+ ORDER BY
569
+ 2 DESC
570
+ FETCH NEXT 1 ROWS ONLY
571
+ "
572
+ Args:
573
+ query (WriteQuery): The Looker WriteQuery object to execute.
574
+
575
+ Returns:
576
+ str: The SQL string returned by the Looker API, or an empty string if execution fails.
577
+ """
578
+
579
+ # Record the start time for latency measurement.
580
+ start_time = datetime.now()
581
+
582
+ # Execute the query using the Looker client.
583
+ sql_response = self.looker_client.generate_sql_query(
584
+ write_query=query, use_cache=self.config.use_api_cache_for_view_lineage
585
+ )
586
+
587
+ # Record the end time after query execution.
588
+ end_time = datetime.now()
589
+
590
+ # Attempt to get the LookerViewId for reporting.
591
+ looker_view_id: Optional[LookerViewId] = (
592
+ self.looker_view_id_cache.get_looker_view_id(
593
+ view_name=self.view_context.name(),
594
+ base_folder_path=self.view_context.base_folder_path,
595
+ )
596
+ )
597
+
598
+ # Report the query API latency if the view ID is available.
599
+ if looker_view_id is not None:
600
+ self.reporter.report_looker_query_api_latency(
601
+ looker_view_id.get_urn(self.config),
602
+ end_time - start_time,
603
+ )
604
+
605
+ # Validate the response structure.
606
+ if not sql_response:
607
+ raise ValueError(
608
+ f"No SQL found in response for view '{self.view_context.name()}'. Response: {sql_response}"
609
+ )
610
+
611
+ # Extract the SQL string from the response.
612
+ return sql_response
613
+
614
+ def __get_upstream_dataset_urn(self) -> List[Urn]:
615
+ """
616
+ Extract upstream dataset URNs by parsing the SQL for the current view.
617
+
618
+ Returns:
619
+ List[Urn]: List of upstream dataset URNs, or an empty list if parsing fails.
620
+ """
621
+ # Attempt to get the SQL parsing result for the current view.
622
+ spr: SqlParsingResult = self._get_spr()
623
+
624
+ # Remove any 'hive.' prefix from upstream table URNs.
625
+ upstream_dataset_urns: List[str] = [
626
+ _drop_hive_dot(urn) for urn in spr.in_tables
627
+ ]
628
+
629
+ # Fix any derived view references present in the URNs.
630
+ upstream_dataset_urns = fix_derived_view_urn(
631
+ urns=upstream_dataset_urns,
632
+ looker_view_id_cache=self.looker_view_id_cache,
633
+ base_folder_path=self.view_context.base_folder_path,
634
+ config=self.config,
635
+ )
636
+
637
+ return upstream_dataset_urns
638
+
639
+ def _get_looker_api_field_name(self, field_name: str) -> str:
640
+ """
641
+ Translate the field name to the looker api field name
642
+
643
+ Example:
644
+ pk -> purchases.pk
645
+ """
646
+ return f"{self.view_context.name()}.{field_name}"
647
+
648
+ def _get_field_name_from_looker_api_field_name(
649
+ self, looker_api_field_name: str
650
+ ) -> str:
651
+ """
652
+ Translate the looker api field name to the field name
653
+
654
+ Example:
655
+ purchases.pk -> pk
656
+ """
657
+ # Remove the view name at the start and the dot from the looker_api_field_name, but only if it matches the current view name
658
+ prefix = f"{self.view_context.name()}."
659
+ if looker_api_field_name.startswith(prefix):
660
+ return looker_api_field_name[len(prefix) :]
661
+ else:
662
+ # Don't throw an error, just return the original field name
663
+ return looker_api_field_name
664
+
665
+ def get_upstream_dataset_urn(self) -> List[Urn]:
666
+ """Get upstream dataset URNs"""
667
+ return self._get_upstream_dataset_urn()
668
+
669
+ def get_upstream_column_ref(
670
+ self, field_context: LookerFieldContext
671
+ ) -> List[ColumnRef]:
672
+ """Return upstream column references for a given field."""
673
+ spr: SqlParsingResult = self._get_spr()
674
+ if not spr.column_lineage:
675
+ return []
676
+
677
+ field_type: Optional[ViewFieldDimensionGroupType] = None
678
+ field_name = field_context.name()
679
+ try:
680
+ # Try if field is a dimension group
681
+ field_type = ViewFieldDimensionGroupType(
682
+ field_context.raw_field.get(VIEW_FIELD_TYPE_ATTRIBUTE)
683
+ )
684
+
685
+ if field_type == ViewFieldDimensionGroupType.TIME:
686
+ field_name = self._get_time_dim_group_field_name(
687
+ field_context.raw_field
688
+ )
689
+ elif field_type == ViewFieldDimensionGroupType.DURATION:
690
+ field_name = self._get_duration_dim_group_field_name(
691
+ field_context.raw_field
692
+ )
693
+
694
+ except Exception:
695
+ # Not a dimension group, no modification needed
696
+ logger.debug(
697
+ f"view-name={self.view_context.name()}, field-name={field_name}, field-type={field_context.raw_field.get(VIEW_FIELD_TYPE_ATTRIBUTE)}"
698
+ )
699
+
700
+ field_api_name = self._get_looker_api_field_name(field_name).lower()
701
+
702
+ upstream_refs: List[ColumnRef] = []
703
+
704
+ for lineage in spr.column_lineage:
705
+ if lineage.downstream.column.lower() == field_api_name:
706
+ for upstream in lineage.upstreams:
707
+ upstream_refs.append(
708
+ ColumnRef(table=upstream.table, column=upstream.column)
709
+ )
710
+
711
+ return _drop_hive_dot_from_upstream(upstream_refs)
712
+
713
+ def create_fields(self) -> List[ViewField]:
714
+ """Create ViewField objects from SQL parsing result."""
715
+ spr: SqlParsingResult = self._get_spr()
716
+
717
+ if not spr.column_lineage:
718
+ return []
719
+
720
+ fields: List[ViewField] = []
721
+
722
+ for lineage in spr.column_lineage:
723
+ fields.append(
724
+ ViewField(
725
+ name=self._get_field_name_from_looker_api_field_name(
726
+ lineage.downstream.column
727
+ ),
728
+ label="",
729
+ type=lineage.downstream.native_column_type or "unknown",
730
+ description="",
731
+ field_type=ViewFieldType.UNKNOWN,
732
+ upstream_fields=_drop_hive_dot_from_upstream(lineage.upstreams),
733
+ )
734
+ )
735
+ return fields
736
+
737
+
283
738
  class SqlBasedDerivedViewUpstream(AbstractViewUpstream, ABC):
284
739
  """
285
740
  Handle the case where upstream dataset is defined in derived_table.sql
@@ -674,7 +1129,45 @@ def create_view_upstream(
674
1129
  config: LookMLSourceConfig,
675
1130
  ctx: PipelineContext,
676
1131
  reporter: LookMLSourceReport,
1132
+ looker_client: Optional["LookerAPI"] = None,
1133
+ view_to_explore_map: Optional[Dict[str, str]] = None,
677
1134
  ) -> AbstractViewUpstream:
1135
+ # Looker client is required for LookerQueryAPIBasedViewUpstream also enforced by config.use_api_for_view_lineage
1136
+ # view_to_explore_map is required for Looker query API args
1137
+ # Only process if view exists in view_to_explore_map, because we cannot query views which are not reachable from an explore
1138
+ if (
1139
+ config.use_api_for_view_lineage
1140
+ and looker_client
1141
+ and view_to_explore_map
1142
+ and view_context.name() in view_to_explore_map
1143
+ ):
1144
+ try:
1145
+ return LookerQueryAPIBasedViewUpstream(
1146
+ view_context=view_context,
1147
+ config=config,
1148
+ reporter=reporter,
1149
+ ctx=ctx,
1150
+ looker_view_id_cache=looker_view_id_cache,
1151
+ looker_client=looker_client,
1152
+ view_to_explore_map=view_to_explore_map,
1153
+ )
1154
+ except Exception as e:
1155
+ # Falling back to custom regex-based parsing - best effort approach
1156
+ reporter.report_warning(
1157
+ title="Looker Query API based View Upstream Failed",
1158
+ message="Error in getting upstream lineage for view using Looker Query API",
1159
+ context=f"View-name: {view_context.name()}",
1160
+ exc=e,
1161
+ )
1162
+ else:
1163
+ logger.debug(
1164
+ f"Skipping Looker Query API for view: {view_context.name()} because one or more conditions are not met: "
1165
+ f"use_api_for_view_lineage={config.use_api_for_view_lineage}, "
1166
+ f"looker_client={'set' if looker_client else 'not set'}, "
1167
+ f"view_to_explore_map={'set' if view_to_explore_map else 'not set'}, "
1168
+ f"view_in_view_to_explore_map={view_context.name() in view_to_explore_map if view_to_explore_map else False}"
1169
+ )
1170
+
678
1171
  if view_context.is_regular_case():
679
1172
  return RegularViewUpstream(
680
1173
  view_context=view_context,
@@ -13,7 +13,10 @@ from pydantic import Field, root_validator, validator
13
13
  from requests.models import HTTPError
14
14
 
15
15
  import datahub.emitter.mce_builder as builder
16
- from datahub.configuration.source_common import DatasetLineageProviderConfigBase
16
+ from datahub.configuration.source_common import (
17
+ DatasetLineageProviderConfigBase,
18
+ LowerCaseDatasetUrnConfigMixin,
19
+ )
17
20
  from datahub.ingestion.api.common import PipelineContext
18
21
  from datahub.ingestion.api.decorators import (
19
22
  SourceCapability,
@@ -49,6 +52,7 @@ from datahub.metadata.schema_classes import (
49
52
  ChartQueryTypeClass,
50
53
  ChartTypeClass,
51
54
  DashboardInfoClass,
55
+ EdgeClass,
52
56
  OwnerClass,
53
57
  OwnershipClass,
54
58
  OwnershipTypeClass,
@@ -61,7 +65,11 @@ logger = logging.getLogger(__name__)
61
65
  DATASOURCE_URN_RECURSION_LIMIT = 5
62
66
 
63
67
 
64
- class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBase):
68
+ class MetabaseConfig(
69
+ DatasetLineageProviderConfigBase,
70
+ StatefulIngestionConfigBase,
71
+ LowerCaseDatasetUrnConfigMixin,
72
+ ):
65
73
  # See the Metabase /api/session endpoint for details
66
74
  # https://www.metabase.com/docs/latest/api-documentation.html#post-apisession
67
75
  connect_uri: str = Field(default="localhost:3000", description="Metabase host URL.")
@@ -331,19 +339,25 @@ class MetabaseSource(StatefulIngestionSourceBase):
331
339
  lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
332
340
  )
333
341
 
334
- chart_urns = []
342
+ # Convert chart URNs to chart edges (instead of deprecated charts field)
343
+ chart_edges = []
335
344
  cards_data = dashboard_details.get("dashcards", {})
336
345
  for card_info in cards_data:
337
346
  card_id = card_info.get("card").get("id", "")
338
347
  if not card_id:
339
348
  continue # most likely a virtual card without an id (text or heading), not relevant.
340
349
  chart_urn = builder.make_chart_urn(self.platform, str(card_id))
341
- chart_urns.append(chart_urn)
350
+ chart_edges.append(
351
+ EdgeClass(
352
+ destinationUrn=chart_urn,
353
+ lastModified=last_modified.lastModified,
354
+ )
355
+ )
342
356
 
343
357
  dashboard_info_class = DashboardInfoClass(
344
358
  description=description,
345
359
  title=title,
346
- charts=chart_urns,
360
+ chartEdges=chart_edges,
347
361
  lastModified=last_modified,
348
362
  dashboardUrl=f"{self.config.display_uri}/dashboard/{dashboard_id}",
349
363
  customProperties={},
@@ -481,13 +495,25 @@ class MetabaseSource(StatefulIngestionSourceBase):
481
495
  datasource_urn = self.get_datasource_urn(card_details)
482
496
  custom_properties = self.construct_card_custom_properties(card_details)
483
497
 
498
+ input_edges = (
499
+ [
500
+ EdgeClass(
501
+ destinationUrn=urn,
502
+ lastModified=last_modified.lastModified,
503
+ )
504
+ for urn in datasource_urn
505
+ ]
506
+ if datasource_urn
507
+ else None
508
+ )
509
+
484
510
  chart_info = ChartInfoClass(
485
511
  type=chart_type,
486
512
  description=description,
487
513
  title=title,
488
514
  lastModified=last_modified,
489
515
  chartUrl=f"{self.config.display_uri}/card/{card_id}",
490
- inputs=datasource_urn,
516
+ inputEdges=input_edges,
491
517
  customProperties=custom_properties,
492
518
  )
493
519
  chart_snapshot.aspects.append(chart_info)
@@ -5,11 +5,11 @@ import time
5
5
  from dataclasses import dataclass, field
6
6
  from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
7
7
 
8
- from pydantic import validator
8
+ import pydantic
9
9
  from pydantic.fields import Field
10
10
 
11
11
  import datahub.metadata.schema_classes as models
12
- from datahub.configuration.common import ConfigModel
12
+ from datahub.configuration.common import ConfigModel, LaxStr
13
13
  from datahub.configuration.config_loader import load_config_file
14
14
  from datahub.emitter.mce_builder import (
15
15
  datahub_guid,
@@ -66,7 +66,7 @@ class GlossaryTermConfig(ConfigModel):
66
66
  contains: Optional[List[str]] = None
67
67
  values: Optional[List[str]] = None
68
68
  related_terms: Optional[List[str]] = None
69
- custom_properties: Optional[Dict[str, str]] = None
69
+ custom_properties: Optional[Dict[str, LaxStr]] = None
70
70
  knowledge_links: Optional[List[KnowledgeCard]] = None
71
71
  domain: Optional[str] = None
72
72
 
@@ -82,7 +82,7 @@ class GlossaryNodeConfig(ConfigModel):
82
82
  terms: Optional[List["GlossaryTermConfig"]] = None
83
83
  nodes: Optional[List["GlossaryNodeConfig"]] = None
84
84
  knowledge_links: Optional[List[KnowledgeCard]] = None
85
- custom_properties: Optional[Dict[str, str]] = None
85
+ custom_properties: Optional[Dict[str, LaxStr]] = None
86
86
 
87
87
  # Private fields.
88
88
  _urn: str
@@ -108,12 +108,12 @@ class BusinessGlossarySourceConfig(ConfigModel):
108
108
 
109
109
 
110
110
  class BusinessGlossaryConfig(DefaultConfig):
111
- version: str
111
+ version: LaxStr
112
112
  terms: Optional[List["GlossaryTermConfig"]] = None
113
113
  nodes: Optional[List["GlossaryNodeConfig"]] = None
114
114
 
115
- @validator("version")
116
- def version_must_be_1(cls, v):
115
+ @pydantic.field_validator("version", mode="after")
116
+ def version_must_be_1(cls, v: str) -> str:
117
117
  if v != "1":
118
118
  raise ValueError("Only version 1 is supported")
119
119
  return v