acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,11 @@ from typing import Dict, List, Optional
3
3
 
4
4
  from datahub.ingestion.source.looker.looker_common import LookerViewId, ViewFieldValue
5
5
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
6
+ from datahub.ingestion.source.looker.looker_constant import NAME
6
7
  from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
7
8
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
8
9
  from datahub.ingestion.source.looker.lookml_config import (
9
10
  BASE_PROJECT_NAME,
10
- NAME,
11
11
  LookMLSourceReport,
12
12
  )
13
13
 
@@ -12,12 +12,12 @@ from datahub.ingestion.source.looker.looker_constant import (
12
12
  DIMENSION_GROUPS,
13
13
  DIMENSIONS,
14
14
  MEASURES,
15
+ NAME,
15
16
  )
16
17
  from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
17
18
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
18
19
  from datahub.ingestion.source.looker.lookml_config import (
19
20
  DERIVED_VIEW_SUFFIX,
20
- NAME,
21
21
  LookMLSourceReport,
22
22
  )
23
23
  from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver
@@ -28,11 +28,10 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
28
28
  StatefulIngestionConfigBase,
29
29
  )
30
30
  from datahub.utilities.lossy_collections import LossyList
31
+ from datahub.utilities.stats_collections import TopKDict, float_top_k_dict
31
32
 
32
33
  logger = logging.getLogger(__name__)
33
34
 
34
- NAME: str = "name"
35
-
36
35
  BASE_PROJECT_NAME = "__BASE"
37
36
 
38
37
  EXPLORE_FILE_EXTENSION = ".explore.lkml"
@@ -47,6 +46,9 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
47
46
  @dataclass
48
47
  class LookMLSourceReport(StaleEntityRemovalSourceReport):
49
48
  git_clone_latency: Optional[timedelta] = None
49
+ looker_query_api_latency_seconds: TopKDict[str, float] = dataclass_field(
50
+ default_factory=float_top_k_dict
51
+ )
50
52
  models_discovered: int = 0
51
53
  models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
52
54
  views_discovered: int = 0
@@ -81,6 +83,11 @@ class LookMLSourceReport(StaleEntityRemovalSourceReport):
81
83
  self.api_stats = self._looker_api.compute_stats()
82
84
  return super().compute_stats()
83
85
 
86
+ def report_looker_query_api_latency(
87
+ self, view_urn: str, latency: timedelta
88
+ ) -> None:
89
+ self.looker_query_api_latency_seconds[view_urn] = latency.total_seconds()
90
+
84
91
 
85
92
  class LookMLSourceConfig(
86
93
  LookerCommonConfig, StatefulIngestionConfigBase, EnvConfigMixin
@@ -122,7 +129,17 @@ class LookMLSourceConfig(
122
129
  description="List of regex patterns for LookML views to include in the extraction.",
123
130
  )
124
131
  parse_table_names_from_sql: bool = Field(True, description="See note below.")
125
- api: Optional[LookerAPIConfig]
132
+ use_api_for_view_lineage: bool = Field(
133
+ False,
134
+ description="When enabled, uses Looker API to get SQL representation of views for lineage parsing instead of parsing LookML files directly. Requires 'api' configuration to be provided."
135
+ "Coverage of regex based lineage extraction has limitations, it only supportes ${TABLE}.column_name syntax, See (https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions) to"
136
+ "understand the other substitutions and cross-references allowed in LookML.",
137
+ )
138
+ use_api_cache_for_view_lineage: bool = Field(
139
+ False,
140
+ description="When enabled, uses Looker API server-side caching for query execution. Requires 'api' configuration to be provided.",
141
+ )
142
+ api: Optional[LookerAPIConfig] = None
126
143
  project_name: Optional[str] = Field(
127
144
  None,
128
145
  description="Required if you don't specify the `api` section. The project name within which all the model "
@@ -239,6 +256,17 @@ class LookMLSourceConfig(
239
256
  )
240
257
  return values
241
258
 
259
+ @root_validator(skip_on_failure=True)
260
+ def check_api_provided_for_view_lineage(cls, values):
261
+ """Validate that API credentials are provided when the Looker API is used for view column lineage."""
262
+ if not values.get("api") and values.get("use_api_for_view_lineage"):
263
+ raise ValueError(
264
+ "API credential was not found. LookML source requires api credentials "
265
+ "for Looker to use Looker APIs for view's column lineage extraction."
266
+ "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
267
+ )
268
+ return values
269
+
242
270
  @validator("base_folder", always=True)
243
271
  def check_base_folder_if_not_provided(
244
272
  cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
@@ -4,10 +4,10 @@ import logging
4
4
  from typing import ClassVar, Dict, List, Set
5
5
 
6
6
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
7
+ from datahub.ingestion.source.looker.looker_constant import NAME
7
8
  from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
8
9
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
9
10
  from datahub.ingestion.source.looker.lookml_config import (
10
- NAME,
11
11
  LookMLSourceConfig,
12
12
  LookMLSourceReport,
13
13
  )
@@ -4,7 +4,7 @@ import tempfile
4
4
  from collections import OrderedDict
5
5
  from dataclasses import dataclass
6
6
  from datetime import datetime, timezone
7
- from typing import Dict, Iterable, List, Optional, Set, Tuple
7
+ from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
8
8
 
9
9
  import lkml
10
10
  import lkml.simple
@@ -12,8 +12,7 @@ from looker_sdk.error import SDKError
12
12
 
13
13
  from datahub.configuration.git import GitInfo
14
14
  from datahub.emitter.mce_builder import make_schema_field_urn
15
- from datahub.emitter.mcp import MetadataChangeProposalWrapper
16
- from datahub.emitter.mcp_builder import gen_containers
15
+ from datahub.emitter.mcp_builder import mcps_from_mce
17
16
  from datahub.ingestion.api.common import PipelineContext
18
17
  from datahub.ingestion.api.decorators import (
19
18
  SupportStatus,
@@ -27,6 +26,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
27
26
  from datahub.ingestion.source.common.subtypes import (
28
27
  BIContainerSubTypes,
29
28
  DatasetSubTypes,
29
+ SourceCapabilityModifier,
30
30
  )
31
31
  from datahub.ingestion.source.git.git_import import GitClone
32
32
  from datahub.ingestion.source.looker.looker_common import (
@@ -76,7 +76,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
76
76
  from datahub.ingestion.source.state.stateful_ingestion_base import (
77
77
  StatefulIngestionSourceBase,
78
78
  )
79
- from datahub.metadata.com.linkedin.pegasus2avro.common import BrowsePaths, Status
79
+ from datahub.metadata.com.linkedin.pegasus2avro.common import Status
80
80
  from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
81
81
  DatasetLineageTypeClass,
82
82
  FineGrainedLineageDownstreamType,
@@ -84,18 +84,15 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
84
84
  UpstreamLineage,
85
85
  ViewProperties,
86
86
  )
87
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
88
- from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
89
87
  from datahub.metadata.schema_classes import (
90
88
  AuditStampClass,
91
- BrowsePathEntryClass,
92
- BrowsePathsV2Class,
93
- ContainerClass,
94
89
  DatasetPropertiesClass,
95
90
  FineGrainedLineageClass,
96
91
  FineGrainedLineageUpstreamTypeClass,
97
- SubTypesClass,
98
92
  )
93
+ from datahub.sdk.container import Container
94
+ from datahub.sdk.dataset import Dataset
95
+ from datahub.sdk.entity import Entity
99
96
  from datahub.sql_parsing.sqlglot_lineage import ColumnRef
100
97
 
101
98
  VIEW_LANGUAGE_LOOKML: str = "lookml"
@@ -145,6 +142,8 @@ class LookerView:
145
142
  ctx: PipelineContext,
146
143
  extract_col_level_lineage: bool = False,
147
144
  populate_sql_logic_in_descriptions: bool = False,
145
+ looker_client: Optional[LookerAPI] = None,
146
+ view_to_explore_map: Optional[Dict[str, str]] = None,
148
147
  ) -> Optional["LookerView"]:
149
148
  view_name = view_context.name()
150
149
 
@@ -163,6 +162,8 @@ class LookerView:
163
162
  config=config,
164
163
  ctx=ctx,
165
164
  reporter=reporter,
165
+ looker_client=looker_client,
166
+ view_to_explore_map=view_to_explore_map,
166
167
  )
167
168
 
168
169
  field_type_vs_raw_fields = OrderedDict(
@@ -273,6 +274,13 @@ class LookerManifest:
273
274
  SourceCapability.LINEAGE_FINE,
274
275
  "Enabled by default, configured using `extract_column_level_lineage`",
275
276
  )
277
+ @capability(
278
+ SourceCapability.CONTAINERS,
279
+ "Enabled by default",
280
+ subtype_modifier=[
281
+ SourceCapabilityModifier.LOOKML_PROJECT,
282
+ ],
283
+ )
276
284
  class LookMLSource(StatefulIngestionSourceBase):
277
285
  """
278
286
  This plugin extracts the following:
@@ -420,69 +428,40 @@ class LookMLSource(StatefulIngestionSourceBase):
420
428
 
421
429
  return dataset_props
422
430
 
423
- def _build_dataset_mcps(
424
- self, looker_view: LookerView
425
- ) -> List[MetadataChangeProposalWrapper]:
426
- view_urn = looker_view.id.get_urn(self.source_config)
427
-
428
- subTypeEvent = MetadataChangeProposalWrapper(
429
- entityUrn=view_urn,
430
- aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW]),
431
- )
432
- events = [subTypeEvent]
431
+ def _build_dataset_entities(self, looker_view: LookerView) -> Iterable[Dataset]:
432
+ dataset_extra_aspects: List[Union[ViewProperties, Status]] = [
433
+ Status(removed=False)
434
+ ]
433
435
  if looker_view.view_details is not None:
434
- viewEvent = MetadataChangeProposalWrapper(
435
- entityUrn=view_urn,
436
- aspect=looker_view.view_details,
437
- )
438
- events.append(viewEvent)
439
-
440
- project_key = gen_project_key(self.source_config, looker_view.id.project_name)
441
-
442
- container = ContainerClass(container=project_key.as_urn())
443
- events.append(
444
- MetadataChangeProposalWrapper(entityUrn=view_urn, aspect=container)
445
- )
436
+ dataset_extra_aspects.append(looker_view.view_details)
446
437
 
447
- events.append(
448
- MetadataChangeProposalWrapper(
449
- entityUrn=view_urn,
450
- aspect=looker_view.id.get_browse_path_v2(self.source_config),
451
- )
452
- )
453
-
454
- return events
455
-
456
- def _build_dataset_mce(self, looker_view: LookerView) -> MetadataChangeEvent:
457
- """
458
- Creates MetadataChangeEvent for the dataset, creating upstream lineage links
459
- """
460
- logger.debug(f"looker_view = {looker_view.id}")
461
-
462
- dataset_snapshot = DatasetSnapshot(
463
- urn=looker_view.id.get_urn(self.source_config),
464
- aspects=[], # we append to this list later on
465
- )
466
- browse_paths = BrowsePaths(
467
- paths=[looker_view.id.get_browse_path(self.source_config)]
468
- )
469
-
470
- dataset_snapshot.aspects.append(browse_paths)
471
- dataset_snapshot.aspects.append(Status(removed=False))
472
- upstream_lineage = self._get_upstream_lineage(looker_view)
473
- if upstream_lineage is not None:
474
- dataset_snapshot.aspects.append(upstream_lineage)
475
438
  schema_metadata = LookerUtil._get_schema(
476
439
  self.source_config.platform_name,
477
440
  looker_view.id.view_name,
478
441
  looker_view.fields,
479
442
  self.reporter,
480
443
  )
481
- if schema_metadata is not None:
482
- dataset_snapshot.aspects.append(schema_metadata)
483
- dataset_snapshot.aspects.append(self._get_custom_properties(looker_view))
484
444
 
485
- return MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
445
+ custom_properties: DatasetPropertiesClass = self._get_custom_properties(
446
+ looker_view
447
+ )
448
+
449
+ yield Dataset(
450
+ platform=self.source_config.platform_name,
451
+ name=looker_view.id.get_view_dataset_name(self.source_config),
452
+ display_name=looker_view.id.view_name,
453
+ platform_instance=self.source_config.platform_instance,
454
+ env=self.source_config.env,
455
+ subtype=DatasetSubTypes.VIEW,
456
+ parent_container=looker_view.id.get_view_dataset_parent_container(
457
+ self.source_config
458
+ ),
459
+ schema=schema_metadata,
460
+ custom_properties=custom_properties.customProperties,
461
+ external_url=custom_properties.externalUrl,
462
+ upstreams=self._get_upstream_lineage(looker_view),
463
+ extra_aspects=dataset_extra_aspects,
464
+ )
486
465
 
487
466
  def get_project_name(self, model_name: str) -> str:
488
467
  if self.source_config.project_name is not None:
@@ -497,7 +476,13 @@ class LookMLSource(StatefulIngestionSourceBase):
497
476
  f"Failed to find a project name for model {model_name}"
498
477
  )
499
478
  return model.project_name
500
- except SDKError:
479
+ except SDKError as e:
480
+ self.reporter.failure(
481
+ title="Failed to find a project name for model",
482
+ message="Consider configuring a static project name in your config file",
483
+ context=str(dict(model_name=model_name)),
484
+ exc=e,
485
+ )
501
486
  raise ValueError(
502
487
  f"Could not locate a project name for model {model_name}. Consider configuring a static project name "
503
488
  f"in your config file"
@@ -540,7 +525,7 @@ class LookMLSource(StatefulIngestionSourceBase):
540
525
  ).workunit_processor,
541
526
  ]
542
527
 
543
- def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
528
+ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
544
529
  with tempfile.TemporaryDirectory("lookml_tmp") as tmp_dir:
545
530
  # Clone the base_folder if necessary.
546
531
  if not self.source_config.base_folder:
@@ -701,7 +686,7 @@ class LookMLSource(StatefulIngestionSourceBase):
701
686
  tmp_dir, project, project_visited, manifest_constants
702
687
  )
703
688
 
704
- def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
689
+ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]: # noqa: C901
705
690
  assert self.source_config.base_folder
706
691
  viewfile_loader = LookerViewFileLoader(
707
692
  self.source_config.project_name,
@@ -724,6 +709,11 @@ class LookMLSource(StatefulIngestionSourceBase):
724
709
  # Value: Tuple(model file name, connection name)
725
710
  view_connection_map: Dict[str, Tuple[str, str]] = {}
726
711
 
712
+ # Map of view name to explore name for API-based view lineage
713
+ # A view can be referenced by multiple explores; only one of them is needed to use the Looker Query API
714
+ # Key: view_name, Value: explore_name
715
+ view_to_explore_map: Dict[str, str] = {}
716
+
727
717
  # The ** means "this directory and all subdirectories", and hence should
728
718
  # include all the files we want.
729
719
  model_files = sorted(
@@ -778,37 +768,37 @@ class LookMLSource(StatefulIngestionSourceBase):
778
768
  )
779
769
  )
780
770
 
781
- if self.source_config.emit_reachable_views_only:
782
- model_explores_map = {d["name"]: d for d in model.explores}
783
- for explore_dict in model.explores:
784
- try:
785
- if LookerRefinementResolver.is_refinement(explore_dict["name"]):
786
- continue
771
+ model_explores_map = {d["name"]: d for d in model.explores}
772
+ for explore_dict in model.explores:
773
+ try:
774
+ if LookerRefinementResolver.is_refinement(explore_dict["name"]):
775
+ continue
787
776
 
788
- explore_dict = (
789
- looker_refinement_resolver.apply_explore_refinement(
790
- explore_dict
791
- )
792
- )
793
- explore: LookerExplore = LookerExplore.from_dict(
794
- model_name,
795
- explore_dict,
796
- model.resolved_includes,
797
- viewfile_loader,
798
- self.reporter,
799
- model_explores_map,
800
- )
801
- if explore.upstream_views:
802
- for view_name in explore.upstream_views:
777
+ explore_dict = looker_refinement_resolver.apply_explore_refinement(
778
+ explore_dict
779
+ )
780
+ explore: LookerExplore = LookerExplore.from_dict(
781
+ model_name,
782
+ explore_dict,
783
+ model.resolved_includes,
784
+ viewfile_loader,
785
+ self.reporter,
786
+ model_explores_map,
787
+ )
788
+ if explore.upstream_views:
789
+ for view_name in explore.upstream_views:
790
+ if self.source_config.emit_reachable_views_only:
803
791
  explore_reachable_views.add(view_name.include)
804
- except Exception as e:
805
- self.reporter.report_warning(
806
- title="Failed to process explores",
807
- message="Failed to process explore dictionary.",
808
- context=f"Explore Details: {explore_dict}",
809
- exc=e,
810
- )
811
- logger.debug("Failed to process explore", exc_info=e)
792
+ # Build view to explore mapping for API-based view lineage
793
+ view_to_explore_map[view_name.include] = explore.name
794
+ except Exception as e:
795
+ self.reporter.report_warning(
796
+ title="Failed to process explores",
797
+ message="Failed to process explore dictionary.",
798
+ context=f"Explore Details: {explore_dict}",
799
+ exc=e,
800
+ )
801
+ logger.debug("Failed to process explore", exc_info=e)
812
802
 
813
803
  processed_view_files = processed_view_map.setdefault(
814
804
  model.connection, set()
@@ -897,6 +887,10 @@ class LookMLSource(StatefulIngestionSourceBase):
897
887
  populate_sql_logic_in_descriptions=self.source_config.populate_sql_logic_for_missing_descriptions,
898
888
  config=self.source_config,
899
889
  ctx=self.ctx,
890
+ looker_client=self.looker_client,
891
+ view_to_explore_map=view_to_explore_map
892
+ if view_to_explore_map
893
+ else None,
900
894
  )
901
895
  except Exception as e:
902
896
  self.reporter.report_warning(
@@ -935,7 +929,7 @@ class LookMLSource(StatefulIngestionSourceBase):
935
929
  maybe_looker_view.id.project_name
936
930
  not in self.processed_projects
937
931
  ):
938
- yield from self.gen_project_workunits(
932
+ yield from self.gen_project_containers(
939
933
  maybe_looker_view.id.project_name
940
934
  )
941
935
 
@@ -943,15 +937,10 @@ class LookMLSource(StatefulIngestionSourceBase):
943
937
  maybe_looker_view.id.project_name
944
938
  )
945
939
 
946
- for mcp in self._build_dataset_mcps(
940
+ yield from self._build_dataset_entities(
947
941
  maybe_looker_view
948
- ):
949
- yield mcp.as_workunit()
950
- mce = self._build_dataset_mce(maybe_looker_view)
951
- yield MetadataWorkUnit(
952
- id=f"lookml-view-{maybe_looker_view.id}",
953
- mce=mce,
954
942
  )
943
+
955
944
  processed_view_files.add(include.include)
956
945
  else:
957
946
  (
@@ -980,28 +969,24 @@ class LookMLSource(StatefulIngestionSourceBase):
980
969
  self.source_config.tag_measures_and_dimensions
981
970
  and self.reporter.events_produced != 0
982
971
  ):
983
- # Emit tag MCEs for measures and dimensions:
972
+ # Emit tag MCEs for measures and dimensions if we produced any explores:
984
973
  for tag_mce in LookerUtil.get_tag_mces():
985
- yield MetadataWorkUnit(
986
- id=f"tag-{tag_mce.proposedSnapshot.urn}", mce=tag_mce
987
- )
974
+ # Convert MCE to MCPs
975
+ for mcp in mcps_from_mce(tag_mce):
976
+ yield mcp.as_workunit()
988
977
 
989
- def gen_project_workunits(self, project_name: str) -> Iterable[MetadataWorkUnit]:
978
+ def gen_project_containers(self, project_name: str) -> Iterable[Container]:
990
979
  project_key = gen_project_key(
991
980
  self.source_config,
992
981
  project_name,
993
982
  )
994
- yield from gen_containers(
983
+
984
+ yield Container(
995
985
  container_key=project_key,
996
- name=project_name,
997
- sub_types=[BIContainerSubTypes.LOOKML_PROJECT],
986
+ display_name=project_name,
987
+ subtype=BIContainerSubTypes.LOOKML_PROJECT,
988
+ parent_container=["Folders"],
998
989
  )
999
- yield MetadataChangeProposalWrapper(
1000
- entityUrn=project_key.as_urn(),
1001
- aspect=BrowsePathsV2Class(
1002
- path=[BrowsePathEntryClass("Folders")],
1003
- ),
1004
- ).as_workunit()
1005
990
 
1006
991
  def report_skipped_unreachable_views(
1007
992
  self,