acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
28
28
  User,
29
29
  WriteQuery,
30
30
  )
31
- from pydantic.class_validators import validator
31
+ from pydantic import validator
32
32
 
33
33
  import datahub.emitter.mce_builder as builder
34
34
  from datahub.api.entities.platformresource.platform_resource import (
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
36
36
  PlatformResourceKey,
37
37
  )
38
38
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
39
- from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp
39
+ from datahub.emitter.mcp_builder import ContainerKey
40
40
  from datahub.ingestion.api.report import Report
41
41
  from datahub.ingestion.api.source import SourceReport
42
42
  from datahub.ingestion.source.common.subtypes import DatasetSubTypes
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
72
72
  UpstreamClass,
73
73
  UpstreamLineage,
74
74
  )
75
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
76
75
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
77
76
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
78
77
  ArrayTypeClass,
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
90
89
  )
91
90
  from datahub.metadata.schema_classes import (
92
91
  BrowsePathEntryClass,
93
- BrowsePathsClass,
94
92
  BrowsePathsV2Class,
95
- ContainerClass,
96
- DatasetPropertiesClass,
93
+ EmbedClass,
97
94
  EnumTypeClass,
98
95
  FineGrainedLineageClass,
99
96
  GlobalTagsClass,
100
97
  SchemaMetadataClass,
101
- StatusClass,
102
- SubTypesClass,
103
98
  TagAssociationClass,
104
99
  TagPropertiesClass,
105
100
  TagSnapshotClass,
106
101
  )
107
102
  from datahub.metadata.urns import TagUrn
103
+ from datahub.sdk.dataset import Dataset
108
104
  from datahub.sql_parsing.sqlglot_lineage import ColumnRef
109
105
  from datahub.utilities.lossy_collections import LossyList, LossySet
110
106
  from datahub.utilities.url_util import remove_port_from_url
@@ -242,13 +238,24 @@ class LookerViewId:
242
238
 
243
239
  dataset_name = config.view_naming_pattern.replace_variables(n_mapping)
244
240
 
245
- return builder.make_dataset_urn_with_platform_instance(
241
+ generated_urn = builder.make_dataset_urn_with_platform_instance(
246
242
  platform=config.platform_name,
247
243
  name=dataset_name,
248
244
  platform_instance=config.platform_instance,
249
245
  env=config.env,
250
246
  )
251
247
 
248
+ logger.debug(
249
+ f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
250
+ )
251
+
252
+ return generated_urn
253
+
254
+ def get_view_dataset_name(self, config: LookerCommonConfig) -> str:
255
+ n_mapping: ViewNamingPatternMapping = self.get_mapping(config)
256
+ n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path)
257
+ return config.view_naming_pattern.replace_variables(n_mapping)
258
+
252
259
  def get_browse_path(self, config: LookerCommonConfig) -> str:
253
260
  browse_path = config.view_browse_pattern.replace_variables(
254
261
  self.get_mapping(config)
@@ -276,6 +283,22 @@ class LookerViewId:
276
283
  ],
277
284
  )
278
285
 
286
+ def get_view_dataset_parent_container(
287
+ self, config: LookerCommonConfig
288
+ ) -> List[str]:
289
+ project_key = gen_project_key(config, self.project_name)
290
+ view_path = (
291
+ remove_suffix(self.file_path, ".view.lkml")
292
+ if "{file_path}" in config.view_browse_pattern.pattern
293
+ else os.path.dirname(self.file_path)
294
+ )
295
+ path_entries = view_path.split("/") if view_path else []
296
+ return [
297
+ "Develop",
298
+ project_key.as_urn(),
299
+ *path_entries,
300
+ ]
301
+
279
302
 
280
303
  class ViewFieldType(Enum):
281
304
  DIMENSION = "Dimension"
@@ -284,6 +307,12 @@ class ViewFieldType(Enum):
284
307
  UNKNOWN = "Unknown"
285
308
 
286
309
 
310
+ class ViewFieldDimensionGroupType(Enum):
311
+ # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
312
+ TIME = "time"
313
+ DURATION = "duration"
314
+
315
+
287
316
  class ViewFieldValue(Enum):
288
317
  NOT_AVAILABLE = "NotAvailable"
289
318
 
@@ -299,6 +328,7 @@ class ViewField:
299
328
  view_name: Optional[str] = None
300
329
  is_primary_key: bool = False
301
330
  tags: List[str] = dataclasses_field(default_factory=list)
331
+ group_label: Optional[str] = None
302
332
 
303
333
  # It is the list of ColumnRef for derived view defined using SQL otherwise simple column name
304
334
  upstream_fields: Union[List[ColumnRef]] = dataclasses_field(default_factory=list)
@@ -326,6 +356,7 @@ class ViewField:
326
356
  description = field_dict.get("description", default_description)
327
357
 
328
358
  label = field_dict.get("label", "")
359
+ group_label = field_dict.get("group_label")
329
360
 
330
361
  return ViewField(
331
362
  name=name,
@@ -336,6 +367,7 @@ class ViewField:
336
367
  field_type=type_cls,
337
368
  tags=field_dict.get("tags") or [],
338
369
  upstream_fields=upstream_column_ref,
370
+ group_label=group_label,
339
371
  )
340
372
 
341
373
 
@@ -370,6 +402,14 @@ class ExploreUpstreamViewField:
370
402
  : -(len(self.field.field_group_variant.lower()) + 1)
371
403
  ]
372
404
 
405
+ # Validate that field_name is not empty to prevent invalid schema field URNs
406
+ if not field_name or not field_name.strip():
407
+ logger.warning(
408
+ f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
409
+ f"Skipping field to prevent invalid schema field URN generation."
410
+ )
411
+ return None
412
+
373
413
  assert view_name # for lint false positive
374
414
 
375
415
  project_include: ProjectInclude = ProjectInclude(
@@ -449,15 +489,36 @@ class ExploreUpstreamViewField:
449
489
  )
450
490
 
451
491
 
452
- def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
492
+ def create_view_project_map(
493
+ view_fields: List[ViewField],
494
+ explore_primary_view: Optional[str] = None,
495
+ explore_project_name: Optional[str] = None,
496
+ ) -> Dict[str, str]:
453
497
  """
454
498
  Each view in a model has unique name.
455
499
  Use this function in scope of a model.
500
+
501
+ Args:
502
+ view_fields: List of ViewField objects
503
+ explore_primary_view: The primary view name of the explore (explore.view_name)
504
+ explore_project_name: The project name of the explore (explore.project_name)
456
505
  """
457
506
  view_project_map: Dict[str, str] = {}
458
507
  for view_field in view_fields:
459
508
  if view_field.view_name is not None and view_field.project_name is not None:
460
- view_project_map[view_field.view_name] = view_field.project_name
509
+ # Override field-level project assignment for the primary view when different
510
+ if (
511
+ view_field.view_name == explore_primary_view
512
+ and explore_project_name is not None
513
+ and explore_project_name != view_field.project_name
514
+ ):
515
+ logger.debug(
516
+ f"Overriding project assignment for primary view '{view_field.view_name}': "
517
+ f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
518
+ )
519
+ view_project_map[view_field.view_name] = explore_project_name
520
+ else:
521
+ view_project_map[view_field.view_name] = view_field.project_name
461
522
 
462
523
  return view_project_map
463
524
 
@@ -471,7 +532,10 @@ def get_view_file_path(
471
532
  logger.debug("Entered")
472
533
 
473
534
  for field in lkml_fields:
474
- if field.view == view_name:
535
+ if (
536
+ LookerUtil.extract_view_name_from_lookml_model_explore_field(field)
537
+ == view_name
538
+ ):
475
539
  # This path is relative to git clone directory
476
540
  logger.debug(f"Found view({view_name}) file-path {field.source_file}")
477
541
  return field.source_file
@@ -701,14 +765,47 @@ class LookerUtil:
701
765
  ),
702
766
  }
703
767
 
768
+ # Add a pattern-based regex for checking if a tag is a group_label tag
769
+ GROUP_LABEL_TAG_PATTERN = re.compile(r"^looker\:group_label\:(.+)$")
770
+
704
771
  @staticmethod
705
772
  def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
706
- assert tag_urn in LookerUtil.tag_definitions
707
- return MetadataChangeEvent(
708
- proposedSnapshot=TagSnapshotClass(
709
- urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]]
773
+ # Check if this is a group_label tag
774
+ tag_name = tag_urn[len("urn:li:tag:") :]
775
+ match = LookerUtil.GROUP_LABEL_TAG_PATTERN.match(tag_name)
776
+
777
+ if match:
778
+ # This is a group_label tag, create tag definition on the fly
779
+ group_label_value = match.group(1)
780
+ tag_properties = TagPropertiesClass(
781
+ name=f"looker:group_label:{group_label_value}",
782
+ description=f"Fields with Looker group label: {group_label_value}",
783
+ )
784
+
785
+ return MetadataChangeEvent(
786
+ proposedSnapshot=TagSnapshotClass(urn=tag_urn, aspects=[tag_properties])
787
+ )
788
+ elif tag_urn in LookerUtil.tag_definitions:
789
+ # This is a predefined tag
790
+ return MetadataChangeEvent(
791
+ proposedSnapshot=TagSnapshotClass(
792
+ urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]]
793
+ )
794
+ )
795
+ else:
796
+ # Should not happen, but handle gracefully
797
+ logger.warning(f"No tag definition found for tag URN: {tag_urn}")
798
+ return MetadataChangeEvent(
799
+ proposedSnapshot=TagSnapshotClass(
800
+ urn=tag_urn,
801
+ aspects=[
802
+ TagPropertiesClass(
803
+ name=tag_name,
804
+ description=f"Tag: {tag_name}",
805
+ )
806
+ ],
807
+ )
710
808
  )
711
- )
712
809
 
713
810
  @staticmethod
714
811
  def _get_tags_from_field_type(
@@ -732,6 +829,14 @@ class LookerUtil:
732
829
  message=f"Failed to map view field type {field.field_type}. Won't emit tags for measure and dimension",
733
830
  )
734
831
 
832
+ # Add group_label as tags if present
833
+ if field.group_label:
834
+ schema_field_tags.append(
835
+ TagAssociationClass(
836
+ tag=builder.make_tag_urn(f"looker:group_label:{field.group_label}")
837
+ )
838
+ )
839
+
735
840
  if schema_field_tags:
736
841
  return GlobalTagsClass(tags=schema_field_tags)
737
842
 
@@ -906,6 +1011,9 @@ class LookerExplore:
906
1011
  f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
907
1012
  )
908
1013
  else:
1014
+ logger.debug(
1015
+ f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
1016
+ )
909
1017
  upstream_views.append(
910
1018
  ProjectInclude(project=info[0].project, include=view_name)
911
1019
  )
@@ -934,6 +1042,7 @@ class LookerExplore:
934
1042
  ) -> Optional["LookerExplore"]:
935
1043
  try:
936
1044
  explore = client.lookml_model_explore(model, explore_name)
1045
+
937
1046
  views: Set[str] = set()
938
1047
  lkml_fields: List[LookmlModelExploreField] = (
939
1048
  explore_field_set_to_lkml_fields(explore)
@@ -1027,6 +1136,7 @@ class LookerExplore:
1027
1136
  else False
1028
1137
  ),
1029
1138
  upstream_fields=[],
1139
+ group_label=dim_field.field_group_label,
1030
1140
  )
1031
1141
  )
1032
1142
  if explore.fields.measures is not None:
@@ -1065,10 +1175,15 @@ class LookerExplore:
1065
1175
  else False
1066
1176
  ),
1067
1177
  upstream_fields=[],
1178
+ group_label=measure_field.field_group_label,
1068
1179
  )
1069
1180
  )
1070
1181
 
1071
- view_project_map: Dict[str, str] = create_view_project_map(view_fields)
1182
+ view_project_map: Dict[str, str] = create_view_project_map(
1183
+ view_fields,
1184
+ explore_primary_view=explore.view_name,
1185
+ explore_project_name=explore.project_name,
1186
+ )
1072
1187
  if view_project_map:
1073
1188
  logger.debug(f"views and their projects: {view_project_map}")
1074
1189
 
@@ -1103,7 +1218,7 @@ class LookerExplore:
1103
1218
  [column_ref] if column_ref is not None else []
1104
1219
  )
1105
1220
 
1106
- return cls(
1221
+ looker_explore = cls(
1107
1222
  name=explore_name,
1108
1223
  model_name=model,
1109
1224
  project_name=explore.project_name,
@@ -1121,6 +1236,8 @@ class LookerExplore:
1121
1236
  source_file=explore.source_file,
1122
1237
  tags=list(explore.tags) if explore.tags is not None else [],
1123
1238
  )
1239
+ logger.debug(f"Created LookerExplore from API: {looker_explore}")
1240
+ return looker_explore
1124
1241
  except SDKError as e:
1125
1242
  if "<title>Looker Not Found (404)</title>" in str(e):
1126
1243
  logger.info(
@@ -1161,6 +1278,9 @@ class LookerExplore:
1161
1278
  dataset_name = config.explore_naming_pattern.replace_variables(
1162
1279
  self.get_mapping(config)
1163
1280
  )
1281
+ logger.debug(
1282
+ f"Generated dataset_name={dataset_name} for explore with model_name={self.model_name}, name={self.name}"
1283
+ )
1164
1284
 
1165
1285
  return builder.make_dataset_urn_with_platform_instance(
1166
1286
  platform=config.platform_name,
@@ -1189,52 +1309,31 @@ class LookerExplore:
1189
1309
  reporter: SourceReport,
1190
1310
  base_url: str,
1191
1311
  extract_embed_urls: bool,
1192
- ) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
1193
- # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
1194
- # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
1195
-
1196
- dataset_snapshot = DatasetSnapshot(
1197
- urn=self.get_explore_urn(config),
1198
- aspects=[], # we append to this list later on
1199
- )
1200
-
1201
- model_key = gen_model_key(config, self.model_name)
1202
- browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
1203
- container = ContainerClass(container=model_key.as_urn())
1204
- dataset_snapshot.aspects.append(browse_paths)
1205
- dataset_snapshot.aspects.append(StatusClass(removed=False))
1206
-
1207
- custom_properties = {
1208
- "project": self.project_name,
1209
- "model": self.model_name,
1210
- "looker.explore.label": self.label,
1211
- "looker.explore.name": self.name,
1212
- "looker.explore.file": self.source_file,
1213
- }
1214
- dataset_props = DatasetPropertiesClass(
1215
- name=str(self.label) if self.label else LookerUtil._display_name(self.name),
1216
- description=self.description,
1217
- customProperties={
1218
- k: str(v) for k, v in custom_properties.items() if v is not None
1219
- },
1220
- )
1221
- dataset_props.externalUrl = self._get_url(base_url)
1312
+ ) -> Dataset:
1313
+ """
1314
+ Generate a Dataset metadata event for this Looker Explore.
1222
1315
 
1223
- dataset_snapshot.aspects.append(dataset_props)
1316
+ Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
1317
+ Passthrough explores and joins are handled via lineage and do not need additional nodes.
1318
+ """
1319
+ upstream_lineage = None
1224
1320
  view_name_to_urn_map: Dict[str, str] = {}
1321
+
1225
1322
  if self.upstream_views is not None:
1226
1323
  assert self.project_name is not None
1227
- upstreams = []
1324
+ upstreams: list[UpstreamClass] = []
1228
1325
  observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
1326
+
1229
1327
  for view_ref in sorted(self.upstream_views):
1230
1328
  # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
1231
1329
  # if we raise error on file_path equal to None then existing test-cases will fail as mock data
1232
1330
  # doesn't have required attributes.
1233
1331
  file_path: str = (
1234
1332
  cast(str, self.upstream_views_file_path[view_ref.include])
1235
- if self.upstream_views_file_path[view_ref.include] is not None
1333
+ if self.upstream_views_file_path.get(view_ref.include) is not None
1236
1334
  else ViewFieldValue.NOT_AVAILABLE.value
1237
1335
  )
1336
+
1238
1337
  view_urn = LookerViewId(
1239
1338
  project_name=(
1240
1339
  view_ref.project
@@ -1258,10 +1357,28 @@ class LookerExplore:
1258
1357
  )
1259
1358
  view_name_to_urn_map[view_ref.include] = view_urn
1260
1359
 
1261
- fine_grained_lineages = []
1360
+ fine_grained_lineages: list[FineGrainedLineageClass] = []
1262
1361
  if config.extract_column_level_lineage:
1263
1362
  for field in self.fields or []:
1363
+ # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
1364
+ if not field.name or not field.name.strip():
1365
+ logger.warning(
1366
+ f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
1367
+ )
1368
+ continue
1369
+
1264
1370
  for upstream_column_ref in field.upstream_fields:
1371
+ # Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
1372
+ if (
1373
+ not upstream_column_ref.column
1374
+ or not upstream_column_ref.column.strip()
1375
+ ):
1376
+ logger.warning(
1377
+ f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
1378
+ f"due to empty upstream column name in table '{upstream_column_ref.table}'"
1379
+ )
1380
+ continue
1381
+
1265
1382
  fine_grained_lineages.append(
1266
1383
  FineGrainedLineageClass(
1267
1384
  upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
@@ -1281,9 +1398,11 @@ class LookerExplore:
1281
1398
  )
1282
1399
 
1283
1400
  upstream_lineage = UpstreamLineage(
1284
- upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None
1401
+ upstreams=upstreams,
1402
+ fineGrainedLineages=fine_grained_lineages or None,
1285
1403
  )
1286
- dataset_snapshot.aspects.append(upstream_lineage)
1404
+
1405
+ schema_metadata = None
1287
1406
  if self.fields is not None:
1288
1407
  schema_metadata = LookerUtil._get_schema(
1289
1408
  platform_name=config.platform_name,
@@ -1291,42 +1410,46 @@ class LookerExplore:
1291
1410
  view_fields=self.fields,
1292
1411
  reporter=reporter,
1293
1412
  )
1294
- if schema_metadata is not None:
1295
- dataset_snapshot.aspects.append(schema_metadata)
1296
1413
 
1297
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
1298
- mcp = MetadataChangeProposalWrapper(
1299
- entityUrn=dataset_snapshot.urn,
1300
- aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
1301
- )
1302
-
1303
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
1304
- mce,
1305
- mcp,
1306
- ]
1414
+ extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
1307
1415
 
1308
- # Add tags
1309
- explore_tag_urns: List[TagAssociationClass] = [
1310
- TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
1311
- ]
1312
- if explore_tag_urns:
1313
- dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
1314
-
1315
- # If extracting embeds is enabled, produce an MCP for embed URL.
1416
+ explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
1316
1417
  if extract_embed_urls:
1317
- embed_mcp = create_embed_mcp(
1318
- dataset_snapshot.urn, self._get_embed_url(base_url)
1319
- )
1320
- proposals.append(embed_mcp)
1418
+ extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
1321
1419
 
1322
- proposals.append(
1323
- MetadataChangeProposalWrapper(
1324
- entityUrn=dataset_snapshot.urn,
1325
- aspect=container,
1326
- )
1327
- )
1420
+ custom_properties: Dict[str, Optional[str]] = {
1421
+ "project": self.project_name,
1422
+ "model": self.model_name,
1423
+ "looker.explore.label": self.label,
1424
+ "looker.explore.name": self.name,
1425
+ "looker.explore.file": self.source_file,
1426
+ }
1328
1427
 
1329
- return proposals
1428
+ return Dataset(
1429
+ platform=config.platform_name,
1430
+ name=config.explore_naming_pattern.replace_variables(
1431
+ self.get_mapping(config)
1432
+ ),
1433
+ display_name=str(self.label)
1434
+ if self.label
1435
+ else LookerUtil._display_name(self.name),
1436
+ description=self.description,
1437
+ subtype=DatasetSubTypes.LOOKER_EXPLORE,
1438
+ env=config.env,
1439
+ platform_instance=config.platform_instance,
1440
+ custom_properties={
1441
+ k: str(v) for k, v in custom_properties.items() if v is not None
1442
+ },
1443
+ external_url=self._get_url(base_url),
1444
+ upstreams=upstream_lineage,
1445
+ schema=schema_metadata,
1446
+ parent_container=[
1447
+ "Explore",
1448
+ gen_model_key(config, self.model_name).as_urn(),
1449
+ ],
1450
+ tags=explore_tag_urns if explore_tag_urns else None,
1451
+ extra_aspects=extra_aspects,
1452
+ )
1330
1453
 
1331
1454
 
1332
1455
  def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
@@ -1362,6 +1485,7 @@ class LookerExploreRegistry:
1362
1485
 
1363
1486
  @lru_cache(maxsize=200)
1364
1487
  def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]:
1488
+ logger.debug(f"Retrieving explore: model={model}, explore={explore}")
1365
1489
  looker_explore = LookerExplore.from_api(
1366
1490
  model,
1367
1491
  explore,
@@ -1369,6 +1493,12 @@ class LookerExploreRegistry:
1369
1493
  self.report,
1370
1494
  self.source_config,
1371
1495
  )
1496
+ if looker_explore is not None:
1497
+ logger.debug(
1498
+ f"Found explore with model_name={looker_explore.model_name}, name={looker_explore.name}"
1499
+ )
1500
+ else:
1501
+ logger.debug(f"No explore found for model={model}, explore={explore}")
1372
1502
  return looker_explore
1373
1503
 
1374
1504
  def compute_stats(self) -> Dict:
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
5
5
 
6
6
  import pydantic
7
7
  from looker_sdk.sdk.api40.models import DBConnection
8
- from pydantic import Field, validator
8
+ from pydantic import Field, model_validator, validator
9
9
 
10
10
  from datahub.configuration import ConfigModel
11
- from datahub.configuration.common import AllowDenyPattern, ConfigurationError
11
+ from datahub.configuration.common import (
12
+ AllowDenyPattern,
13
+ ConfigurationError,
14
+ HiddenFromDocs,
15
+ )
12
16
  from datahub.configuration.source_common import (
13
17
  EnvConfigMixin,
14
18
  PlatformInstanceConfigMixin,
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
43
47
  assert isinstance(v, str), "pattern must be a string"
44
48
  return {"pattern": v}
45
49
 
50
+ @model_validator(mode="before")
51
+ @classmethod
52
+ def pydantic_v2_accept_raw_pattern(cls, v):
53
+ # Pydantic v2 compatibility: handle string input by converting to dict
54
+ if isinstance(v, str):
55
+ return {"pattern": v}
56
+ return v
57
+
46
58
  @classmethod
47
59
  def pydantic_validate_pattern(cls, v):
48
60
  assert isinstance(v, NamingPattern)
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
132
144
  description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
133
145
  "discoverable. When disabled, adds this information to the description of the column.",
134
146
  )
135
- platform_name: str = Field(
147
+ platform_name: HiddenFromDocs[str] = Field(
136
148
  # TODO: This shouldn't be part of the config.
137
149
  "looker",
138
150
  description="Default platform name.",
139
- hidden_from_docs=True,
140
151
  )
141
152
  extract_column_level_lineage: bool = Field(
142
153
  True,
@@ -11,3 +11,7 @@ prod = "prod"
11
11
  dev = "dev"
12
12
  NAME = "name"
13
13
  DERIVED_DOT_SQL = "derived.sql"
14
+
15
+ VIEW_FIELD_TYPE_ATTRIBUTE = "type"
16
+ VIEW_FIELD_INTERVALS_ATTRIBUTE = "intervals"
17
+ VIEW_FIELD_TIMEFRAMES_ATTRIBUTE = "timeframes"
@@ -2,6 +2,7 @@
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ from enum import Enum
5
6
  from functools import lru_cache
6
7
  from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
7
8
 
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
31
32
  logger = logging.getLogger(__name__)
32
33
 
33
34
 
35
+ class LookerQueryResponseFormat(Enum):
36
+ # result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
37
+ JSON = "json"
38
+ SQL = (
39
+ "sql" # Note: This does not execute the query, it only generates the SQL query.
40
+ )
41
+
42
+
34
43
  class TransportOptionsConfig(ConfigModel):
35
44
  timeout: int
36
45
  headers: MutableMapping[str, str]
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
69
78
  search_looks_calls: int = 0
70
79
  search_dashboards_calls: int = 0
71
80
  all_user_calls: int = 0
81
+ generate_sql_query_calls: int = 0
72
82
 
73
83
 
74
84
  class LookerAPI:
@@ -113,7 +123,7 @@ class LookerAPI:
113
123
  )
114
124
  except SDKError as e:
115
125
  raise ConfigurationError(
116
- f"Failed to connect/authenticate with looker - check your configuration: {e}"
126
+ "Failed to connect/authenticate with looker - check your configuration"
117
127
  ) from e
118
128
 
119
129
  self.client_stats = LookerAPIStats()
@@ -170,17 +180,40 @@ class LookerAPI:
170
180
  logger.debug(f"Executing query {write_query}")
171
181
  self.client_stats.query_calls += 1
172
182
 
173
- response_json = self.client.run_inline_query(
174
- result_format="json",
183
+ response = self.client.run_inline_query(
184
+ result_format=LookerQueryResponseFormat.JSON.value,
175
185
  body=write_query,
176
186
  transport_options=self.transport_options,
177
187
  )
178
188
 
189
+ data = json.loads(response)
190
+
179
191
  logger.debug("=================Response=================")
180
- data = json.loads(response_json)
181
192
  logger.debug("Length of response: %d", len(data))
182
193
  return data
183
194
 
195
+ def generate_sql_query(
196
+ self, write_query: WriteQuery, use_cache: bool = False
197
+ ) -> str:
198
+ """
199
+ Generates a SQL query string for a given WriteQuery.
200
+
201
+ Note: This does not execute the query, it only generates the SQL query.
202
+ """
203
+ logger.debug(f"Generating SQL query for {write_query}")
204
+ self.client_stats.generate_sql_query_calls += 1
205
+
206
+ response = self.client.run_inline_query(
207
+ result_format=LookerQueryResponseFormat.SQL.value,
208
+ body=write_query,
209
+ transport_options=self.transport_options,
210
+ cache=use_cache,
211
+ )
212
+
213
+ logger.debug("=================Response=================")
214
+ logger.debug("Length of SQL response: %d", len(response))
215
+ return str(response)
216
+
184
217
  def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
185
218
  self.client_stats.dashboard_calls += 1
186
219
  return self.client.dashboard(