acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic; see the package registry's advisory page for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -120,7 +120,6 @@ SNOWFLAKE = "snowflake"
120
120
  BIGQUERY = "bigquery"
121
121
  REDSHIFT = "redshift"
122
122
  DATABRICKS = "databricks"
123
- TRINO = "trino"
124
123
 
125
124
  # Type names for Databricks, to match Title Case types in sqlalchemy
126
125
  ProfilerTypeMapping.INT_TYPE_NAMES.append("Integer")
@@ -206,6 +205,25 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
206
205
  )
207
206
  )
208
207
  return convert_to_json_serializable(element_values.fetchone()[0])
208
+ elif (
209
+ self.engine.dialect.name.lower() == GXSqlDialect.AWSATHENA
210
+ or self.engine.dialect.name.lower() == GXSqlDialect.TRINO
211
+ ):
212
+ return convert_to_json_serializable(
213
+ self.engine.execute(
214
+ sa.select(sa.func.approx_distinct(sa.column(column))).select_from(
215
+ self._table
216
+ )
217
+ ).scalar()
218
+ )
219
+ elif self.engine.dialect.name.lower() == DATABRICKS:
220
+ return convert_to_json_serializable(
221
+ self.engine.execute(
222
+ sa.select(sa.func.approx_count_distinct(sa.column(column))).select_from(
223
+ self._table
224
+ )
225
+ ).scalar()
226
+ )
209
227
  return convert_to_json_serializable(
210
228
  self.engine.execute(
211
229
  sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
@@ -289,7 +307,6 @@ def _is_single_row_query_method(query: Any) -> bool:
289
307
  "get_column_max",
290
308
  "get_column_mean",
291
309
  "get_column_stdev",
292
- "get_column_nonnull_count",
293
310
  "get_column_unique_count",
294
311
  }
295
312
  CONSTANT_ROW_QUERY_METHODS = {
@@ -313,6 +330,7 @@ def _is_single_row_query_method(query: Any) -> bool:
313
330
 
314
331
  FIRST_PARTY_SINGLE_ROW_QUERY_METHODS = {
315
332
  "get_column_unique_count_dh_patch",
333
+ "_get_column_cardinality",
316
334
  }
317
335
 
318
336
  # We'll do this the inefficient way since the arrays are pretty small.
@@ -479,7 +497,20 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
479
497
  self, column_spec: _SingleColumnSpec, column: str
480
498
  ) -> None:
481
499
  try:
482
- nonnull_count = self.dataset.get_column_nonnull_count(column)
500
+ # Don't use Great Expectations get_column_nonnull_count because it
501
+ # generates this SQL:
502
+ #
503
+ # sum(CASE WHEN (mycolumn IN (NULL) OR mycolumn IS NULL) THEN 1 ELSE 0 END)
504
+ #
505
+ # which fails for complex types (such as Databricks maps) that don't
506
+ # support the IN operator.
507
+ nonnull_count = convert_to_json_serializable(
508
+ self.dataset.engine.execute(
509
+ sa.select(sa.func.count(sa.column(column))).select_from(
510
+ self.dataset._table
511
+ )
512
+ ).scalar()
513
+ )
483
514
  column_spec.nonnull_count = nonnull_count
484
515
  except Exception as e:
485
516
  logger.debug(
@@ -734,11 +765,41 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
734
765
  def _get_dataset_column_distinct_value_frequencies(
735
766
  self, column_profile: DatasetFieldProfileClass, column: str
736
767
  ) -> None:
737
- if self.config.include_field_distinct_value_frequencies:
768
+ if not self.config.include_field_distinct_value_frequencies:
769
+ return
770
+ try:
771
+ results = self.dataset.engine.execute(
772
+ sa.select(
773
+ [
774
+ sa.column(column),
775
+ sa.func.count(sa.column(column)),
776
+ ]
777
+ )
778
+ .select_from(self.dataset._table)
779
+ .where(sa.column(column).is_not(None))
780
+ .group_by(sa.column(column))
781
+ ).fetchall()
782
+
738
783
  column_profile.distinctValueFrequencies = [
739
- ValueFrequencyClass(value=str(value), frequency=count)
740
- for value, count in self.dataset.get_column_value_counts(column).items()
784
+ ValueFrequencyClass(value=str(value), frequency=int(count))
785
+ for value, count in results
741
786
  ]
787
+ # sort so output is deterministic. don't do it in SQL because not all column
788
+ # types are sortable in SQL (such as JSON data types on Athena/Trino).
789
+ column_profile.distinctValueFrequencies = sorted(
790
+ column_profile.distinctValueFrequencies, key=lambda x: x.value
791
+ )
792
+ except Exception as e:
793
+ logger.debug(
794
+ f"Caught exception while attempting to get distinct value frequencies for column {column}. {e}"
795
+ )
796
+
797
+ self.report.report_warning(
798
+ title="Profiling: Unable to Calculate Distinct Value Frequencies",
799
+ message="Distinct value frequencies for the column will not be accessible",
800
+ context=f"{self.dataset_name}.{column}",
801
+ exc=e,
802
+ )
742
803
 
743
804
  @_run_with_query_combiner
744
805
  def _get_dataset_column_histogram(
@@ -1173,26 +1234,34 @@ class DatahubGEProfiler:
1173
1234
  f"Will profile {len(requests)} table(s) with {max_workers} worker(s) - this may take a while"
1174
1235
  )
1175
1236
 
1176
- with PerfTimer() as timer, unittest.mock.patch(
1177
- "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_unique_count",
1178
- get_column_unique_count_dh_patch,
1179
- ), unittest.mock.patch(
1180
- "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
1181
- _get_column_quantiles_bigquery_patch,
1182
- ), unittest.mock.patch(
1183
- "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_awsathena",
1184
- _get_column_quantiles_awsathena_patch,
1185
- ), unittest.mock.patch(
1186
- "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_median",
1187
- _get_column_median_patch,
1188
- ), concurrent.futures.ThreadPoolExecutor(
1189
- max_workers=max_workers
1190
- ) as async_executor, SQLAlchemyQueryCombiner(
1191
- enabled=self.config.query_combiner_enabled,
1192
- catch_exceptions=self.config.catch_exceptions,
1193
- is_single_row_query_method=_is_single_row_query_method,
1194
- serial_execution_fallback_enabled=True,
1195
- ).activate() as query_combiner:
1237
+ with (
1238
+ PerfTimer() as timer,
1239
+ unittest.mock.patch(
1240
+ "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_unique_count",
1241
+ get_column_unique_count_dh_patch,
1242
+ ),
1243
+ unittest.mock.patch(
1244
+ "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
1245
+ _get_column_quantiles_bigquery_patch,
1246
+ ),
1247
+ unittest.mock.patch(
1248
+ "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_awsathena",
1249
+ _get_column_quantiles_awsathena_patch,
1250
+ ),
1251
+ unittest.mock.patch(
1252
+ "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_median",
1253
+ _get_column_median_patch,
1254
+ ),
1255
+ concurrent.futures.ThreadPoolExecutor(
1256
+ max_workers=max_workers
1257
+ ) as async_executor,
1258
+ SQLAlchemyQueryCombiner(
1259
+ enabled=self.config.query_combiner_enabled,
1260
+ catch_exceptions=self.config.catch_exceptions,
1261
+ is_single_row_query_method=_is_single_row_query_method,
1262
+ serial_execution_fallback_enabled=True,
1263
+ ).activate() as query_combiner,
1264
+ ):
1196
1265
  # Submit the profiling requests to the thread pool executor.
1197
1266
  async_profiles = collections.deque(
1198
1267
  async_executor.submit(
@@ -1395,12 +1464,12 @@ class DatahubGEProfiler:
1395
1464
  )
1396
1465
  return None
1397
1466
  finally:
1398
- if batch is not None and self.base_engine.engine.name.upper() in [
1399
- "TRINO",
1400
- "AWSATHENA",
1467
+ if batch is not None and self.base_engine.engine.name.lower() in [
1468
+ GXSqlDialect.TRINO,
1469
+ GXSqlDialect.AWSATHENA,
1401
1470
  ]:
1402
1471
  if (
1403
- self.base_engine.engine.name.upper() == "TRINO"
1472
+ self.base_engine.engine.name.lower() == GXSqlDialect.TRINO
1404
1473
  or temp_view is not None
1405
1474
  ):
1406
1475
  self._drop_temp_table(batch)
@@ -1449,9 +1518,17 @@ class DatahubGEProfiler:
1449
1518
  logger.error(
1450
1519
  f"Unexpected {pretty_name} while profiling. Should have 3 parts but has {len(name_parts)} parts."
1451
1520
  )
1521
+ if platform == DATABRICKS:
1522
+ # TODO: Review logic for BigQuery as well, probably project.dataset.table should be quoted there as well
1523
+ quoted_name = ".".join(
1524
+ batch.engine.dialect.identifier_preparer.quote(part)
1525
+ for part in name_parts
1526
+ )
1527
+ batch._table = sa.text(quoted_name)
1528
+ logger.debug(f"Setting quoted table name to be {batch._table}")
1452
1529
  # If we only have two parts that means the project_id is missing from the table name and we add it
1453
1530
  # Temp tables has 3 parts while normal tables only has 2 parts
1454
- if len(str(batch._table).split(".")) == 2:
1531
+ elif len(str(batch._table).split(".")) == 2:
1455
1532
  batch._table = sa.text(f"{name_parts[0]}.{str(batch._table)}")
1456
1533
  logger.debug(f"Setting table name to be {batch._table}")
1457
1534
 
@@ -1,12 +1,12 @@
1
1
  import datetime
2
2
  import logging
3
3
  import os
4
- from typing import Any, Dict, List, Optional
4
+ from typing import Annotated, Any, Dict, List, Optional
5
5
 
6
6
  import pydantic
7
7
  from pydantic.fields import Field
8
8
 
9
- from datahub.configuration.common import AllowDenyPattern, ConfigModel
9
+ from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources
10
10
  from datahub.ingestion.source_config.operation_config import OperationConfig
11
11
 
12
12
  _PROFILING_FLAGS_TO_REPORT = {
@@ -120,28 +120,37 @@ class GEProfilingConfig(GEProfilingBaseConfig):
120
120
  "number of columns to profile goes up.",
121
121
  )
122
122
 
123
- profile_if_updated_since_days: Optional[pydantic.PositiveFloat] = Field(
123
+ profile_if_updated_since_days: Annotated[
124
+ Optional[pydantic.PositiveFloat], SupportedSources(["snowflake", "bigquery"])
125
+ ] = Field(
124
126
  default=None,
125
127
  description="Profile table only if it has been updated since these many number of days. "
126
128
  "If set to `null`, no constraint of last modified time for tables to profile. "
127
129
  "Supported only in `snowflake` and `BigQuery`.",
128
130
  )
129
131
 
130
- profile_table_size_limit: Optional[int] = Field(
132
+ profile_table_size_limit: Annotated[
133
+ Optional[int],
134
+ SupportedSources(["snowflake", "bigquery", "unity-catalog", "oracle"]),
135
+ ] = Field(
131
136
  default=5,
132
137
  description="Profile tables only if their size is less than specified GBs. If set to `null`, "
133
138
  "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
134
139
  "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
135
140
  )
136
141
 
137
- profile_table_row_limit: Optional[int] = Field(
142
+ profile_table_row_limit: Annotated[
143
+ Optional[int], SupportedSources(["snowflake", "bigquery", "oracle"])
144
+ ] = Field(
138
145
  default=5000000,
139
146
  description="Profile tables only if their row count is less than specified count. "
140
147
  "If set to `null`, no limit on the row count of tables to profile. Supported only in "
141
148
  "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
142
149
  )
143
150
 
144
- profile_table_row_count_estimate_only: bool = Field(
151
+ profile_table_row_count_estimate_only: Annotated[
152
+ bool, SupportedSources(["postgres", "mysql"])
153
+ ] = Field(
145
154
  default=False,
146
155
  description="Use an approximate query for row count. This will be much faster but slightly "
147
156
  "less accurate. Only supported for Postgres and MySQL. ",
@@ -157,29 +166,35 @@ class GEProfilingConfig(GEProfilingBaseConfig):
157
166
  # Hidden option - used for debugging purposes.
158
167
  catch_exceptions: bool = Field(default=True, description="")
159
168
 
160
- partition_profiling_enabled: bool = Field(
169
+ partition_profiling_enabled: Annotated[
170
+ bool, SupportedSources(["athena", "bigquery"])
171
+ ] = Field(
161
172
  default=True,
162
173
  description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
163
174
  "If enabled, latest partition data is used for profiling.",
164
175
  )
165
- partition_datetime: Optional[datetime.datetime] = Field(
176
+ partition_datetime: Annotated[
177
+ Optional[datetime.datetime], SupportedSources(["bigquery"])
178
+ ] = Field(
166
179
  default=None,
167
180
  description="If specified, profile only the partition which matches this datetime. "
168
181
  "If not specified, profile the latest partition. Only Bigquery supports this.",
169
182
  )
170
- use_sampling: bool = Field(
183
+ use_sampling: Annotated[bool, SupportedSources(["bigquery", "snowflake"])] = Field(
171
184
  default=True,
172
185
  description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
173
186
  "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
174
187
  )
175
188
 
176
- sample_size: int = Field(
189
+ sample_size: Annotated[int, SupportedSources(["bigquery", "snowflake"])] = Field(
177
190
  default=10000,
178
191
  description="Number of rows to be sampled from table for column level profiling."
179
192
  "Applicable only if `use_sampling` is set to True.",
180
193
  )
181
194
 
182
- profile_external_tables: bool = Field(
195
+ profile_external_tables: Annotated[
196
+ bool, SupportedSources(["redshift", "snowflake"])
197
+ ] = Field(
183
198
  default=False,
184
199
  description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
185
200
  )
@@ -0,0 +1,272 @@
1
+ from typing import Dict, List, Optional, Tuple
2
+
3
+ from datahub.emitter.mce_builder import (
4
+ make_chart_urn,
5
+ make_dashboard_urn,
6
+ make_data_platform_urn,
7
+ make_dataplatform_instance_urn,
8
+ make_dataset_urn_with_platform_instance,
9
+ make_tag_urn,
10
+ make_user_urn,
11
+ )
12
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
13
+ from datahub.ingestion.source.grafana.models import Dashboard, Panel
14
+ from datahub.ingestion.source.grafana.types import CHART_TYPE_MAPPINGS
15
+ from datahub.metadata.schema_classes import (
16
+ ChangeAuditStampsClass,
17
+ ChartInfoClass,
18
+ DashboardInfoClass,
19
+ DataPlatformInstanceClass,
20
+ GlobalTagsClass,
21
+ OwnerClass,
22
+ OwnershipClass,
23
+ OwnershipTypeClass,
24
+ StatusClass,
25
+ TagAssociationClass,
26
+ )
27
+
28
+
29
def build_chart_mcps(
    panel: Panel,
    dashboard: Dashboard,
    platform: str,
    platform_instance: Optional[str],
    env: str,
    base_url: str,
    ingest_tags: bool,
) -> Tuple[Optional[str], str, List[MetadataChangeProposalWrapper]]:
    """Build the chart-level metadata change proposals for one Grafana panel.

    Returns a tuple of (dataset urn or None, chart urn, list of MCPs).
    """
    dataset_urn: Optional[str] = None
    proposals: List[MetadataChangeProposalWrapper] = []

    chart_urn = make_chart_urn(
        platform,
        f"{dashboard.uid}.{panel.id}",
        platform_instance,
    )

    # Platform instance aspect
    instance_urn = (
        make_dataplatform_instance_urn(
            platform=platform,
            instance=platform_instance,
        )
        if platform_instance
        else None
    )
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=chart_urn,
            aspect=DataPlatformInstanceClass(
                platform=make_data_platform_urn(platform),
                instance=instance_urn,
            ),
        )
    )

    # Status aspect
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=chart_urn,
            aspect=StatusClass(removed=False),
        )
    )

    # Input datasets: one synthetic Grafana dataset per panel datasource.
    chart_inputs: List[str] = []
    if panel.datasource_ref:
        source_type = panel.datasource_ref.type or "unknown"
        source_uid = panel.datasource_ref.uid or "unknown"
        dataset_urn = make_dataset_urn_with_platform_instance(
            platform=platform,
            name=f"{source_type}.{source_uid}.{panel.id}",
            platform_instance=platform_instance,
            env=env,
        )
        chart_inputs.append(dataset_urn)

    # Chart info aspect
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=chart_urn,
            aspect=ChartInfoClass(
                type=CHART_TYPE_MAPPINGS.get(panel.type) if panel.type else None,
                description=panel.description,
                title=panel.title or f"Panel {panel.id}",
                lastModified=ChangeAuditStampsClass(),
                chartUrl=f"{base_url}/d/{dashboard.uid}?viewPanel={panel.id}",
                customProperties=_build_custom_properties(panel),
                inputs=chart_inputs,
            ),
        )
    )

    # Tags aspect; "key:value" tags become dotted DataHub tags.
    if dashboard.tags and ingest_tags:
        associations = []
        for raw_tag in dashboard.tags:
            if ":" in raw_tag:
                key, value = raw_tag.split(":", 1)
                tag_urn = make_tag_urn(f"{key}.{value}")
            else:
                tag_urn = make_tag_urn(raw_tag)
            associations.append(TagAssociationClass(tag=tag_urn))

        if associations:
            proposals.append(
                MetadataChangeProposalWrapper(
                    entityUrn=chart_urn,
                    aspect=GlobalTagsClass(tags=associations),
                )
            )

    return dataset_urn, chart_urn, proposals
125
+
126
+
127
def build_dashboard_mcps(
    dashboard: Dashboard,
    platform: str,
    platform_instance: Optional[str],
    chart_urns: List[str],
    base_url: str,
    ingest_owners: bool,
    ingest_tags: bool,
) -> Tuple[str, List[MetadataChangeProposalWrapper]]:
    """Build the dashboard-level metadata change proposals.

    Returns a tuple of (dashboard urn, list of MCPs).
    """
    proposals: List[MetadataChangeProposalWrapper] = []
    dashboard_urn = make_dashboard_urn(platform, dashboard.uid, platform_instance)

    # Platform instance aspect
    instance_urn = (
        make_dataplatform_instance_urn(
            platform=platform,
            instance=platform_instance,
        )
        if platform_instance
        else None
    )
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=dashboard_urn,
            aspect=DataPlatformInstanceClass(
                platform=make_data_platform_urn(platform),
                instance=instance_urn,
            ),
        )
    )

    # Dashboard info aspect
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=dashboard_urn,
            aspect=DashboardInfoClass(
                description=dashboard.description,
                title=dashboard.title,
                charts=chart_urns,
                lastModified=ChangeAuditStampsClass(),
                dashboardUrl=f"{base_url}/d/{dashboard.uid}",
                customProperties=_build_dashboard_properties(dashboard),
            ),
        )
    )

    # Ownership aspect (optional)
    if dashboard.uid and ingest_owners:
        ownership = _build_ownership(dashboard)
        if ownership:
            proposals.append(
                MetadataChangeProposalWrapper(
                    entityUrn=dashboard_urn,
                    aspect=ownership,
                )
            )

    # Tags aspect; dashboard tags are ingested verbatim.
    if dashboard.tags and ingest_tags:
        tag_entries = [
            TagAssociationClass(tag=make_tag_urn(raw_tag))
            for raw_tag in dashboard.tags
        ]
        if tag_entries:
            proposals.append(
                MetadataChangeProposalWrapper(
                    entityUrn=dashboard_urn,
                    aspect=GlobalTagsClass(tags=tag_entries),
                )
            )

    # Status aspect
    proposals.append(
        MetadataChangeProposalWrapper(
            entityUrn=dashboard_urn,
            aspect=StatusClass(removed=False),
        )
    )

    return dashboard_urn, proposals
202
+
203
+
204
+ def _build_custom_properties(panel: Panel) -> Dict[str, str]:
205
+ """Build custom properties for chart"""
206
+ props = {}
207
+
208
+ if panel.type:
209
+ props["type"] = panel.type
210
+
211
+ if panel.datasource_ref:
212
+ props["datasourceType"] = panel.datasource_ref.type or ""
213
+ props["datasourceUid"] = panel.datasource_ref.uid or ""
214
+
215
+ for key in [
216
+ "description",
217
+ "format",
218
+ "pluginVersion",
219
+ "repeatDirection",
220
+ "maxDataPoints",
221
+ ]:
222
+ value = getattr(panel, key, None)
223
+ if value:
224
+ props[key] = str(value)
225
+
226
+ if panel.query_targets:
227
+ props["targetsCount"] = str(len(panel.query_targets))
228
+
229
+ return props
230
+
231
+
232
+ def _build_dashboard_properties(dashboard: Dashboard) -> Dict[str, str]:
233
+ """Build custom properties for dashboard"""
234
+ props = {}
235
+
236
+ if dashboard.timezone:
237
+ props["timezone"] = dashboard.timezone
238
+
239
+ if dashboard.schema_version:
240
+ props["schema_version"] = dashboard.schema_version
241
+
242
+ if dashboard.version:
243
+ props["version"] = dashboard.version
244
+
245
+ if dashboard.refresh:
246
+ props["refresh"] = dashboard.refresh
247
+
248
+ return props
249
+
250
+
251
def _build_ownership(dashboard: Dashboard) -> Optional[OwnershipClass]:
    """Derive ownership: dashboard uid as technical owner, creator as data owner.

    Returns None when neither a uid nor a creator is available.
    """
    owner_entries: List[OwnerClass] = []

    if dashboard.uid:
        owner_entries.append(
            OwnerClass(
                owner=make_user_urn(dashboard.uid),
                type=OwnershipTypeClass.TECHNICAL_OWNER,
            )
        )

    if dashboard.created_by:
        # Strip an email domain if present ("user@org" -> "user").
        creator = dashboard.created_by.split("@")[0]
        owner_entries.append(
            OwnerClass(
                owner=make_user_urn(creator),
                type=OwnershipTypeClass.DATAOWNER,
            )
        )

    if not owner_entries:
        return None
    return OwnershipClass(owners=owner_entries)