acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py CHANGED
@@ -26,12 +26,14 @@ from datahub.sdk._shared import (
26
26
  HasInstitutionalMemory,
27
27
  HasOwnership,
28
28
  HasPlatformInstance,
29
+ HasStructuredProperties,
29
30
  HasSubtype,
30
31
  HasTags,
31
32
  HasTerms,
32
33
  LinksInputType,
33
34
  OwnersInputType,
34
35
  ParentContainerInputType,
36
+ StructuredPropertyInputType,
35
37
  TagInputType,
36
38
  TagsInputType,
37
39
  TermInputType,
@@ -44,6 +46,10 @@ from datahub.sdk.entity import Entity, ExtraAspectsType
44
46
  from datahub.utilities.sentinels import Unset, unset
45
47
 
46
48
  SchemaFieldInputType: TypeAlias = Union[
49
+ # There is no Enum variant for schema field types because that would force users to do a mapping
50
+ # to our enum from the raw source type, so additional complexity on their side.
51
+ # To avoid that, the raw source native type can be provided as a string,
52
+ # and we will do the mapping internally (in sql_types.py)
47
53
  Tuple[str, str], # (name, type)
48
54
  Tuple[str, str, str], # (name, type, description)
49
55
  models.SchemaFieldClass,
@@ -70,6 +76,11 @@ UpstreamLineageInputType: TypeAlias = Union[
70
76
  Dict[DatasetUrnOrStr, ColumnLineageMapping],
71
77
  ]
72
78
 
79
+ ViewDefinitionInputType: TypeAlias = Union[
80
+ str,
81
+ models.ViewPropertiesClass,
82
+ ]
83
+
73
84
 
74
85
  def _parse_upstream_input(
75
86
  upstream_input: UpstreamInputType,
@@ -428,6 +439,7 @@ class Dataset(
428
439
  HasTags,
429
440
  HasTerms,
430
441
  HasDomain,
442
+ HasStructuredProperties,
431
443
  Entity,
432
444
  ):
433
445
  """Represents a dataset in DataHub.
@@ -464,6 +476,7 @@ class Dataset(
464
476
  custom_properties: Optional[Dict[str, str]] = None,
465
477
  created: Optional[datetime] = None,
466
478
  last_modified: Optional[datetime] = None,
479
+ view_definition: Optional[ViewDefinitionInputType] = None,
467
480
  # Standard aspects.
468
481
  parent_container: ParentContainerInputType | Unset = unset,
469
482
  subtype: Optional[str] = None,
@@ -471,12 +484,12 @@ class Dataset(
471
484
  links: Optional[LinksInputType] = None,
472
485
  tags: Optional[TagsInputType] = None,
473
486
  terms: Optional[TermsInputType] = None,
474
- # TODO structured_properties
475
487
  domain: Optional[DomainInputType] = None,
476
- extra_aspects: ExtraAspectsType = None,
477
488
  # Dataset-specific aspects.
478
489
  schema: Optional[SchemaFieldsInputType] = None,
479
490
  upstreams: Optional[models.UpstreamLineageClass] = None,
491
+ structured_properties: Optional[StructuredPropertyInputType] = None,
492
+ extra_aspects: ExtraAspectsType = None,
480
493
  ):
481
494
  """Initialize a new Dataset instance.
482
495
 
@@ -492,6 +505,7 @@ class Dataset(
492
505
  custom_properties: Optional dictionary of custom properties.
493
506
  created: Optional creation timestamp.
494
507
  last_modified: Optional last modification timestamp.
508
+ view_definition: Optional view definition for the dataset.
495
509
  parent_container: Optional parent container for this dataset.
496
510
  subtype: Optional subtype of the dataset.
497
511
  owners: Optional list of owners.
@@ -533,6 +547,8 @@ class Dataset(
533
547
  self.set_created(created)
534
548
  if last_modified is not None:
535
549
  self.set_last_modified(last_modified)
550
+ if view_definition is not None:
551
+ self.set_view_definition(view_definition)
536
552
 
537
553
  if parent_container is not unset:
538
554
  self._set_container(parent_container)
@@ -548,6 +564,9 @@ class Dataset(
548
564
  self.set_terms(terms)
549
565
  if domain is not None:
550
566
  self.set_domain(domain)
567
+ if structured_properties is not None:
568
+ for key, value in structured_properties.items():
569
+ self.set_structured_property(property_urn=key, values=value)
551
570
 
552
571
  @classmethod
553
572
  def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
@@ -711,6 +730,41 @@ class Dataset(
711
730
  def set_last_modified(self, last_modified: datetime) -> None:
712
731
  self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
713
732
 
733
+ @property
734
+ def view_definition(self) -> Optional[models.ViewPropertiesClass]:
735
+ """Get the view definition of the dataset.
736
+
737
+ Under typical usage, this will be present if the subtype is "View".
738
+
739
+ Returns:
740
+ The view definition if set, None otherwise.
741
+ """
742
+ return self._get_aspect(models.ViewPropertiesClass)
743
+
744
+ def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
745
+ """Set the view definition of the dataset.
746
+
747
+ If you're setting a view definition, subtype should typically be set to "view".
748
+
749
+ If a string is provided, it will be treated as a SQL view definition. To set
750
+ a custom language or other properties, provide a ViewPropertiesClass object.
751
+
752
+ Args:
753
+ view_definition: The view definition to set.
754
+ """
755
+ if isinstance(view_definition, models.ViewPropertiesClass):
756
+ self._set_aspect(view_definition)
757
+ elif isinstance(view_definition, str):
758
+ self._set_aspect(
759
+ models.ViewPropertiesClass(
760
+ materialized=False,
761
+ viewLogic=view_definition,
762
+ viewLanguage="SQL",
763
+ )
764
+ )
765
+ else:
766
+ assert_never(view_definition)
767
+
714
768
  def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
715
769
  schema_metadata = self._get_aspect(models.SchemaMetadataClass)
716
770
  if schema_metadata is None:
datahub/sdk/entity_client.py CHANGED
@@ -1,15 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
- from typing import TYPE_CHECKING, Union, overload
4
+ from typing import TYPE_CHECKING, Optional, Union, overload
5
5
 
6
6
  import datahub.metadata.schema_classes as models
7
7
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
8
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
9
+ from datahub.emitter.rest_emitter import EmitMode
9
10
  from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
10
11
  from datahub.ingestion.graph.client import DataHubGraph
11
12
  from datahub.metadata.urns import (
13
+ ChartUrn,
12
14
  ContainerUrn,
15
+ DashboardUrn,
13
16
  DataFlowUrn,
14
17
  DataJobUrn,
15
18
  DatasetUrn,
@@ -19,7 +22,9 @@ from datahub.metadata.urns import (
19
22
  )
20
23
  from datahub.sdk._all_entities import ENTITY_CLASSES
21
24
  from datahub.sdk._shared import UrnOrStr
25
+ from datahub.sdk.chart import Chart
22
26
  from datahub.sdk.container import Container
27
+ from datahub.sdk.dashboard import Dashboard
23
28
  from datahub.sdk.dataflow import DataFlow
24
29
  from datahub.sdk.datajob import DataJob
25
30
  from datahub.sdk.dataset import Dataset
@@ -65,6 +70,10 @@ class EntityClient:
65
70
  @overload
66
71
  def get(self, urn: DataJobUrn) -> DataJob: ...
67
72
  @overload
73
+ def get(self, urn: DashboardUrn) -> Dashboard: ...
74
+ @overload
75
+ def get(self, urn: ChartUrn) -> Chart: ...
76
+ @overload
68
77
  def get(self, urn: Union[Urn, str]) -> Entity: ...
69
78
  def get(self, urn: UrnOrStr) -> Entity:
70
79
  """Retrieve an entity by its urn.
@@ -84,7 +93,29 @@ class EntityClient:
84
93
  urn = Urn.from_string(urn)
85
94
 
86
95
  # TODO: add error handling around this with a suggested alternative if not yet supported
87
- EntityClass = ENTITY_CLASSES[urn.entity_type]
96
+ try:
97
+ EntityClass = ENTITY_CLASSES[urn.entity_type]
98
+ except KeyError as e:
99
+ # Try to import cloud-specific entities if not found
100
+ try:
101
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion
102
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
103
+ from acryl_datahub_cloud.sdk.entities.subscription import Subscription
104
+
105
+ if urn.entity_type == "assertion":
106
+ EntityClass = Assertion
107
+ elif urn.entity_type == "subscription":
108
+ EntityClass = Subscription
109
+ elif urn.entity_type == "monitor":
110
+ EntityClass = Monitor
111
+ else:
112
+ raise SdkUsageError(
113
+ f"Entity type {urn.entity_type} is not yet supported"
114
+ ) from e
115
+ except ImportError as e:
116
+ raise SdkUsageError(
117
+ f"Entity type {urn.entity_type} is not yet supported"
118
+ ) from e
88
119
 
89
120
  if not self._graph.exists(str(urn)):
90
121
  raise ItemNotFoundError(f"Entity {urn} not found")
@@ -92,9 +123,25 @@ class EntityClient:
92
123
  aspects = self._graph.get_entity_semityped(str(urn))
93
124
 
94
125
  # TODO: save the timestamp so we can use If-Unmodified-Since on the updates
95
- return EntityClass._new_from_graph(urn, aspects)
126
+ entity = EntityClass._new_from_graph(urn, aspects)
127
+
128
+ # Type narrowing for cloud-specific entities
129
+ if urn.entity_type == "assertion":
130
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion
131
+
132
+ assert isinstance(entity, Assertion)
133
+ elif urn.entity_type == "monitor":
134
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
135
+
136
+ assert isinstance(entity, Monitor)
137
+ elif urn.entity_type == "subscription":
138
+ from acryl_datahub_cloud.sdk.entities.subscription import Subscription
139
+
140
+ assert isinstance(entity, Subscription)
96
141
 
97
- def create(self, entity: Entity) -> None:
142
+ return entity
143
+
144
+ def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
98
145
  mcps = []
99
146
 
100
147
  if self._graph.exists(str(entity.urn)):
@@ -113,9 +160,12 @@ class EntityClient:
113
160
  )
114
161
  mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
115
162
 
116
- self._graph.emit_mcps(mcps)
163
+ if emit_mode:
164
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
165
+ else:
166
+ self._graph.emit_mcps(mcps)
117
167
 
118
- def upsert(self, entity: Entity) -> None:
168
+ def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
119
169
  if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
120
170
  warnings.warn(
121
171
  f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -125,9 +175,17 @@ class EntityClient:
125
175
  # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
126
176
 
127
177
  mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
128
- self._graph.emit_mcps(mcps)
178
+ if emit_mode:
179
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
180
+ else:
181
+ self._graph.emit_mcps(mcps)
129
182
 
130
- def update(self, entity: Union[Entity, MetadataPatchProposal]) -> None:
183
+ def update(
184
+ self,
185
+ entity: Union[Entity, MetadataPatchProposal],
186
+ *,
187
+ emit_mode: Optional[EmitMode] = None,
188
+ ) -> None:
131
189
  if isinstance(entity, MetadataPatchProposal):
132
190
  return self._update_patch(entity)
133
191
 
@@ -140,7 +198,10 @@ class EntityClient:
140
198
  # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
141
199
 
142
200
  mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
143
- self._graph.emit_mcps(mcps)
201
+ if emit_mode:
202
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
203
+ else:
204
+ self._graph.emit_mcps(mcps)
144
205
 
145
206
  def _update_patch(
146
207
  self, updater: MetadataPatchProposal, check_exists: bool = True
@@ -153,3 +214,44 @@ class EntityClient:
153
214
 
154
215
  mcps = updater.build()
155
216
  self._graph.emit_mcps(mcps)
217
+
218
+ def delete(
219
+ self,
220
+ urn: UrnOrStr,
221
+ check_exists: bool = True,
222
+ cascade: bool = False,
223
+ hard: bool = False,
224
+ ) -> None:
225
+ """Delete an entity by its urn.
226
+
227
+ Args:
228
+ urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
229
+ check_exists: Whether to check if the entity exists before deletion. Defaults to True.
230
+ cascade: Whether to cascade delete related entities. When True, deletes child entities
231
+ like datajobs within dataflows, datasets within containers, etc. Not yet supported.
232
+ hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.
233
+
234
+ Raises:
235
+ SdkUsageError: If the entity does not exist and check_exists is True, or if cascade is True (not supported).
236
+
237
+ Note:
238
+ When hard is True, the operation is irreversible and the entity will be permanently removed.
239
+
240
+ Impact of cascade deletion (still to be done) depends on the input entity type:
241
+ - Container: Recursively deletes all containers and data assets within the container.
242
+ - Dataflow: Recursively deletes all data jobs within the dataflow.
243
+ - Dashboard: TBD
244
+ - DataPlatformInstance: TBD
245
+ - ...
246
+ """
247
+ urn_str = str(urn) if isinstance(urn, Urn) else urn
248
+ if check_exists and not self._graph.exists(entity_urn=urn_str):
249
+ raise SdkUsageError(
250
+ f"Entity {urn_str} does not exist, and hence cannot be deleted. "
251
+ "You can bypass this check by setting check_exists=False."
252
+ )
253
+
254
+ if cascade:
255
+ raise SdkUsageError("The 'cascade' parameter is not yet supported.")
256
+
257
+ self._graph.delete_entity(urn=urn_str, hard=hard)