acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503) hide show
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,54 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import warnings
4
- from typing import TYPE_CHECKING, Union, overload
4
+ from typing import TYPE_CHECKING, Optional, Union, overload
5
5
 
6
6
  import datahub.metadata.schema_classes as models
7
7
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
8
  from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
9
+ from datahub.emitter.rest_emitter import EmitMode
9
10
  from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
10
11
  from datahub.ingestion.graph.client import DataHubGraph
11
12
  from datahub.metadata.urns import (
13
+ ChartUrn,
12
14
  ContainerUrn,
15
+ DashboardUrn,
16
+ DataFlowUrn,
17
+ DataJobUrn,
13
18
  DatasetUrn,
19
+ MlModelGroupUrn,
20
+ MlModelUrn,
14
21
  Urn,
15
22
  )
16
23
  from datahub.sdk._all_entities import ENTITY_CLASSES
17
24
  from datahub.sdk._shared import UrnOrStr
25
+ from datahub.sdk.chart import Chart
18
26
  from datahub.sdk.container import Container
27
+ from datahub.sdk.dashboard import Dashboard
28
+ from datahub.sdk.dataflow import DataFlow
29
+ from datahub.sdk.datajob import DataJob
19
30
  from datahub.sdk.dataset import Dataset
20
31
  from datahub.sdk.entity import Entity
32
+ from datahub.sdk.mlmodel import MLModel
33
+ from datahub.sdk.mlmodelgroup import MLModelGroup
21
34
 
22
35
  if TYPE_CHECKING:
23
36
  from datahub.sdk.main_client import DataHubClient
24
37
 
25
38
 
26
39
  class EntityClient:
40
+ """Client for managing DataHub entities.
41
+
42
+ This class provides methods for retrieving and managing DataHub entities
43
+ such as datasets, containers, and other metadata objects.
44
+ """
45
+
27
46
  def __init__(self, client: DataHubClient):
47
+ """Private constructor - use :py:attr:`DataHubClient.entities` instead.
48
+
49
+ Args:
50
+ client: The parent DataHubClient instance.
51
+ """
28
52
  self._client = client
29
53
 
30
54
  # TODO: Make all of these methods sync by default.
@@ -38,13 +62,60 @@ class EntityClient:
38
62
  @overload
39
63
  def get(self, urn: DatasetUrn) -> Dataset: ...
40
64
  @overload
65
+ def get(self, urn: MlModelUrn) -> MLModel: ...
66
+ @overload
67
+ def get(self, urn: MlModelGroupUrn) -> MLModelGroup: ...
68
+ @overload
69
+ def get(self, urn: DataFlowUrn) -> DataFlow: ...
70
+ @overload
71
+ def get(self, urn: DataJobUrn) -> DataJob: ...
72
+ @overload
73
+ def get(self, urn: DashboardUrn) -> Dashboard: ...
74
+ @overload
75
+ def get(self, urn: ChartUrn) -> Chart: ...
76
+ @overload
41
77
  def get(self, urn: Union[Urn, str]) -> Entity: ...
42
78
  def get(self, urn: UrnOrStr) -> Entity:
79
+ """Retrieve an entity by its urn.
80
+
81
+ Args:
82
+ urn: The urn of the entity to retrieve. Can be a string or :py:class:`Urn` object.
83
+
84
+ Returns:
85
+ The retrieved entity instance.
86
+
87
+ Raises:
88
+ ItemNotFoundError: If the entity does not exist.
89
+ SdkUsageError: If the entity type is not yet supported.
90
+ InvalidUrnError: If the URN is invalid.
91
+ """
43
92
  if not isinstance(urn, Urn):
44
93
  urn = Urn.from_string(urn)
45
94
 
46
95
  # TODO: add error handling around this with a suggested alternative if not yet supported
47
- EntityClass = ENTITY_CLASSES[urn.entity_type]
96
+ try:
97
+ EntityClass = ENTITY_CLASSES[urn.entity_type]
98
+ except KeyError as e:
99
+ # Try to import cloud-specific entities if not found
100
+ try:
101
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion
102
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
103
+ from acryl_datahub_cloud.sdk.entities.subscription import Subscription
104
+
105
+ if urn.entity_type == "assertion":
106
+ EntityClass = Assertion
107
+ elif urn.entity_type == "subscription":
108
+ EntityClass = Subscription
109
+ elif urn.entity_type == "monitor":
110
+ EntityClass = Monitor
111
+ else:
112
+ raise SdkUsageError(
113
+ f"Entity type {urn.entity_type} is not yet supported"
114
+ ) from e
115
+ except ImportError as e:
116
+ raise SdkUsageError(
117
+ f"Entity type {urn.entity_type} is not yet supported"
118
+ ) from e
48
119
 
49
120
  if not self._graph.exists(str(urn)):
50
121
  raise ItemNotFoundError(f"Entity {urn} not found")
@@ -52,9 +123,25 @@ class EntityClient:
52
123
  aspects = self._graph.get_entity_semityped(str(urn))
53
124
 
54
125
  # TODO: save the timestamp so we can use If-Unmodified-Since on the updates
55
- return EntityClass._new_from_graph(urn, aspects)
126
+ entity = EntityClass._new_from_graph(urn, aspects)
127
+
128
+ # Type narrowing for cloud-specific entities
129
+ if urn.entity_type == "assertion":
130
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion
131
+
132
+ assert isinstance(entity, Assertion)
133
+ elif urn.entity_type == "monitor":
134
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
135
+
136
+ assert isinstance(entity, Monitor)
137
+ elif urn.entity_type == "subscription":
138
+ from acryl_datahub_cloud.sdk.entities.subscription import Subscription
139
+
140
+ assert isinstance(entity, Subscription)
56
141
 
57
- def create(self, entity: Entity) -> None:
142
+ return entity
143
+
144
+ def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
58
145
  mcps = []
59
146
 
60
147
  if self._graph.exists(str(entity.urn)):
@@ -71,11 +158,14 @@ class EntityClient:
71
158
  changeType=models.ChangeTypeClass.CREATE_ENTITY,
72
159
  )
73
160
  )
74
- mcps.extend(entity._as_mcps(models.ChangeTypeClass.CREATE))
161
+ mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
75
162
 
76
- self._graph.emit_mcps(mcps)
163
+ if emit_mode:
164
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
165
+ else:
166
+ self._graph.emit_mcps(mcps)
77
167
 
78
- def upsert(self, entity: Entity) -> None:
168
+ def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
79
169
  if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
80
170
  warnings.warn(
81
171
  f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
@@ -84,10 +174,18 @@ class EntityClient:
84
174
  )
85
175
  # TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
86
176
 
87
- mcps = entity._as_mcps(models.ChangeTypeClass.UPSERT)
88
- self._graph.emit_mcps(mcps)
177
+ mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
178
+ if emit_mode:
179
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
180
+ else:
181
+ self._graph.emit_mcps(mcps)
89
182
 
90
- def update(self, entity: Union[Entity, MetadataPatchProposal]) -> None:
183
+ def update(
184
+ self,
185
+ entity: Union[Entity, MetadataPatchProposal],
186
+ *,
187
+ emit_mode: Optional[EmitMode] = None,
188
+ ) -> None:
91
189
  if isinstance(entity, MetadataPatchProposal):
92
190
  return self._update_patch(entity)
93
191
 
@@ -99,8 +197,11 @@ class EntityClient:
99
197
  # TODO: respect If-Unmodified-Since?
100
198
  # -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
101
199
 
102
- mcps = entity._as_mcps(models.ChangeTypeClass.UPSERT)
103
- self._graph.emit_mcps(mcps)
200
+ mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
201
+ if emit_mode:
202
+ self._graph.emit_mcps(mcps, emit_mode=emit_mode)
203
+ else:
204
+ self._graph.emit_mcps(mcps)
104
205
 
105
206
  def _update_patch(
106
207
  self, updater: MetadataPatchProposal, check_exists: bool = True
@@ -113,3 +214,44 @@ class EntityClient:
113
214
 
114
215
  mcps = updater.build()
115
216
  self._graph.emit_mcps(mcps)
217
+
218
+ def delete(
219
+ self,
220
+ urn: UrnOrStr,
221
+ check_exists: bool = True,
222
+ cascade: bool = False,
223
+ hard: bool = False,
224
+ ) -> None:
225
+ """Delete an entity by its urn.
226
+
227
+ Args:
228
+ urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
229
+ check_exists: Whether to check if the entity exists before deletion. Defaults to True.
230
+ cascade: Whether to cascade delete related entities. When True, deletes child entities
231
+ like datajobs within dataflows, datasets within containers, etc. Not yet supported.
232
+ hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.
233
+
234
+ Raises:
235
+ SdkUsageError: If the entity does not exist and check_exists is True, or if cascade is True (not supported).
236
+
237
+ Note:
238
+ When hard is True, the operation is irreversible and the entity will be permanently removed.
239
+
240
+ Impact of cascade deletion (still to be done) depends on the input entity type:
241
+ - Container: Recursively deletes all containers and data assets within the container.
242
+ - Dataflow: Recursively deletes all data jobs within the dataflow.
243
+ - Dashboard: TBD
244
+ - DataPlatformInstance: TBD
245
+ - ...
246
+ """
247
+ urn_str = str(urn) if isinstance(urn, Urn) else urn
248
+ if check_exists and not self._graph.exists(entity_urn=urn_str):
249
+ raise SdkUsageError(
250
+ f"Entity {urn_str} does not exist, and hence cannot be deleted. "
251
+ "You can bypass this check by setting check_exists=False."
252
+ )
253
+
254
+ if cascade:
255
+ raise SdkUsageError("The 'cascade' parameter is not yet supported.")
256
+
257
+ self._graph.delete_entity(urn=urn_str, hard=hard)