acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -13,8 +13,10 @@ import datahub.metadata.schema_classes
13
13
  from datahub.cli.cli_utils import post_entity
14
14
  from datahub.configuration.common import GraphError
15
15
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
16
+ from datahub.ingestion.graph.config import ClientMode
16
17
  from datahub.metadata.schema_classes import SystemMetadataClass
17
18
  from datahub.telemetry import telemetry
19
+ from datahub.upgrade import upgrade
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
@@ -163,6 +165,7 @@ def validate_warehouse(data_root: str) -> None:
163
165
  help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
164
166
  )
165
167
  @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
168
+ @upgrade.check_upgrade
166
169
  def create(
167
170
  warehouse: str,
168
171
  description: Optional[str],
@@ -178,7 +181,7 @@ def create(
178
181
  Create an iceberg warehouse.
179
182
  """
180
183
 
181
- client = get_default_graph()
184
+ client = get_default_graph(ClientMode.CLI)
182
185
 
183
186
  urn = iceberg_data_platform_instance_urn(warehouse)
184
187
 
@@ -316,6 +319,7 @@ def create(
316
319
  help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified",
317
320
  )
318
321
  @telemetry.with_telemetry(capture_kwargs=["duration_seconds"])
322
+ @upgrade.check_upgrade
319
323
  def update(
320
324
  warehouse: str,
321
325
  data_root: str,
@@ -331,7 +335,7 @@ def update(
331
335
  Update iceberg warehouses. Can only update credentials, and role. Cannot update region
332
336
  """
333
337
 
334
- client = get_default_graph()
338
+ client = get_default_graph(ClientMode.CLI)
335
339
 
336
340
  urn = iceberg_data_platform_instance_urn(warehouse)
337
341
 
@@ -402,12 +406,13 @@ def update(
402
406
 
403
407
  @iceberg.command()
404
408
  @telemetry.with_telemetry()
409
+ @upgrade.check_upgrade
405
410
  def list() -> None:
406
411
  """
407
412
  List iceberg warehouses
408
413
  """
409
414
 
410
- client = get_default_graph()
415
+ client = get_default_graph(ClientMode.CLI)
411
416
 
412
417
  for warehouse in get_all_warehouses(client):
413
418
  click.echo(warehouse)
@@ -418,9 +423,10 @@ def list() -> None:
418
423
  "-w", "--warehouse", required=True, type=str, help="The name of the warehouse"
419
424
  )
420
425
  @telemetry.with_telemetry()
426
+ @upgrade.check_upgrade
421
427
  def get(warehouse: str) -> None:
422
428
  """Fetches the details of the specified iceberg warehouse"""
423
- client = get_default_graph()
429
+ client = get_default_graph(ClientMode.CLI)
424
430
  urn = iceberg_data_platform_instance_urn(warehouse)
425
431
 
426
432
  if client.exists(urn):
@@ -455,7 +461,7 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:
455
461
 
456
462
  urn = iceberg_data_platform_instance_urn(warehouse)
457
463
 
458
- client = get_default_graph()
464
+ client = get_default_graph(ClientMode.CLI)
459
465
 
460
466
  if not client.exists(urn):
461
467
  raise click.ClickException(f"urn {urn} not found")
datahub/cli/ingest_cli.py CHANGED
@@ -14,16 +14,16 @@ from tabulate import tabulate
14
14
 
15
15
  from datahub._version import nice_version_name
16
16
  from datahub.cli import cli_utils
17
- from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
17
+ from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH, load_client_config
18
18
  from datahub.configuration.common import GraphError
19
19
  from datahub.configuration.config_loader import load_config_file
20
20
  from datahub.ingestion.graph.client import get_default_graph
21
+ from datahub.ingestion.graph.config import ClientMode
21
22
  from datahub.ingestion.run.connection import ConnectionManager
22
23
  from datahub.ingestion.run.pipeline import Pipeline
23
24
  from datahub.telemetry import telemetry
24
25
  from datahub.upgrade import upgrade
25
26
  from datahub.utilities.ingest_utils import deploy_source_vars
26
- from datahub.utilities.perf_timer import PerfTimer
27
27
 
28
28
  logger = logging.getLogger(__name__)
29
29
 
@@ -113,6 +113,7 @@ def ingest() -> None:
113
113
  "no_progress",
114
114
  ]
115
115
  )
116
+ @upgrade.check_upgrade
116
117
  def run(
117
118
  config: str,
118
119
  dry_run: bool,
@@ -177,14 +178,7 @@ def run(
177
178
  no_progress=no_progress,
178
179
  raw_config=raw_pipeline_config,
179
180
  )
180
- with PerfTimer() as timer:
181
- ret = run_pipeline_to_completion(pipeline)
182
-
183
- # The main ingestion has completed. If it was successful, potentially show an upgrade nudge message.
184
- if ret == 0:
185
- upgrade.check_upgrade_post(
186
- main_method_runtime=timer.elapsed_seconds(), graph=pipeline.ctx.graph
187
- )
181
+ ret = run_pipeline_to_completion(pipeline)
188
182
 
189
183
  if ret:
190
184
  sys.exit(ret)
@@ -192,8 +186,6 @@ def run(
192
186
 
193
187
 
194
188
  @ingest.command()
195
- @upgrade.check_upgrade
196
- @telemetry.with_telemetry()
197
189
  @click.option(
198
190
  "-n",
199
191
  "--name",
@@ -216,9 +208,9 @@ def run(
216
208
  @click.option(
217
209
  "--executor-id",
218
210
  type=str,
219
- default="default",
220
211
  help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.",
221
212
  required=False,
213
+ default=None,
222
214
  )
223
215
  @click.option(
224
216
  "--cli-version",
@@ -239,7 +231,7 @@ def run(
239
231
  type=str,
240
232
  help="Timezone for the schedule in 'America/New_York' format. Uses UTC by default.",
241
233
  required=False,
242
- default="UTC",
234
+ default=None,
243
235
  )
244
236
  @click.option(
245
237
  "--debug", type=bool, help="Should we debug.", required=False, default=False
@@ -251,14 +243,15 @@ def run(
251
243
  required=False,
252
244
  default=None,
253
245
  )
246
+ @upgrade.check_upgrade
254
247
  def deploy(
255
248
  name: Optional[str],
256
249
  config: str,
257
250
  urn: Optional[str],
258
- executor_id: str,
251
+ executor_id: Optional[str],
259
252
  cli_version: Optional[str],
260
253
  schedule: Optional[str],
261
- time_zone: str,
254
+ time_zone: Optional[str],
262
255
  extra_pip: Optional[str],
263
256
  debug: bool = False,
264
257
  ) -> None:
@@ -269,7 +262,7 @@ def deploy(
269
262
  urn:li:dataHubIngestionSource:<name>
270
263
  """
271
264
 
272
- datahub_graph = get_default_graph()
265
+ datahub_graph = get_default_graph(ClientMode.CLI)
273
266
 
274
267
  variables = deploy_source_vars(
275
268
  name=name,
@@ -360,6 +353,7 @@ def mcps(path: str) -> None:
360
353
  """
361
354
 
362
355
  click.echo("Starting ingestion...")
356
+ datahub_config = load_client_config()
363
357
  recipe: dict = {
364
358
  "source": {
365
359
  "type": "file",
@@ -367,6 +361,7 @@ def mcps(path: str) -> None:
367
361
  "path": path,
368
362
  },
369
363
  },
364
+ "datahub_api": datahub_config,
370
365
  }
371
366
 
372
367
  pipeline = Pipeline.create(recipe, report_to=None)
@@ -383,9 +378,11 @@ def mcps(path: str) -> None:
383
378
  "--source", type=str, default=None, help="Filter by ingestion source name."
384
379
  )
385
380
  @upgrade.check_upgrade
386
- @telemetry.with_telemetry()
387
381
  def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None:
388
- """List ingestion source runs with their details, optionally filtered by URN or source."""
382
+ """
383
+ List ingestion source runs with their details, optionally filtered by URN or source.
384
+ Requires the Manage Metadata Ingestion permission.
385
+ """
389
386
 
390
387
  query = """
391
388
  query listIngestionRuns($input: ListIngestionSourcesInput!) {
@@ -422,7 +419,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
422
419
  }
423
420
  }
424
421
 
425
- client = get_default_graph()
422
+ client = get_default_graph(ClientMode.CLI)
426
423
  session = client._session
427
424
  gms_host = client.config.server
428
425
 
@@ -443,6 +440,11 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
443
440
  if not data:
444
441
  click.echo("No response received from the server.")
445
442
  return
443
+ if "errors" in data:
444
+ click.echo("Errors in response:")
445
+ for error in data["errors"]:
446
+ click.echo(f"- {error.get('message', 'Unknown error')}")
447
+ return
446
448
 
447
449
  # a lot of responses can be null if there's errors in the run
448
450
  ingestion_sources = (
@@ -504,11 +506,10 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
504
506
  help="If enabled, will list ingestion runs which have been soft deleted",
505
507
  )
506
508
  @upgrade.check_upgrade
507
- @telemetry.with_telemetry()
508
509
  def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None:
509
510
  """List recent ingestion runs to datahub"""
510
511
 
511
- client = get_default_graph()
512
+ client = get_default_graph(ClientMode.CLI)
512
513
  session = client._session
513
514
  gms_host = client.config.server
514
515
 
@@ -554,12 +555,11 @@ def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> N
554
555
  )
555
556
  @click.option("-a", "--show-aspect", required=False, is_flag=True)
556
557
  @upgrade.check_upgrade
557
- @telemetry.with_telemetry()
558
558
  def show(
559
559
  run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
560
560
  ) -> None:
561
561
  """Describe a provided ingestion run to datahub"""
562
- client = get_default_graph()
562
+ client = get_default_graph(ClientMode.CLI)
563
563
  session = client._session
564
564
  gms_host = client.config.server
565
565
 
@@ -604,12 +604,11 @@ def show(
604
604
  help="Path to directory where rollback reports will be saved to",
605
605
  )
606
606
  @upgrade.check_upgrade
607
- @telemetry.with_telemetry()
608
607
  def rollback(
609
608
  run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
610
609
  ) -> None:
611
610
  """Rollback a provided ingestion run to datahub"""
612
- client = get_default_graph()
611
+ client = get_default_graph(ClientMode.CLI)
613
612
 
614
613
  if not force and not dry_run:
615
614
  click.confirm(
datahub/cli/migrate.py CHANGED
@@ -25,6 +25,7 @@ from datahub.emitter.mcp_builder import (
25
25
  )
26
26
  from datahub.emitter.rest_emitter import DatahubRestEmitter
27
27
  from datahub.ingestion.graph.client import (
28
+ ClientMode,
28
29
  DataHubGraph,
29
30
  RelatedEntity,
30
31
  get_default_graph,
@@ -36,6 +37,7 @@ from datahub.metadata.schema_classes import (
36
37
  SystemMetadataClass,
37
38
  )
38
39
  from datahub.telemetry import telemetry
40
+ from datahub.upgrade import upgrade
39
41
  from datahub.utilities.urns.urn import Urn
40
42
 
41
43
  log = logging.getLogger(__name__)
@@ -76,13 +78,13 @@ class MigrationReport:
76
78
  def __repr__(self) -> str:
77
79
  repr = f"{self._get_prefix()}Migration Report:\n--------------\n"
78
80
  repr += f"{self._get_prefix()}Migration Run Id: {self.run_id}\n"
79
- repr += f"{self._get_prefix()}Num entities created = {len(set([x[0] for x in self.entities_created.keys()]))}\n"
80
- repr += f"{self._get_prefix()}Num entities affected = {len(set([x[0] for x in self.entities_affected.keys()]))}\n"
81
- repr += f"{self._get_prefix()}Num entities {'kept' if self.keep else 'migrated'} = {len(set([x[0] for x in self.entities_migrated.keys()]))}\n"
81
+ repr += f"{self._get_prefix()}Num entities created = {len(set([x[0] for x in self.entities_created]))}\n"
82
+ repr += f"{self._get_prefix()}Num entities affected = {len(set([x[0] for x in self.entities_affected]))}\n"
83
+ repr += f"{self._get_prefix()}Num entities {'kept' if self.keep else 'migrated'} = {len(set([x[0] for x in self.entities_migrated]))}\n"
82
84
  repr += f"{self._get_prefix()}Details:\n"
83
- repr += f"{self._get_prefix()}New Entities Created: {set([x[0] for x in self.entities_created.keys()]) or 'None'}\n"
84
- repr += f"{self._get_prefix()}External Entities Affected: {set([x[0] for x in self.entities_affected.keys()]) or 'None'}\n"
85
- repr += f"{self._get_prefix()}Old Entities {'Kept' if self.keep else 'Migrated'} = {set([x[0] for x in self.entities_migrated.keys()]) or 'None'}\n"
85
+ repr += f"{self._get_prefix()}New Entities Created: {set([x[0] for x in self.entities_created]) or 'None'}\n"
86
+ repr += f"{self._get_prefix()}External Entities Affected: {set([x[0] for x in self.entities_affected]) or 'None'}\n"
87
+ repr += f"{self._get_prefix()}Old Entities {'Kept' if self.keep else 'Migrated'} = {set([x[0] for x in self.entities_migrated]) or 'None'}\n"
86
88
  return repr
87
89
 
88
90
 
@@ -118,6 +120,7 @@ def _get_type_from_urn(urn: str) -> str:
118
120
  help="When enabled, will not delete (hard/soft) the previous entities.",
119
121
  )
120
122
  @telemetry.with_telemetry()
123
+ @upgrade.check_upgrade
121
124
  def dataplatform2instance(
122
125
  instance: str,
123
126
  platform: str,
@@ -147,7 +150,7 @@ def dataplatform2instance_func(
147
150
  migration_report = MigrationReport(run_id, dry_run, keep)
148
151
  system_metadata = SystemMetadataClass(runId=run_id)
149
152
 
150
- graph = get_default_graph()
153
+ graph = get_default_graph(ClientMode.CLI)
151
154
 
152
155
  urns_to_migrate: List[str] = []
153
156
 
@@ -386,7 +389,7 @@ def migrate_containers(
386
389
 
387
390
 
388
391
  def get_containers_for_migration(env: str) -> List[Any]:
389
- client = get_default_graph()
392
+ client = get_default_graph(ClientMode.CLI)
390
393
  containers_to_migrate = list(
391
394
  client.get_urns_by_filter(entity_types=["container"], env=env)
392
395
  )
@@ -445,7 +448,7 @@ def process_container_relationships(
445
448
  relationships: Iterable[RelatedEntity] = migration_utils.get_incoming_relationships(
446
449
  urn=src_urn
447
450
  )
448
- client = get_default_graph()
451
+ client = get_default_graph(ClientMode.CLI)
449
452
  for relationship in relationships:
450
453
  log.debug(f"Incoming Relationship: {relationship}")
451
454
  target_urn: str = relationship.urn
@@ -12,6 +12,7 @@ from datahub.ingestion.graph.client import (
12
12
  RelatedEntity,
13
13
  get_default_graph,
14
14
  )
15
+ from datahub.ingestion.graph.config import ClientMode
15
16
  from datahub.metadata.schema_classes import (
16
17
  ChartInfoClass,
17
18
  ContainerClass,
@@ -243,7 +244,7 @@ def clone_aspect(
243
244
  run_id: str = str(uuid.uuid4()),
244
245
  dry_run: bool = False,
245
246
  ) -> Iterable[MetadataChangeProposalWrapper]:
246
- client = get_default_graph()
247
+ client = get_default_graph(ClientMode.CLI)
247
248
  aspect_map = cli_utils.get_aspects_for_entity(
248
249
  client._session,
249
250
  client.config.server,
@@ -274,7 +275,7 @@ def clone_aspect(
274
275
 
275
276
 
276
277
  def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
277
- client = get_default_graph()
278
+ client = get_default_graph(ClientMode.CLI)
278
279
  yield from client.get_related_entities(
279
280
  entity_urn=urn,
280
281
  relationship_types=[
@@ -290,7 +291,7 @@ def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
290
291
 
291
292
 
292
293
  def get_outgoing_relationships(urn: str) -> Iterable[RelatedEntity]:
293
- client = get_default_graph()
294
+ client = get_default_graph(ClientMode.CLI)
294
295
  yield from client.get_related_entities(
295
296
  entity_urn=urn,
296
297
  relationship_types=[
datahub/cli/put_cli.py CHANGED
@@ -8,12 +8,12 @@ from datahub.cli.cli_utils import post_entity
8
8
  from datahub.configuration.config_loader import load_config_file
9
9
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
10
10
  from datahub.ingestion.graph.client import get_default_graph
11
+ from datahub.ingestion.graph.config import ClientMode
11
12
  from datahub.metadata.schema_classes import (
12
13
  DataPlatformInfoClass as DataPlatformInfo,
13
14
  PlatformTypeClass,
14
15
  SystemMetadataClass,
15
16
  )
16
- from datahub.telemetry import telemetry
17
17
  from datahub.upgrade import upgrade
18
18
  from datahub.utilities.urns.data_platform_urn import DataPlatformUrn
19
19
  from datahub.utilities.urns.urn import guess_entity_type
@@ -44,7 +44,6 @@ def put() -> None:
44
44
  help="Run ID into which we should log the aspect.",
45
45
  )
46
46
  @upgrade.check_upgrade
47
- @telemetry.with_telemetry()
48
47
  def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> None:
49
48
  """Update a single aspect of an entity"""
50
49
 
@@ -53,7 +52,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
53
52
  aspect_data, allow_stdin=True, resolve_env_vars=False, process_directives=False
54
53
  )
55
54
 
56
- client = get_default_graph()
55
+ client = get_default_graph(ClientMode.CLI)
57
56
 
58
57
  system_metadata: Union[None, SystemMetadataClass] = None
59
58
  if run_id:
@@ -74,8 +73,6 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
74
73
 
75
74
  @put.command()
76
75
  @click.pass_context
77
- @upgrade.check_upgrade
78
- @telemetry.with_telemetry()
79
76
  @click.option(
80
77
  "--name",
81
78
  type=str,
@@ -97,6 +94,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
97
94
  @click.option(
98
95
  "--run-id", type=str, help="Run ID into which we should log the platform."
99
96
  )
97
+ @upgrade.check_upgrade
100
98
  def platform(
101
99
  ctx: click.Context, name: str, display_name: Optional[str], logo: str, run_id: str
102
100
  ) -> None:
@@ -118,7 +116,7 @@ def platform(
118
116
  displayName=display_name or platform_name,
119
117
  logoUrl=logo,
120
118
  )
121
- datahub_graph = get_default_graph()
119
+ datahub_graph = get_default_graph(ClientMode.CLI)
122
120
  mcp = MetadataChangeProposalWrapper(
123
121
  entityUrn=str(platform_urn),
124
122
  aspect=data_platform_info,
@@ -1,27 +1,50 @@
1
1
  import json
2
2
  import logging
3
- import os
4
3
  import os.path
5
4
  import re
6
5
  from typing import Dict, Optional
7
6
 
8
7
  import click
8
+ import packaging
9
9
  import requests
10
10
  import yaml
11
11
  from packaging.version import parse
12
12
  from pydantic import BaseModel
13
13
 
14
+ from datahub._version import nice_version_name
15
+ from datahub.configuration.env_vars import get_force_local_quickstart_mapping
16
+
14
17
  logger = logging.getLogger(__name__)
15
18
 
16
- LOCAL_QUICKSTART_MAPPING_FILE = os.environ.get("FORCE_LOCAL_QUICKSTART_MAPPING", "")
19
+ LOCAL_QUICKSTART_MAPPING_FILE = get_force_local_quickstart_mapping()
17
20
  DEFAULT_LOCAL_CONFIG_PATH = "~/.datahub/quickstart/quickstart_version_mapping.yaml"
18
21
  DEFAULT_REMOTE_CONFIG_PATH = "https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/quickstart_version_mapping.yaml"
19
22
 
23
+ MINIMUM_SUPPORTED_VERSION = "v1.1.0"
24
+
25
+
26
+ def get_minimum_supported_version_message(version: str) -> str:
27
+ MINIMUM_SUPPORTED_VERSION_MESSAGE = f"""
28
+ DataHub CLI Version Compatibility Issue
29
+
30
+ You're trying to install DataHub server version {version} which is not supported by this CLI version.
31
+
32
+ This CLI (version {nice_version_name()}) only supports installing DataHub server versions {MINIMUM_SUPPORTED_VERSION} and above.
33
+
34
+ To install older server versions:
35
+ 1. Uninstall current CLI: pip uninstall acryl-datahub
36
+ 2. Install older CLI: pip install acryl-datahub==1.1
37
+ 3. Run quickstart with your desired version: datahub docker quickstart --version <version>
38
+
39
+ For more information: https://docs.datahub.com/docs/quickstart#install-datahub-server
40
+ """
41
+ return MINIMUM_SUPPORTED_VERSION_MESSAGE
42
+
20
43
 
21
44
  class QuickstartExecutionPlan(BaseModel):
22
45
  composefile_git_ref: str
23
46
  docker_tag: str
24
- mysql_tag: Optional[str]
47
+ mysql_tag: Optional[str] = None
25
48
 
26
49
 
27
50
  def _is_it_a_version(version: str) -> bool:
@@ -126,15 +149,23 @@ class QuickstartVersionMappingConfig(BaseModel):
126
149
  mysql_tag=str(mysql_tag),
127
150
  ),
128
151
  )
152
+
153
+ if not is_minimum_supported_version(requested_version):
154
+ click.secho(
155
+ get_minimum_supported_version_message(version=requested_version),
156
+ fg="red",
157
+ )
158
+ raise click.ClickException("Minimum supported version not met")
159
+
129
160
  # new CLI version is downloading the composefile corresponding to the requested version
130
- # if the version is older than v0.10.1, it doesn't contain the setup job labels and the
131
- # the checks will fail, so in those cases we pick the composefile from v0.10.1 which contains
132
- # the setup job labels
161
+ # if the version is older than v1.2.0, it doesn't contain the
162
+ # docker compose based resolved compose file. In those cases, we pick up the composefile from
163
+ # the merge commit that first added the resolved compose file.
133
164
  if _is_it_a_version(result.composefile_git_ref):
134
- if parse("v0.10.1") > parse(result.composefile_git_ref):
135
- # The merge commit where the labels were added
136
- # https://github.com/datahub-project/datahub/pull/7473
137
- result.composefile_git_ref = "1d3339276129a7cb8385c07a958fcc93acda3b4e"
165
+ if parse("v1.2.0") > parse(result.composefile_git_ref):
166
+ # The merge commit where profiles based resolved compose file was added.
167
+ # https://github.com/datahub-project/datahub/pull/13566
168
+ result.composefile_git_ref = "21726bc3341490f4182b904626c793091ac95edd"
138
169
 
139
170
  return result
140
171
 
@@ -148,3 +179,15 @@ def save_quickstart_config(
148
179
  with open(path, "w") as f:
149
180
  yaml.dump(config.dict(), f)
150
181
  logger.info(f"Saved quickstart config to {path}.")
182
+
183
+
184
+ def is_minimum_supported_version(version: str) -> bool:
185
+ if not _is_it_a_version(version):
186
+ return True
187
+
188
+ requested_version = packaging.version.parse(version)
189
+ minimum_supported_version = packaging.version.parse(MINIMUM_SUPPORTED_VERSION)
190
+ if requested_version < minimum_supported_version:
191
+ return False
192
+
193
+ return True
@@ -1,3 +1,8 @@
1
+ """
2
+ DEPRECATED: This assertions CLI is no longer supported and will be removed in a future version.
3
+ Please use alternative methods for managing assertions in DataHub.
4
+ """
5
+
1
6
  import logging
2
7
  import os
3
8
  from pathlib import Path
@@ -15,8 +20,8 @@ from datahub.api.entities.assertion.compiler_interface import (
15
20
  from datahub.emitter.mce_builder import make_assertion_urn
16
21
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
17
22
  from datahub.ingestion.graph.client import get_default_graph
23
+ from datahub.ingestion.graph.config import ClientMode
18
24
  from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
19
- from datahub.telemetry import telemetry
20
25
  from datahub.upgrade import upgrade
21
26
 
22
27
  logger = logging.getLogger(__name__)
@@ -26,20 +31,39 @@ REPORT_FILE_NAME = "compile_report.json"
26
31
 
27
32
  @click.group(cls=DefaultGroup, default="upsert")
28
33
  def assertions() -> None:
29
- """A group of commands to interact with the Assertion entity in DataHub."""
34
+ """A group of commands to interact with the Assertion entity in DataHub.
35
+
36
+ ⚠️ DEPRECATED: This assertions CLI is no longer supported and will be removed
37
+ in a future version. Please use alternative methods for managing assertions in DataHub.
38
+ """
39
+ click.secho(
40
+ "⚠️ WARNING: The assertions CLI is deprecated and no longer supported. "
41
+ "It may be removed in a future version. Please use alternative methods for managing assertions in DataHub.",
42
+ fg="yellow",
43
+ bold=True,
44
+ err=True,
45
+ )
30
46
  pass
31
47
 
32
48
 
33
49
  @assertions.command()
34
50
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
35
51
  @upgrade.check_upgrade
36
- @telemetry.with_telemetry()
37
52
  def upsert(file: str) -> None:
38
- """Upsert (create or update) a set of assertions in DataHub."""
53
+ """Upsert (create or update) a set of assertions in DataHub.
54
+
55
+ ⚠️ DEPRECATED: This command is deprecated and no longer supported.
56
+ """
57
+ click.secho(
58
+ "⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
59
+ fg="yellow",
60
+ bold=True,
61
+ err=True,
62
+ )
39
63
 
40
64
  assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)
41
65
 
42
- with get_default_graph() as graph:
66
+ with get_default_graph(ClientMode.CLI) as graph:
43
67
  for assertion_spec in assertions_spec.assertions:
44
68
  try:
45
69
  mcp = MetadataChangeProposalWrapper(
@@ -70,8 +94,6 @@ def upsert(file: str) -> None:
70
94
  default=[],
71
95
  help="Platform-specific extra key-value inputs in form key=value",
72
96
  )
73
- @upgrade.check_upgrade
74
- @telemetry.with_telemetry()
75
97
  def compile(
76
98
  file: str, platform: str, output_to: Optional[str], extras: List[str]
77
99
  ) -> None:
@@ -81,7 +103,15 @@ def compile(
81
103
  In future, we may introduce separate command to automatically apply these compiled changes
82
104
  in assertion platform. Currently, generated result artifacts are stored in target folder
83
105
  unless another folder is specified using option `--output-to <folder>`.
106
+
107
+ ⚠️ DEPRECATED: This command is deprecated and no longer supported.
84
108
  """
109
+ click.secho(
110
+ "⚠️ WARNING: The 'compile' command is deprecated and no longer supported.",
111
+ fg="yellow",
112
+ bold=True,
113
+ err=True,
114
+ )
85
115
 
86
116
  if platform not in ASSERTION_PLATFORMS:
87
117
  click.secho(
@@ -149,3 +179,5 @@ def extras_list_to_dict(extras: List[str]) -> Dict[str, str]:
149
179
  # Later:
150
180
  # 3. execute compiled assertions on assertion platform (Later, requires connection details to platform),
151
181
  # 4. cleanup assertions from assertion platform (generate artifacts. optionally execute)
182
+ #
183
+ # NOTE: This entire assertions CLI is deprecated and these TODOs will not be implemented.