acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
  from typing import Optional
3
4
 
4
5
  import click
@@ -6,7 +7,7 @@ from click_default_group import DefaultGroup
6
7
 
7
8
  from datahub.api.entities.datacontract.datacontract import DataContract
8
9
  from datahub.ingestion.graph.client import get_default_graph
9
- from datahub.telemetry import telemetry
10
+ from datahub.ingestion.graph.config import ClientMode
10
11
  from datahub.upgrade import upgrade
11
12
 
12
13
  logger = logging.getLogger(__name__)
@@ -14,21 +15,57 @@ logger = logging.getLogger(__name__)
14
15
 
15
16
  @click.group(cls=DefaultGroup, default="upsert")
16
17
  def datacontract() -> None:
17
- """A group of commands to interact with the DataContract entity in DataHub."""
18
- pass
18
+ """
19
+ A group of commands to interact with the DataContract entity in DataHub.
20
+
21
+ WARNING: This CLI is DEPRECATED and no longer supported.
22
+ Please migrate to alternative data contract solutions.
23
+ """
24
+ # Issue deprecation warning
25
+ warnings.warn(
26
+ "The datacontract CLI is deprecated and no longer supported. "
27
+ "Please migrate to alternative data contract solutions.",
28
+ DeprecationWarning,
29
+ stacklevel=2,
30
+ )
31
+
32
+ # Log deprecation message for runtime visibility
33
+ logger.warning(
34
+ "DEPRECATED: The datacontract CLI is no longer supported and will be removed in a future version. "
35
+ "Please migrate to alternative data contract solutions."
36
+ )
37
+
38
+ # Display deprecation message to user
39
+ click.secho(
40
+ "⚠️ WARNING: This datacontract CLI is DEPRECATED and no longer supported.",
41
+ fg="yellow",
42
+ bold=True,
43
+ )
44
+ click.secho("Please migrate to alternative data contract solutions.", fg="yellow")
19
45
 
20
46
 
21
47
  @datacontract.command()
22
48
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
23
49
  @upgrade.check_upgrade
24
- @telemetry.with_telemetry()
25
50
  def upsert(file: str) -> None:
26
- """Upsert (create or update) a Data Contract in DataHub."""
51
+ """
52
+ Upsert (create or update) a Data Contract in DataHub.
53
+
54
+ WARNING: This command is DEPRECATED and no longer supported.
55
+ """
56
+
57
+ click.secho(
58
+ "⚠️ WARNING: The 'upsert' command is deprecated and no longer supported.",
59
+ fg="yellow",
60
+ bold=True,
61
+ )
62
+
63
+ logger.warning("DEPRECATED: datacontract upsert command is no longer supported")
27
64
 
28
65
  data_contract: DataContract = DataContract.from_yaml(file)
29
66
  urn = data_contract.urn
30
67
 
31
- with get_default_graph() as graph:
68
+ with get_default_graph(ClientMode.CLI) as graph:
32
69
  if not graph.exists(data_contract.entity):
33
70
  raise ValueError(
34
71
  f"Cannot define a data contract for non-existent entity {data_contract.entity}"
@@ -59,9 +96,20 @@ def upsert(file: str) -> None:
59
96
  )
60
97
  @click.option("--hard/--soft", required=False, is_flag=True, default=False)
61
98
  @upgrade.check_upgrade
62
- @telemetry.with_telemetry()
63
99
  def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
64
- """Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""
100
+ """
101
+ Delete a Data Contract in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata.
102
+
103
+ WARNING: This command is DEPRECATED and no longer supported.
104
+ """
105
+
106
+ click.secho(
107
+ "⚠️ WARNING: The 'delete' command is deprecated and no longer supported.",
108
+ fg="yellow",
109
+ bold=True,
110
+ )
111
+
112
+ logger.warning("DEPRECATED: datacontract delete command is no longer supported")
65
113
 
66
114
  if not urn:
67
115
  if not file:
@@ -72,7 +120,7 @@ def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
72
120
  data_contract = DataContract.from_yaml(file)
73
121
  urn = data_contract.urn
74
122
 
75
- with get_default_graph() as graph:
123
+ with get_default_graph(ClientMode.CLI) as graph:
76
124
  if not graph.exists(urn):
77
125
  raise ValueError(f"Data Contract {urn} does not exist")
78
126
 
@@ -1,7 +1,6 @@
1
1
  import difflib
2
2
  import json
3
3
  import logging
4
- import os
5
4
  import pathlib
6
5
  import sys
7
6
  from pathlib import Path
@@ -14,15 +13,16 @@ from click_default_group import DefaultGroup
14
13
 
15
14
  from datahub.api.entities.dataproduct.dataproduct import DataProduct
16
15
  from datahub.cli.specific.file_loader import load_file
16
+ from datahub.configuration.env_vars import get_dataproduct_external_url
17
17
  from datahub.emitter.mce_builder import (
18
18
  make_group_urn,
19
19
  make_user_urn,
20
20
  validate_ownership_type,
21
21
  )
22
22
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
23
+ from datahub.ingestion.graph.config import ClientMode
23
24
  from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
24
25
  from datahub.specific.dataproduct import DataProductPatchBuilder
25
- from datahub.telemetry import telemetry
26
26
  from datahub.upgrade import upgrade
27
27
  from datahub.utilities.urns.urn import Urn
28
28
 
@@ -81,12 +81,10 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
81
81
 
82
82
  config_dict = load_file(pathlib.Path(file))
83
83
  id = config_dict.get("id") if isinstance(config_dict, dict) else None
84
- with get_default_graph() as graph:
84
+ with get_default_graph(ClientMode.CLI) as graph:
85
85
  data_product: DataProduct = DataProduct.from_yaml(file, graph)
86
86
  external_url_override = (
87
- external_url
88
- or os.getenv("DATAHUB_DATAPRODUCT_EXTERNAL_URL")
89
- or data_product.external_url
87
+ external_url or get_dataproduct_external_url() or data_product.external_url
90
88
  )
91
89
  data_product.external_url = external_url_override
92
90
  if upsert and not graph.exists(data_product.urn):
@@ -129,7 +127,6 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
129
127
  )
130
128
  @click.option("--external-url", required=False, type=str)
131
129
  @upgrade.check_upgrade
132
- @telemetry.with_telemetry()
133
130
  def update(file: Path, validate_assets: bool, external_url: str) -> None:
134
131
  """Create or Update a Data Product in DataHub. Use upsert if you want to apply partial updates."""
135
132
 
@@ -145,7 +142,6 @@ def update(file: Path, validate_assets: bool, external_url: str) -> None:
145
142
  )
146
143
  @click.option("--external-url", required=False, type=str)
147
144
  @upgrade.check_upgrade
148
- @telemetry.with_telemetry()
149
145
  def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
150
146
  """Upsert attributes to a Data Product in DataHub."""
151
147
 
@@ -158,11 +154,10 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
158
154
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
159
155
  @click.option("--update", required=False, is_flag=True, default=False)
160
156
  @upgrade.check_upgrade
161
- @telemetry.with_telemetry()
162
157
  def diff(file: Path, update: bool) -> None:
163
158
  """Diff a Data Product file with its twin in DataHub"""
164
159
 
165
- with get_default_graph() as emitter:
160
+ with get_default_graph(ClientMode.CLI) as emitter:
166
161
  id: Optional[str] = None
167
162
  try:
168
163
  data_product_local: DataProduct = DataProduct.from_yaml(file, emitter)
@@ -205,7 +200,6 @@ def diff(file: Path, update: bool) -> None:
205
200
  )
206
201
  @click.option("--hard/--soft", required=False, is_flag=True, default=False)
207
202
  @upgrade.check_upgrade
208
- @telemetry.with_telemetry()
209
203
  def delete(urn: str, file: Path, hard: bool) -> None:
210
204
  """Delete a Data Product in DataHub. Defaults to a soft-delete. Use --hard to completely erase metadata."""
211
205
 
@@ -216,7 +210,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
216
210
  raise click.Abort()
217
211
 
218
212
  graph: DataHubGraph
219
- with get_default_graph() as graph:
213
+ with get_default_graph(ClientMode.CLI) as graph:
220
214
  data_product_urn = (
221
215
  urn if urn.startswith("urn:li:dataProduct") else f"urn:li:dataProduct:{urn}"
222
216
  )
@@ -241,14 +235,13 @@ def delete(urn: str, file: Path, hard: bool) -> None:
241
235
  @click.option("--urn", required=True, type=str)
242
236
  @click.option("--to-file", required=False, type=str)
243
237
  @upgrade.check_upgrade
244
- @telemetry.with_telemetry()
245
238
  def get(urn: str, to_file: str) -> None:
246
239
  """Get a Data Product from DataHub"""
247
240
 
248
241
  if not urn.startswith("urn:li:dataProduct:"):
249
242
  urn = f"urn:li:dataProduct:{urn}"
250
243
 
251
- with get_default_graph() as graph:
244
+ with get_default_graph(ClientMode.CLI) as graph:
252
245
  if graph.exists(urn):
253
246
  dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
254
247
  click.secho(
@@ -278,7 +271,6 @@ def get(urn: str, to_file: str) -> None:
278
271
  help="A markdown file that contains documentation for this data product",
279
272
  )
280
273
  @upgrade.check_upgrade
281
- @telemetry.with_telemetry()
282
274
  def set_description(urn: str, description: str, md_file: Path) -> None:
283
275
  """Set description for a Data Product in DataHub"""
284
276
 
@@ -306,7 +298,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
306
298
 
307
299
  dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
308
300
  dataproduct_patcher.set_description(description)
309
- with get_default_graph() as graph:
301
+ with get_default_graph(ClientMode.CLI) as graph:
310
302
  _abort_if_non_existent_urn(graph, urn, "set description")
311
303
  for mcp in dataproduct_patcher.build():
312
304
  graph.emit(mcp)
@@ -329,7 +321,6 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
329
321
  default=OwnershipTypeClass.TECHNICAL_OWNER,
330
322
  )
331
323
  @upgrade.check_upgrade
332
- @telemetry.with_telemetry()
333
324
  def add_owner(urn: str, owner: str, owner_type: str) -> None:
334
325
  """Add owner for a Data Product in DataHub"""
335
326
 
@@ -342,7 +333,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
342
333
  owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn
343
334
  )
344
335
  )
345
- with get_default_graph() as graph:
336
+ with get_default_graph(ClientMode.CLI) as graph:
346
337
  _abort_if_non_existent_urn(graph, urn, "add owners")
347
338
  for mcp in dataproduct_patcher.build():
348
339
  graph.emit(mcp)
@@ -352,7 +343,6 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
352
343
  @click.option("--urn", required=True, type=str)
353
344
  @click.argument("owner_urn", required=True, type=str)
354
345
  @upgrade.check_upgrade
355
- @telemetry.with_telemetry()
356
346
  def remove_owner(urn: str, owner_urn: str) -> None:
357
347
  """Remove owner for a Data Product in DataHub"""
358
348
 
@@ -360,7 +350,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
360
350
  urn = f"urn:li:dataProduct:{urn}"
361
351
  dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
362
352
  dataproduct_patcher.remove_owner(owner=_get_owner_urn(owner_urn))
363
- with get_default_graph() as graph:
353
+ with get_default_graph(ClientMode.CLI) as graph:
364
354
  _abort_if_non_existent_urn(graph, urn, "remove owners")
365
355
  for mcp in dataproduct_patcher.build():
366
356
  click.echo(json.dumps(mcp.to_obj()))
@@ -374,7 +364,6 @@ def remove_owner(urn: str, owner_urn: str) -> None:
374
364
  "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
375
365
  )
376
366
  @upgrade.check_upgrade
377
- @telemetry.with_telemetry()
378
367
  def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
379
368
  """Add asset for a Data Product in DataHub"""
380
369
 
@@ -382,7 +371,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
382
371
  urn = f"urn:li:dataProduct:{urn}"
383
372
  dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
384
373
  dataproduct_patcher.add_asset(asset)
385
- with get_default_graph() as graph:
374
+ with get_default_graph(ClientMode.CLI) as graph:
386
375
  _abort_if_non_existent_urn(graph, urn, "add assets")
387
376
  if validate_assets:
388
377
  _abort_if_non_existent_urn(
@@ -401,7 +390,6 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
401
390
  "--validate-assets/--no-validate-assets", required=False, is_flag=True, default=True
402
391
  )
403
392
  @upgrade.check_upgrade
404
- @telemetry.with_telemetry()
405
393
  def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
406
394
  """Remove asset for a Data Product in DataHub"""
407
395
 
@@ -409,7 +397,7 @@ def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
409
397
  urn = f"urn:li:dataProduct:{urn}"
410
398
  dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
411
399
  dataproduct_patcher.remove_asset(asset)
412
- with get_default_graph() as graph:
400
+ with get_default_graph(ClientMode.CLI) as graph:
413
401
  _abort_if_non_existent_urn(graph, urn, "remove assets")
414
402
  if validate_assets:
415
403
  _abort_if_non_existent_urn(
@@ -12,8 +12,8 @@ from click_default_group import DefaultGroup
12
12
  from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
13
13
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
14
14
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
15
+ from datahub.ingestion.graph.config import ClientMode
15
16
  from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
16
- from datahub.telemetry import telemetry
17
17
  from datahub.upgrade import upgrade
18
18
 
19
19
  logger = logging.getLogger(__name__)
@@ -29,13 +29,14 @@ def dataset() -> None:
29
29
  name="upsert",
30
30
  )
31
31
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
32
- @upgrade.check_upgrade
33
- @telemetry.with_telemetry()
34
- def upsert(file: Path) -> None:
32
+ @click.option(
33
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
34
+ )
35
+ def upsert(file: Path, dry_run: bool) -> None:
35
36
  """Upsert attributes to a Dataset in DataHub."""
36
37
  # Call the sync command with to_datahub=True to perform the upsert operation
37
38
  ctx = click.get_current_context()
38
- ctx.invoke(sync, file=str(file), to_datahub=True)
39
+ ctx.invoke(sync, file=str(file), dry_run=dry_run, to_datahub=True)
39
40
 
40
41
 
41
42
  @dataset.command(
@@ -44,14 +45,13 @@ def upsert(file: Path) -> None:
44
45
  @click.option("--urn", required=True, type=str)
45
46
  @click.option("--to-file", required=False, type=str)
46
47
  @upgrade.check_upgrade
47
- @telemetry.with_telemetry()
48
48
  def get(urn: str, to_file: str) -> None:
49
49
  """Get a Dataset from DataHub"""
50
50
 
51
51
  if not urn.startswith("urn:li:dataset:"):
52
52
  urn = f"urn:li:dataset:{urn}"
53
53
 
54
- with get_default_graph() as graph:
54
+ with get_default_graph(ClientMode.CLI) as graph:
55
55
  if graph.exists(urn):
56
56
  dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
57
57
  click.secho(
@@ -73,13 +73,13 @@ def get(urn: str, to_file: str) -> None:
73
73
  help="URN of secondary sibling(s)",
74
74
  multiple=True,
75
75
  )
76
- @telemetry.with_telemetry()
76
+ @upgrade.check_upgrade
77
77
  def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
78
78
  all_urns = set()
79
79
  all_urns.add(urn)
80
80
  for sibling_urn in sibling_urns:
81
81
  all_urns.add(sibling_urn)
82
- with get_default_graph() as graph:
82
+ with get_default_graph(ClientMode.CLI) as graph:
83
83
  for _urn in all_urns:
84
84
  _emit_sibling(graph, urn, _urn, all_urns)
85
85
 
@@ -113,8 +113,6 @@ def _get_existing_siblings(graph: DataHubGraph, urn: str) -> Set[str]:
113
113
  @click.option("--lintCheck", required=False, is_flag=True)
114
114
  @click.option("--lintFix", required=False, is_flag=True)
115
115
  @click.argument("file", type=click.Path(exists=True))
116
- @upgrade.check_upgrade
117
- @telemetry.with_telemetry()
118
116
  def file(lintcheck: bool, lintfix: bool, file: str) -> None:
119
117
  """Operate on a Dataset file"""
120
118
 
@@ -167,13 +165,17 @@ def file(lintcheck: bool, lintfix: bool, file: str) -> None:
167
165
  )
168
166
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
169
167
  @click.option("--to-datahub/--from-datahub", required=True, is_flag=True)
168
+ @click.option(
169
+ "-n", "--dry-run", type=bool, is_flag=True, default=False, help="Perform a dry run"
170
+ )
170
171
  @upgrade.check_upgrade
171
- @telemetry.with_telemetry()
172
- def sync(file: str, to_datahub: bool) -> None:
172
+ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
173
173
  """Sync a Dataset file to/from DataHub"""
174
174
 
175
+ dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
176
+
175
177
  failures: List[str] = []
176
- with get_default_graph() as graph:
178
+ with get_default_graph(ClientMode.CLI) as graph:
177
179
  datasets = Dataset.from_yaml(file)
178
180
  for dataset in datasets:
179
181
  assert (
@@ -189,7 +191,7 @@ def sync(file: str, to_datahub: bool) -> None:
189
191
  click.secho(
190
192
  "\n\t- ".join(
191
193
  [
192
- f"Skipping Dataset {dataset.urn} due to missing entity references: "
194
+ f"{dry_run_prefix}Skipping Dataset {dataset.urn} due to missing entity references: "
193
195
  ]
194
196
  + missing_entity_references
195
197
  ),
@@ -199,13 +201,18 @@ def sync(file: str, to_datahub: bool) -> None:
199
201
  continue
200
202
  try:
201
203
  for mcp in dataset.generate_mcp():
202
- graph.emit(mcp)
203
- click.secho(f"Update succeeded for urn {dataset.urn}.", fg="green")
204
+ if not dry_run:
205
+ graph.emit(mcp)
206
+ click.secho(
207
+ f"{dry_run_prefix}Update succeeded for urn {dataset.urn}.",
208
+ fg="green",
209
+ )
204
210
  except Exception as e:
205
211
  click.secho(
206
- f"Update failed for id {id}. due to {e}",
212
+ f"{dry_run_prefix}Update failed for id {id}. due to {e}",
207
213
  fg="red",
208
214
  )
215
+ failures.append(dataset.urn)
209
216
  else:
210
217
  # Sync from DataHub
211
218
  if graph.exists(dataset.urn):
@@ -215,13 +222,16 @@ def sync(file: str, to_datahub: bool) -> None:
215
222
  existing_dataset: Dataset = Dataset.from_datahub(
216
223
  graph=graph, urn=dataset.urn, config=dataset_get_config
217
224
  )
218
- existing_dataset.to_yaml(Path(file))
225
+ if not dry_run:
226
+ existing_dataset.to_yaml(Path(file))
227
+ else:
228
+ click.secho(f"{dry_run_prefix}Will update file {file}")
219
229
  else:
220
- click.secho(f"Dataset {dataset.urn} does not exist")
230
+ click.secho(f"{dry_run_prefix}Dataset {dataset.urn} does not exist")
221
231
  failures.append(dataset.urn)
222
232
  if failures:
223
233
  click.secho(
224
- f"\nFailed to sync the following Datasets: {', '.join(failures)}",
234
+ f"\n{dry_run_prefix}Failed to sync the following Datasets: {', '.join(failures)}",
225
235
  fg="red",
226
236
  )
227
237
  raise click.Abort()
@@ -7,7 +7,7 @@ from click_default_group import DefaultGroup
7
7
 
8
8
  from datahub.api.entities.forms.forms import Forms
9
9
  from datahub.ingestion.graph.client import get_default_graph
10
- from datahub.telemetry import telemetry
10
+ from datahub.ingestion.graph.config import ClientMode
11
11
  from datahub.upgrade import upgrade
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -23,8 +23,6 @@ def forms() -> None:
23
23
  name="upsert",
24
24
  )
25
25
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
26
- @upgrade.check_upgrade
27
- @telemetry.with_telemetry()
28
26
  def upsert(file: Path) -> None:
29
27
  """Upsert forms in DataHub."""
30
28
 
@@ -37,10 +35,9 @@ def upsert(file: Path) -> None:
37
35
  @click.option("--urn", required=True, type=str)
38
36
  @click.option("--to-file", required=False, type=str)
39
37
  @upgrade.check_upgrade
40
- @telemetry.with_telemetry()
41
38
  def get(urn: str, to_file: str) -> None:
42
39
  """Get form from DataHub"""
43
- with get_default_graph() as graph:
40
+ with get_default_graph(ClientMode.CLI) as graph:
44
41
  if graph.exists(urn):
45
42
  form: Forms = Forms.from_datahub(graph=graph, urn=urn)
46
43
  click.secho(
@@ -10,7 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
10
10
  )
11
11
  from datahub.cli.specific.file_loader import load_file
12
12
  from datahub.ingestion.graph.client import get_default_graph
13
- from datahub.telemetry import telemetry
13
+ from datahub.ingestion.graph.config import ClientMode
14
14
  from datahub.upgrade import upgrade
15
15
 
16
16
  logger = logging.getLogger(__name__)
@@ -34,13 +34,12 @@ def group() -> None:
34
34
  help="When set, writes to the editable section of the metadata graph, overwriting writes from the UI",
35
35
  )
36
36
  @upgrade.check_upgrade
37
- @telemetry.with_telemetry()
38
37
  def upsert(file: Path, override_editable: bool) -> None:
39
38
  """Create or Update a Group with embedded Users"""
40
39
 
41
40
  config_dict = load_file(file)
42
41
  group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
43
- with get_default_graph() as emitter:
42
+ with get_default_graph(ClientMode.CLI) as emitter:
44
43
  for group_config in group_configs:
45
44
  try:
46
45
  datahub_group = CorpGroup.parse_obj(group_config)
@@ -11,7 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
11
11
  StructuredProperties,
12
12
  )
13
13
  from datahub.ingestion.graph.client import get_default_graph
14
- from datahub.telemetry import telemetry
14
+ from datahub.ingestion.graph.config import ClientMode
15
15
  from datahub.upgrade import upgrade
16
16
  from datahub.utilities.urns.urn import Urn
17
17
 
@@ -29,11 +29,10 @@ def properties() -> None:
29
29
  )
30
30
  @click.option("-f", "--file", required=True, type=click.Path(exists=True))
31
31
  @upgrade.check_upgrade
32
- @telemetry.with_telemetry()
33
32
  def upsert(file: Path) -> None:
34
33
  """Upsert structured properties in DataHub."""
35
34
 
36
- with get_default_graph() as graph:
35
+ with get_default_graph(ClientMode.CLI) as graph:
37
36
  StructuredProperties.create(str(file), graph)
38
37
 
39
38
 
@@ -43,12 +42,11 @@ def upsert(file: Path) -> None:
43
42
  @click.option("--urn", required=True, type=str)
44
43
  @click.option("--to-file", required=False, type=str)
45
44
  @upgrade.check_upgrade
46
- @telemetry.with_telemetry()
47
45
  def get(urn: str, to_file: str) -> None:
48
46
  """Get structured properties from DataHub"""
49
47
  urn = Urn.make_structured_property_urn(urn)
50
48
 
51
- with get_default_graph() as graph:
49
+ with get_default_graph(ClientMode.CLI) as graph:
52
50
  if graph.exists(urn):
53
51
  structuredproperties: StructuredProperties = (
54
52
  StructuredProperties.from_datahub(graph=graph, urn=urn)
@@ -70,7 +68,7 @@ def get(urn: str, to_file: str) -> None:
70
68
  )
71
69
  @click.option("--details/--no-details", is_flag=True, default=True)
72
70
  @click.option("--to-file", required=False, type=str)
73
- @telemetry.with_telemetry()
71
+ @upgrade.check_upgrade
74
72
  def list(details: bool, to_file: str) -> None:
75
73
  """List structured properties in DataHub"""
76
74
 
@@ -117,7 +115,7 @@ def list(details: bool, to_file: str) -> None:
117
115
  with open(file, "w") as fp:
118
116
  yaml.dump(serialized_objects, fp)
119
117
 
120
- with get_default_graph() as graph:
118
+ with get_default_graph(ClientMode.CLI) as graph:
121
119
  if details:
122
120
  logger.info(
123
121
  "Listing structured properties with details. Use --no-details for urns only"