acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  from typing import List, Optional, Union
6
6
 
7
7
  import yaml
8
- from pydantic import validator
8
+ from pydantic import Field, validator
9
9
  from ruamel.yaml import YAML
10
10
  from typing_extensions import Literal
11
11
 
@@ -26,6 +26,7 @@ from datahub.emitter.mce_builder import (
26
26
  )
27
27
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
28
28
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
29
+ from datahub.ingestion.graph.config import ClientMode
29
30
  from datahub.metadata.schema_classes import (
30
31
  FormActorAssignmentClass,
31
32
  FormInfoClass,
@@ -66,7 +67,7 @@ class Prompt(ConfigModel):
66
67
  description: Optional[str] = None
67
68
  type: str
68
69
  structured_property_id: Optional[str] = None
69
- structured_property_urn: Optional[str] = None
70
+ structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
70
71
  required: Optional[bool] = None
71
72
 
72
73
  @validator("structured_property_urn", pre=True, always=True)
@@ -110,7 +111,7 @@ class Actors(ConfigModel):
110
111
 
111
112
  class Forms(ConfigModel):
112
113
  id: Optional[str] = None
113
- urn: Optional[str] = None
114
+ urn: Optional[str] = Field(default=None, validate_default=True)
114
115
  name: str
115
116
  description: Optional[str] = None
116
117
  prompts: List[Prompt] = []
@@ -133,47 +134,46 @@ class Forms(ConfigModel):
133
134
  def create(file: str) -> None:
134
135
  emitter: DataHubGraph
135
136
 
136
- with get_default_graph() as emitter:
137
- with open(file) as fp:
138
- forms: List[dict] = yaml.safe_load(fp)
139
- for form_raw in forms:
140
- form = Forms.parse_obj(form_raw)
137
+ with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
138
+ forms: List[dict] = yaml.safe_load(fp)
139
+ for form_raw in forms:
140
+ form = Forms.parse_obj(form_raw)
141
141
 
142
- try:
143
- if not FormType.has_value(form.type):
144
- logger.error(
145
- f"Form type {form.type} does not exist. Please try again with a valid type."
146
- )
147
-
148
- mcp = MetadataChangeProposalWrapper(
149
- entityUrn=form.urn,
150
- aspect=FormInfoClass(
151
- name=form.name,
152
- description=form.description,
153
- prompts=form.validate_prompts(emitter),
154
- type=form.type,
155
- actors=form.create_form_actors(form.actors),
156
- ),
142
+ try:
143
+ if not FormType.has_value(form.type):
144
+ logger.error(
145
+ f"Form type {form.type} does not exist. Please try again with a valid type."
157
146
  )
158
- emitter.emit_mcp(mcp)
159
147
 
160
- logger.info(f"Created form {form.urn}")
148
+ mcp = MetadataChangeProposalWrapper(
149
+ entityUrn=form.urn,
150
+ aspect=FormInfoClass(
151
+ name=form.name,
152
+ description=form.description,
153
+ prompts=form.validate_prompts(emitter),
154
+ type=form.type,
155
+ actors=form.create_form_actors(form.actors),
156
+ ),
157
+ )
158
+ emitter.emit_mcp(mcp)
159
+
160
+ logger.info(f"Created form {form.urn}")
161
161
 
162
- if form.owners or form.group_owners:
163
- form.add_owners(emitter)
162
+ if form.owners or form.group_owners:
163
+ form.add_owners(emitter)
164
164
 
165
- if form.entities:
166
- if form.entities.urns:
167
- # Associate specific entities with a form
168
- form.upload_entities_for_form(emitter)
165
+ if form.entities:
166
+ if form.entities.urns:
167
+ # Associate specific entities with a form
168
+ form.upload_entities_for_form(emitter)
169
169
 
170
- if form.entities.filters:
171
- # Associate groups of entities with a form based on filters
172
- form.create_form_filters(emitter)
170
+ if form.entities.filters:
171
+ # Associate groups of entities with a form based on filters
172
+ form.create_form_filters(emitter)
173
173
 
174
- except Exception as e:
175
- logger.error(e)
176
- return
174
+ except Exception as e:
175
+ logger.error(e)
176
+ return
177
177
 
178
178
  def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]:
179
179
  prompts = []
@@ -1,10 +1,10 @@
1
1
  import logging
2
2
  from enum import Enum
3
3
  from pathlib import Path
4
- from typing import Iterable, List, Optional, Union
4
+ from typing import Iterable, List, Optional, Type, Union
5
5
 
6
6
  import yaml
7
- from pydantic import validator
7
+ from pydantic import Field, StrictStr, validator
8
8
  from ruamel.yaml import YAML
9
9
 
10
10
  from datahub.configuration.common import ConfigModel
@@ -38,17 +38,17 @@ class AllowedTypes(Enum):
38
38
 
39
39
 
40
40
  class AllowedValue(ConfigModel):
41
- value: Union[int, float, str]
41
+ value: Union[StrictStr, float]
42
42
  description: Optional[str] = None
43
43
 
44
44
 
45
45
  VALID_ENTITY_TYPE_URNS = [
46
- Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES.keys()
46
+ Urn.make_entity_type_urn(entity_type) for entity_type in URN_TYPES
47
47
  ]
48
48
  _VALID_ENTITY_TYPES_STRING = f"Valid entity type urns are {', '.join(VALID_ENTITY_TYPE_URNS)}, etc... Ensure that the entity type is valid."
49
49
 
50
50
 
51
- def _validate_entity_type_urn(v: str) -> str:
51
+ def _validate_entity_type_urn(cls: Type, v: str) -> str:
52
52
  urn = Urn.make_entity_type_urn(v)
53
53
  if urn not in VALID_ENTITY_TYPE_URNS:
54
54
  raise ValueError(
@@ -68,7 +68,7 @@ class TypeQualifierAllowedTypes(ConfigModel):
68
68
 
69
69
  class StructuredProperties(ConfigModel):
70
70
  id: Optional[str] = None
71
- urn: Optional[str] = None
71
+ urn: Optional[str] = Field(None, validate_default=True)
72
72
  qualified_name: Optional[str] = None
73
73
  type: str
74
74
  value_entity_types: Optional[List[str]] = None
@@ -65,7 +65,7 @@ query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStat
65
65
 
66
66
  :param urn: The DataHub dataset unique identifier.
67
67
  :param status: The assertion status to filter for. Every status will be accepted if it is not set.
68
- See valid status at https://datahubproject.io/docs/graphql/enums#assertionrunstatus
68
+ See valid status at https://docs.datahub.com/docs/graphql/enums#assertionrunstatus
69
69
  :param start_time_millis: The start time in milliseconds from the assertions will be queried.
70
70
  :param end_time_millis: The end time in milliseconds until the assertions will be queried.
71
71
  :param filter: Additional key value filters which will be applied as AND query
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
1
+ from typing import Dict, List, Optional, Union
2
2
 
3
3
  from gql import Client
4
4
  from gql.transport.requests import RequestsHTTPTransport
@@ -39,16 +39,18 @@ class BaseApi:
39
39
 
40
40
  def gen_filter(
41
41
  self, filters: Dict[str, Optional[str]]
42
- ) -> Optional[Dict[str, List[Dict[str, str]]]]:
43
- filter_expression: Optional[Dict[str, List[Dict[str, str]]]] = None
42
+ ) -> Optional[Dict[str, List[Dict[str, Union[str, List[str]]]]]]:
43
+ filter_expression: Optional[
44
+ Dict[str, List[Dict[str, Union[str, List[str]]]]]
45
+ ] = None
44
46
  if not filters:
45
47
  return None
46
48
 
47
- filter = []
49
+ filter_list: List[Dict[str, Union[str, List[str]]]] = []
48
50
  for key, value in filters.items():
49
51
  if value is None:
50
52
  continue
51
- filter.append({"field": key, "value": value})
53
+ filter_list.append({"field": key, "values": [value]})
52
54
 
53
- filter_expression = {"and": filter}
55
+ filter_expression = {"and": filter_list}
54
56
  return filter_expression
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  from typing import Any, Dict, List, Optional
3
3
 
4
- from gql import gql
4
+ from gql import GraphQLRequest
5
5
 
6
6
  from datahub.api.graphql.base import BaseApi
7
7
 
@@ -55,10 +55,10 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
55
55
  Report operation metadata for a dataset.
56
56
  :param source_type: The source type to filter on. If not set it will accept any source type.
57
57
  Default value: DATA_PROCESS
58
- See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
58
+ See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
59
59
  :param operation_type: The operation type to filter on. If not set it will accept any source type.
60
60
  Default value: "UPDATE"
61
- See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype
61
+ See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
62
62
  :param partition: The partition to set the operation.
63
63
  :param num_affected_rows: The number of rows affected by this operation.
64
64
  :param custom_properties: Key/value pair of custom propertis
@@ -79,10 +79,12 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
79
79
  if custom_properties is not None:
80
80
  variable_values["customProperties"] = custom_properties
81
81
 
82
- result = self.client.execute(
83
- gql(Operation.REPORT_OPERATION_MUTATION), variable_values
82
+ request = GraphQLRequest(
83
+ Operation.REPORT_OPERATION_MUTATION, variable_values=variable_values
84
84
  )
85
85
 
86
+ result = self.client.execute(request)
87
+
86
88
  return result["reportOperation"]
87
89
 
88
90
  def query_operations(
@@ -103,18 +105,18 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
103
105
  :param end_time_millis: The end time in milliseconds until the operations will be queried.
104
106
  :param limit: The maximum number of items to return.
105
107
  :param source_type: The source type to filter on. If not set it will accept any source type.
106
- See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
108
+ See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
107
109
  :param operation_type: The operation type to filter on. If not set it will accept any source type.
108
- See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype
110
+ See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
109
111
  :param partition: The partition to check the operation.
110
112
  """
111
113
 
112
- result = self.client.execute(
113
- gql(Operation.QUERY_OPERATIONS),
114
+ request = GraphQLRequest(
115
+ Operation.QUERY_OPERATIONS,
114
116
  variable_values={
115
117
  "urn": urn,
116
118
  "startTimeMillis": start_time_millis,
117
- "end_time_millis": end_time_millis,
119
+ "endTimeMillis": end_time_millis,
118
120
  "limit": limit,
119
121
  "filter": self.gen_filter(
120
122
  {
@@ -125,6 +127,8 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
125
127
  ),
126
128
  },
127
129
  )
130
+
131
+ result = self.client.execute(request)
128
132
  if "dataset" in result and "operations" in result["dataset"]:
129
133
  operations = []
130
134
  if source_type is not None:
datahub/cli/check_cli.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
9
9
  from typing import Any, Dict, List, Optional, Union
10
10
 
11
11
  import click
12
+ from tabulate import tabulate
12
13
 
13
14
  from datahub._version import __package_name__
14
15
  from datahub.cli.json_file import check_mce_file
@@ -16,11 +17,12 @@ from datahub.configuration import config_loader
16
17
  from datahub.configuration.common import AllowDenyPattern
17
18
  from datahub.emitter.mce_builder import DEFAULT_ENV
18
19
  from datahub.ingestion.graph.client import get_default_graph
20
+ from datahub.ingestion.graph.config import ClientMode
19
21
  from datahub.ingestion.run.pipeline import Pipeline
20
22
  from datahub.ingestion.sink.sink_registry import sink_registry
21
23
  from datahub.ingestion.source.source_registry import source_registry
22
24
  from datahub.ingestion.transformer.transform_registry import transform_registry
23
- from datahub.telemetry import telemetry
25
+ from datahub.upgrade import upgrade
24
26
  from datahub.utilities.file_backed_collections import (
25
27
  ConnectionWrapper,
26
28
  FileBackedDict,
@@ -46,7 +48,6 @@ def check() -> None:
46
48
  @click.option(
47
49
  "--unpack-mces", default=False, is_flag=True, help="Converts MCEs into MCPs"
48
50
  )
49
- @telemetry.with_telemetry()
50
51
  def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
51
52
  """Check the schema of a metadata (MCE or MCP) JSON file."""
52
53
 
@@ -104,7 +105,6 @@ def metadata_file(json_file: str, rewrite: bool, unpack_mces: bool) -> None:
104
105
  default=(),
105
106
  help="[Advanced] Paths in the deepdiff object to ignore",
106
107
  )
107
- @telemetry.with_telemetry()
108
108
  def metadata_diff(
109
109
  actual_file: str, expected_file: str, verbose: bool, ignore_path: List[str]
110
110
  ) -> None:
@@ -141,7 +141,6 @@ def metadata_diff(
141
141
  type=str,
142
142
  default=None,
143
143
  )
144
- @telemetry.with_telemetry()
145
144
  def plugins(source: Optional[str], verbose: bool) -> None:
146
145
  """List the enabled ingestion plugins."""
147
146
 
@@ -233,7 +232,7 @@ def sql_format(sql: str, platform: str) -> None:
233
232
  default=True,
234
233
  help="Run in offline mode and disable schema-aware parsing.",
235
234
  )
236
- @telemetry.with_telemetry()
235
+ @upgrade.check_upgrade
237
236
  def sql_lineage(
238
237
  sql: Optional[str],
239
238
  sql_file: Optional[str],
@@ -259,7 +258,7 @@ def sql_lineage(
259
258
 
260
259
  graph = None
261
260
  if online:
262
- graph = get_default_graph()
261
+ graph = get_default_graph(ClientMode.CLI)
263
262
 
264
263
  lineage = create_lineage_sql_parsed_result(
265
264
  sql,
@@ -296,7 +295,6 @@ def sql_lineage(
296
295
  type=str,
297
296
  help="the input to validate",
298
297
  )
299
- @telemetry.with_telemetry()
300
298
  def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
301
299
  """Test input string against AllowDeny pattern in a DataHub recipe.
302
300
 
@@ -345,7 +343,6 @@ def test_allow_deny(config: str, input: str, pattern_key: str) -> None:
345
343
  type=str,
346
344
  help="The input to validate",
347
345
  )
348
- @telemetry.with_telemetry()
349
346
  def test_path_spec(config: str, input: str, path_spec_key: str) -> None:
350
347
  """Test input path string against PathSpec patterns in a DataHub recipe.
351
348
 
@@ -470,10 +467,95 @@ WHERE
470
467
 
471
468
 
472
469
  @check.command()
470
+ @upgrade.check_upgrade
473
471
  def server_config() -> None:
474
472
  """Print the server config."""
475
- graph = get_default_graph()
473
+ graph = get_default_graph(ClientMode.CLI)
476
474
 
477
475
  server_config = graph.get_server_config()
478
476
 
479
477
  click.echo(pprint.pformat(server_config))
478
+
479
+
480
+ @check.command()
481
+ @click.option(
482
+ "--urn", required=False, help="The urn or urn pattern (supports % for wildcard)"
483
+ )
484
+ @click.option("--aspect", default=None, help="Filter to a specific aspect name.")
485
+ @click.option(
486
+ "--start", type=int, default=None, help="Row number of sql store to restore from."
487
+ )
488
+ @click.option("--batch-size", type=int, default=None, help="How many rows to restore.")
489
+ @click.option(
490
+ "--file",
491
+ required=False,
492
+ type=click.Path(exists=True, dir_okay=True, readable=True),
493
+ help="File absolute path containing URNs (one per line) to restore indices",
494
+ )
495
+ @upgrade.check_upgrade
496
+ def restore_indices(
497
+ urn: Optional[str],
498
+ aspect: Optional[str],
499
+ start: Optional[int],
500
+ batch_size: Optional[int],
501
+ file: Optional[str],
502
+ ) -> None:
503
+ """Resync metadata changes into the search and graph indices."""
504
+ if urn is None and file is None:
505
+ raise click.UsageError("Either --urn or --file must be provided")
506
+ graph = get_default_graph(ClientMode.CLI)
507
+
508
+ graph.restore_indices(
509
+ urn_pattern=urn,
510
+ aspect=aspect,
511
+ start=start,
512
+ batch_size=batch_size,
513
+ file=file,
514
+ )
515
+
516
+
517
+ @check.command()
518
+ @upgrade.check_upgrade
519
+ def get_kafka_consumer_offsets() -> None:
520
+ """Get Kafka consumer offsets from the DataHub API."""
521
+ graph = get_default_graph(ClientMode.CLI)
522
+ result = graph.get_kafka_consumer_offsets()
523
+
524
+ table_data = []
525
+ headers = [
526
+ "Topic",
527
+ "Consumer Group",
528
+ "Schema",
529
+ "Partition",
530
+ "Offset",
531
+ "Lag",
532
+ "Avg Lag",
533
+ "Max Lag",
534
+ "Total Lag",
535
+ ]
536
+
537
+ for topic, consumers in result.items():
538
+ for consumer_group, schemas in consumers.items():
539
+ for schema, data in schemas.items():
540
+ metrics = data.get("metrics", {})
541
+ partitions = data.get("partitions", {})
542
+
543
+ for partition, partition_data in partitions.items():
544
+ table_data.append(
545
+ [
546
+ topic,
547
+ consumer_group,
548
+ schema,
549
+ partition,
550
+ partition_data.get("offset", "N/A"),
551
+ partition_data.get("lag", "N/A"),
552
+ metrics.get("avgLag", "N/A"),
553
+ metrics.get("maxLag", "N/A"),
554
+ metrics.get("totalLag", "N/A"),
555
+ ]
556
+ )
557
+
558
+ if table_data:
559
+ click.echo(tabulate(table_data, headers=headers, tablefmt="grid"))
560
+ else:
561
+ click.echo("No Kafka consumer offset data found.")
datahub/cli/cli_utils.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
3
  import time
4
4
  import typing
5
5
  from datetime import datetime
6
+ from functools import wraps
6
7
  from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union
7
8
 
8
9
  import click
@@ -424,3 +425,65 @@ def ensure_has_system_metadata(
424
425
  props = metadata.properties
425
426
  props["clientId"] = datahub_version.__package_name__
426
427
  props["clientVersion"] = datahub_version.__version__
428
+
429
+
430
+ def enable_auto_decorators(main_group: click.Group) -> None:
431
+ """
432
+ Enable automatic decorators for all click commands.
433
+ This wraps existing command callback functions to add upgrade and telemetry decorators.
434
+ """
435
+
436
+ def has_decorator(func: Any, module_pattern: str, function_pattern: str) -> bool:
437
+ """Check if function already has a specific decorator"""
438
+ if hasattr(func, "__wrapped__"):
439
+ current_func = func
440
+ while hasattr(current_func, "__wrapped__"):
441
+ # Check if this wrapper matches the module and function patterns
442
+ if (
443
+ hasattr(current_func, "__module__")
444
+ and module_pattern in current_func.__module__
445
+ and hasattr(current_func, "__name__")
446
+ and function_pattern in current_func.__name__
447
+ ):
448
+ return True
449
+ current_func = current_func.__wrapped__
450
+ return False
451
+
452
+ def has_telemetry_decorator(func):
453
+ return has_decorator(func, "telemetry", "with_telemetry")
454
+
455
+ def wrap_command_callback(command_obj):
456
+ """Wrap a command's callback function to add decorators"""
457
+ if hasattr(command_obj, "callback") and command_obj.callback:
458
+ original_callback = command_obj.callback
459
+
460
+ # Import here to avoid circular imports
461
+ from datahub.telemetry import telemetry
462
+
463
+ decorated_callback = original_callback
464
+
465
+ if not has_telemetry_decorator(decorated_callback):
466
+ log.debug(
467
+ f"Applying telemetry decorator to {original_callback.__module__}.{original_callback.__name__}"
468
+ )
469
+ decorated_callback = telemetry.with_telemetry()(decorated_callback)
470
+
471
+ # Preserve the original function's metadata
472
+ decorated_callback = wraps(original_callback)(decorated_callback)
473
+
474
+ command_obj.callback = decorated_callback
475
+
476
+ def wrap_group_commands(group_obj):
477
+ """Recursively wrap all commands in a group"""
478
+ if hasattr(group_obj, "commands"):
479
+ for _, command_obj in group_obj.commands.items():
480
+ if isinstance(command_obj, click.Group):
481
+ # Recursively wrap sub-groups
482
+ wrap_group_commands(command_obj)
483
+ else:
484
+ # Wrap individual commands
485
+ wrap_command_callback(command_obj)
486
+
487
+ wrap_group_commands(main_group)
488
+
489
+ log.debug("Auto-decorators enabled successfully")
@@ -11,14 +11,23 @@ import click
11
11
  import yaml
12
12
  from pydantic import BaseModel, ValidationError
13
13
 
14
- from datahub.cli.env_utils import get_boolean_env_variable
14
+ from datahub.configuration.env_vars import (
15
+ get_gms_host,
16
+ get_gms_port,
17
+ get_gms_protocol,
18
+ get_gms_token,
19
+ get_gms_url,
20
+ get_skip_config,
21
+ get_system_client_id,
22
+ get_system_client_secret,
23
+ )
15
24
  from datahub.ingestion.graph.config import DatahubClientConfig
16
25
 
17
26
  logger = logging.getLogger(__name__)
18
27
 
19
28
  CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
20
- DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
21
- DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
29
+ DATAHUB_CONFIG_PATH: str = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
30
+ DATAHUB_ROOT_FOLDER: str = os.path.expanduser("~/.datahub")
22
31
  ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
23
32
 
24
33
  ENV_DATAHUB_SYSTEM_CLIENT_ID = "DATAHUB_SYSTEM_CLIENT_ID"
@@ -36,15 +45,15 @@ class MissingConfigError(Exception):
36
45
 
37
46
 
38
47
  def get_system_auth() -> Optional[str]:
39
- system_client_id = os.environ.get(ENV_DATAHUB_SYSTEM_CLIENT_ID)
40
- system_client_secret = os.environ.get(ENV_DATAHUB_SYSTEM_CLIENT_SECRET)
48
+ system_client_id = get_system_client_id()
49
+ system_client_secret = get_system_client_secret()
41
50
  if system_client_id is not None and system_client_secret is not None:
42
51
  return f"Basic {system_client_id}:{system_client_secret}"
43
52
  return None
44
53
 
45
54
 
46
55
  def _should_skip_config() -> bool:
47
- return get_boolean_env_variable(ENV_SKIP_CONFIG, False)
56
+ return get_skip_config()
48
57
 
49
58
 
50
59
  def persist_raw_datahub_config(config: dict) -> None:
@@ -67,11 +76,11 @@ class DatahubConfig(BaseModel):
67
76
 
68
77
 
69
78
  def _get_config_from_env() -> Tuple[Optional[str], Optional[str]]:
70
- host = os.environ.get(ENV_METADATA_HOST)
71
- port = os.environ.get(ENV_METADATA_PORT)
72
- token = os.environ.get(ENV_METADATA_TOKEN)
73
- protocol = os.environ.get(ENV_METADATA_PROTOCOL, "http")
74
- url = os.environ.get(ENV_METADATA_HOST_URL)
79
+ host = get_gms_host()
80
+ port = get_gms_port()
81
+ token = get_gms_token()
82
+ protocol = get_gms_protocol()
83
+ url = get_gms_url()
75
84
  if port is not None:
76
85
  url = f"{protocol}://{host}:{port}"
77
86
  return url, token
@@ -108,7 +117,6 @@ def load_client_config() -> DatahubClientConfig:
108
117
  datahub_config: DatahubClientConfig = DatahubConfig.parse_obj(
109
118
  client_config_dict
110
119
  ).gms
111
-
112
120
  return datahub_config
113
121
  except ValidationError as e:
114
122
  click.echo(f"Error loading your {CONDENSED_DATAHUB_CONFIG_PATH}")
@@ -3,6 +3,7 @@ import logging
3
3
  import click
4
4
 
5
5
  from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
6
+ from datahub.upgrade import upgrade
6
7
 
7
8
  logger = logging.getLogger(__name__)
8
9
 
@@ -16,6 +17,7 @@ def container() -> None:
16
17
  @container.command()
17
18
  @click.option("--container-urn", required=True, type=str)
18
19
  @click.option("--tag-urn", required=True, type=str)
20
+ @upgrade.check_upgrade
19
21
  def tag(container_urn: str, tag_urn: str) -> None:
20
22
  """Add patch to add a tag to all datasets in a container"""
21
23
  apply_association_to_container(container_urn, tag_urn, "tag")
@@ -24,6 +26,7 @@ def tag(container_urn: str, tag_urn: str) -> None:
24
26
  @container.command()
25
27
  @click.option("--container-urn", required=True, type=str)
26
28
  @click.option("--term-urn", required=True, type=str)
29
+ @upgrade.check_upgrade
27
30
  def term(container_urn: str, term_urn: str) -> None:
28
31
  """Add patch to add a term to all datasets in a container"""
29
32
  apply_association_to_container(container_urn, term_urn, "term")
@@ -32,6 +35,7 @@ def term(container_urn: str, term_urn: str) -> None:
32
35
  @container.command()
33
36
  @click.option("--container-urn", required=True, type=str)
34
37
  @click.option("--owner-urn", required=True, type=str)
38
+ @upgrade.check_upgrade
35
39
  def owner(container_urn: str, owner_urn: str) -> None:
36
40
  """Add patch to add a owner to all datasets in a container"""
37
41
  apply_association_to_container(container_urn, owner_urn, "owner")
@@ -40,6 +44,7 @@ def owner(container_urn: str, owner_urn: str) -> None:
40
44
  @container.command()
41
45
  @click.option("--container-urn", required=True, type=str)
42
46
  @click.option("--domain-urn", required=True, type=str)
47
+ @upgrade.check_upgrade
43
48
  def domain(container_urn: str, domain_urn: str) -> None:
44
49
  """Add patch to add a domain to all datasets in a container"""
45
50
  apply_association_to_container(container_urn, domain_urn, "domain")