acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the changes between two publicly available package versions, as released to one of the supported registries and as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -22,25 +22,25 @@ from typing import (
22
22
  Union,
23
23
  )
24
24
 
25
+ import progressbar
25
26
  from avro.schema import RecordSchema
26
- from deprecated import deprecated
27
27
  from pydantic import BaseModel
28
28
  from requests.models import HTTPError
29
+ from typing_extensions import deprecated
29
30
 
31
+ from datahub._codegen.aspect import _Aspect
30
32
  from datahub.cli import config_utils
33
+ from datahub.cli.cli_utils import guess_frontend_url_from_gms_url
31
34
  from datahub.configuration.common import ConfigModel, GraphError, OperationalError
32
35
  from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
33
36
  from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
34
37
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
35
38
  from datahub.emitter.rest_emitter import (
36
- DEFAULT_REST_SINK_ENDPOINT,
37
- DEFAULT_REST_TRACE_MODE,
38
39
  DatahubRestEmitter,
39
- RestSinkEndpoint,
40
- RestTraceMode,
41
40
  )
42
41
  from datahub.emitter.serialization_helper import post_json_transform
43
42
  from datahub.ingestion.graph.config import (
43
+ ClientMode,
44
44
  DatahubClientConfig as DatahubClientConfig,
45
45
  )
46
46
  from datahub.ingestion.graph.connections import (
@@ -49,10 +49,12 @@ from datahub.ingestion.graph.connections import (
49
49
  )
50
50
  from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
51
51
  from datahub.ingestion.graph.filters import (
52
+ RawSearchFilter,
52
53
  RawSearchFilterRule,
53
54
  RemovedStatusFilter,
54
55
  generate_filter,
55
56
  )
57
+ from datahub.ingestion.graph.links import make_url_for_urn
56
58
  from datahub.ingestion.source.state.checkpoint import Checkpoint
57
59
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
58
60
  MetadataChangeEvent,
@@ -75,10 +77,19 @@ from datahub.metadata.schema_classes import (
75
77
  SystemMetadataClass,
76
78
  TelemetryClientIdClass,
77
79
  )
80
+ from datahub.metadata.urns import (
81
+ CorpUserUrn,
82
+ MlFeatureTableUrn,
83
+ MlFeatureUrn,
84
+ MlModelGroupUrn,
85
+ MlModelUrn,
86
+ MlPrimaryKeyUrn,
87
+ Urn,
88
+ )
78
89
  from datahub.telemetry.telemetry import telemetry_instance
79
90
  from datahub.utilities.perf_timer import PerfTimer
80
91
  from datahub.utilities.str_enum import StrEnum
81
- from datahub.utilities.urns.urn import Urn, guess_entity_type
92
+ from datahub.utilities.urns.urn import guess_entity_type
82
93
 
83
94
  if TYPE_CHECKING:
84
95
  from datahub.ingestion.sink.datahub_rest import (
@@ -116,8 +127,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
116
127
  """Convert the entity types into GraphQL "EntityType" enum values."""
117
128
 
118
129
  # Hard-coded special cases.
119
- if entity_type == "corpuser":
120
- return "CORP_USER"
130
+ special_cases = {
131
+ CorpUserUrn.ENTITY_TYPE: "CORP_USER",
132
+ MlModelUrn.ENTITY_TYPE: "MLMODEL",
133
+ MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
134
+ MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
135
+ MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
136
+ MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
137
+ }
138
+ if entity_type in special_cases:
139
+ return special_cases[entity_type]
121
140
 
122
141
  # Convert camelCase to UPPER_UNDERSCORE.
123
142
  entity_type = (
@@ -133,6 +152,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
133
152
  return entity_type
134
153
 
135
154
 
155
def flexible_entity_type_to_graphql(entity_type: str) -> str:
    """Return the GraphQL ``EntityType`` enum value for ``entity_type``.

    A value that is unchanged by ``str.upper()`` is assumed to already be a
    GraphQL enum value and is returned untouched; anything else is converted
    with ``entity_type_to_graphql``.
    """
    looks_like_enum_value = entity_type == entity_type.upper()
    if not looks_like_enum_value:
        return entity_type_to_graphql(entity_type)

    # Already upper-case: treat it as a GraphQL EntityType, no conversion needed.
    return entity_type
161
+
162
+
136
163
  class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
137
164
  def __init__(self, config: DatahubClientConfig) -> None:
138
165
  self.config = config
@@ -147,11 +174,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
147
174
  ca_certificate_path=self.config.ca_certificate_path,
148
175
  client_certificate_path=self.config.client_certificate_path,
149
176
  disable_ssl_verification=self.config.disable_ssl_verification,
150
- openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
151
- default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
177
+ openapi_ingestion=self.config.openapi_ingestion,
178
+ client_mode=config.client_mode,
179
+ datahub_component=config.datahub_component,
180
+ server_config_refresh_interval=config.server_config_refresh_interval,
152
181
  )
153
-
154
- self.server_id = _MISSING_SERVER_ID
182
+ self.server_id: str = _MISSING_SERVER_ID
155
183
 
156
184
  def test_connection(self) -> None:
157
185
  super().test_connection()
@@ -176,20 +204,36 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
176
204
  """Get the public-facing base url of the frontend
177
205
 
178
206
  This url can be used to construct links to the frontend. The url will not include a trailing slash.
207
+
179
208
  Note: Only supported with DataHub Cloud.
180
209
  """
181
210
 
182
- if not self.server_config:
211
+ if not hasattr(self, "server_config") or not self.server_config:
183
212
  self.test_connection()
184
213
 
185
- base_url = self.server_config.get("baseUrl")
214
+ base_url = self.server_config.raw_config.get("baseUrl")
186
215
  if not base_url:
187
216
  raise ValueError("baseUrl not found in server config")
188
217
  return base_url
189
218
 
219
def url_for(self, entity_urn: Union[str, Urn]) -> str:
    """Build the UI url of an entity.

    Note: Only supported with DataHub Cloud.

    Args:
        entity_urn: The urn of the entity, as a string or a ``Urn`` object.

    Returns:
        The public-facing url for the entity, as produced by
        ``make_url_for_urn`` from the frontend base url and the urn string.
    """
    # Read the base url first, matching the original evaluation order.
    base_url = self.frontend_base_url
    urn_text = str(entity_urn)
    return make_url_for_urn(base_url, urn_text)
232
+
190
233
  @classmethod
191
234
  def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
192
235
  session_config = emitter._session_config
236
+
193
237
  if isinstance(session_config.timeout, tuple):
194
238
  # TODO: This is slightly lossy. Eventually, we want to modify the emitter
195
239
  # to accept a tuple for timeout_sec, and then we'll be able to remove this.
@@ -207,6 +251,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
207
251
  disable_ssl_verification=session_config.disable_ssl_verification,
208
252
  ca_certificate_path=session_config.ca_certificate_path,
209
253
  client_certificate_path=session_config.client_certificate_path,
254
+ client_mode=session_config.client_mode,
255
+ datahub_component=session_config.datahub_component,
256
+ server_config_refresh_interval=emitter._server_config_refresh_interval,
210
257
  )
211
258
  )
212
259
 
@@ -330,7 +377,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
330
377
  f"Failed to find {aspect_type_name} in response {response_json}"
331
378
  )
332
379
 
333
- @deprecated(reason="Use get_aspect instead which makes aspect string name optional")
380
+ @deprecated("Use get_aspect instead which makes aspect string name optional")
334
381
  def get_aspect_v2(
335
382
  self,
336
383
  entity_urn: str,
@@ -347,7 +394,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
347
394
  )
348
395
 
349
396
  def get_config(self) -> Dict[str, Any]:
350
- return self._get_generic(f"{self.config.server}/config")
397
+ return self.server_config.raw_config
351
398
 
352
399
  def get_ownership(self, entity_urn: str) -> Optional[OwnershipClass]:
353
400
  return self.get_aspect(entity_urn=entity_urn, aspect_type=OwnershipClass)
@@ -355,7 +402,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
355
402
  def get_schema_metadata(self, entity_urn: str) -> Optional[SchemaMetadataClass]:
356
403
  return self.get_aspect(entity_urn=entity_urn, aspect_type=SchemaMetadataClass)
357
404
 
358
- @deprecated(reason="Use get_aspect directly.")
405
+ @deprecated("Use get_aspect directly.")
359
406
  def get_domain_properties(self, entity_urn: str) -> Optional[DomainPropertiesClass]:
360
407
  return self.get_aspect(entity_urn=entity_urn, aspect_type=DomainPropertiesClass)
361
408
 
@@ -376,7 +423,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
376
423
  def get_domain(self, entity_urn: str) -> Optional[DomainsClass]:
377
424
  return self.get_aspect(entity_urn=entity_urn, aspect_type=DomainsClass)
378
425
 
379
- @deprecated(reason="Use get_aspect directly.")
426
+ @deprecated("Use get_aspect directly.")
380
427
  def get_browse_path(self, entity_urn: str) -> Optional[BrowsePathsClass]:
381
428
  return self.get_aspect(entity_urn=entity_urn, aspect_type=BrowsePathsClass)
382
429
 
@@ -447,7 +494,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
447
494
  filter_criteria_map: Dict[str, str],
448
495
  ) -> Optional[Aspect]:
449
496
  filter_criteria = [
450
- {"field": k, "value": v, "condition": "EQUAL"}
497
+ {"field": k, "values": [v], "condition": "EQUAL"}
451
498
  for k, v in filter_criteria_map.items()
452
499
  ]
453
500
  filter = {"or": [{"and": filter_criteria}]}
@@ -475,7 +522,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
475
522
  "limit": limit,
476
523
  "filter": filter,
477
524
  }
478
- end_point = f"{self.config.server}/aspects?action=getTimeseriesAspectValues"
525
+ end_point = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
479
526
  resp: Dict = self._post_generic(end_point, query_body)
480
527
 
481
528
  values: Optional[List] = resp.get("value", {}).get("values")
@@ -495,7 +542,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
495
542
  def get_entity_raw(
496
543
  self, entity_urn: str, aspects: Optional[List[str]] = None
497
544
  ) -> Dict:
498
- endpoint: str = f"{self.config.server}/entitiesV2/{Urn.url_encode(entity_urn)}"
545
+ endpoint: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}"
499
546
  if aspects is not None:
500
547
  assert aspects, "if provided, aspects must be a non-empty list"
501
548
  endpoint = f"{endpoint}?aspects=List(" + ",".join(aspects) + ")"
@@ -505,7 +552,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
505
552
  return response.json()
506
553
 
507
554
  @deprecated(
508
- reason="Use get_aspect for a single aspect or get_entity_semityped for a full entity."
555
+ "Use get_aspect for a single aspect or get_entity_semityped for a full entity."
509
556
  )
510
557
  def get_aspects_for_entity(
511
558
  self,
@@ -625,18 +672,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
625
672
 
626
673
  @property
627
674
  def _search_endpoint(self):
628
- return f"{self.config.server}/entities?action=search"
675
+ return f"{self._gms_server}/entities?action=search"
629
676
 
630
677
  @property
631
678
  def _relationships_endpoint(self):
632
- return f"{self.config.server}/openapi/relationships/v1/"
679
+ return f"{self._gms_server}/openapi/relationships/v1/"
633
680
 
634
681
  @property
635
682
  def _aspect_count_endpoint(self):
636
- return f"{self.config.server}/aspects?action=getCount"
637
-
638
- # def _session(self) -> Session:
639
- # return super()._session
683
+ return f"{self._gms_server}/aspects?action=getCount"
640
684
 
641
685
  def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]:
642
686
  """Retrieve a domain urn based on its name. Returns None if there is no match found"""
@@ -645,7 +689,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
645
689
  filter_criteria = [
646
690
  {
647
691
  "field": "name",
648
- "value": domain_name,
692
+ "values": [domain_name],
649
693
  "condition": "EQUAL",
650
694
  }
651
695
  ]
@@ -749,9 +793,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
749
793
 
750
794
  assert res["upsertConnection"]["urn"] == urn
751
795
 
752
- @deprecated(
753
- reason='Use get_urns_by_filter(entity_types=["container"], ...) instead'
754
- )
796
+ @deprecated('Use get_urns_by_filter(entity_types=["container"], ...) instead')
755
797
  def get_container_urns_by_filter(
756
798
  self,
757
799
  env: Optional[str] = None,
@@ -767,7 +809,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
767
809
  filter_criteria.append(
768
810
  {
769
811
  "field": "customProperties",
770
- "value": f"instance={env}",
812
+ "values": [f"instance={env}"],
771
813
  "condition": "EQUAL",
772
814
  }
773
815
  )
@@ -775,7 +817,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
775
817
  filter_criteria.append(
776
818
  {
777
819
  "field": "typeNames",
778
- "value": container_subtype,
820
+ "values": [container_subtype],
779
821
  "condition": "EQUAL",
780
822
  }
781
823
  )
@@ -784,7 +826,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
784
826
  "input": search_query,
785
827
  "entity": "container",
786
828
  "start": 0,
787
- "count": 10000,
829
+ "count": 5000,
788
830
  "filter": {"or": container_filters},
789
831
  }
790
832
  results: Dict = self._post_generic(url, search_body)
@@ -797,11 +839,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
797
839
  def _bulk_fetch_schema_info_by_filter(
798
840
  self,
799
841
  *,
800
- platform: Optional[str] = None,
842
+ platform: Union[None, str, List[str]] = None,
801
843
  platform_instance: Optional[str] = None,
802
844
  env: Optional[str] = None,
803
845
  query: Optional[str] = None,
804
- container: Optional[str] = None,
846
+ container: Union[None, str, List[str]] = None,
805
847
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
806
848
  batch_size: int = 100,
807
849
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
@@ -810,7 +852,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
810
852
 
811
853
  :return: An iterable of (urn, schema info) tuple that match the filters.
812
854
  """
813
- types = [entity_type_to_graphql("dataset")]
855
+ types = self._get_types(["dataset"])
814
856
 
815
857
  # Add the query default of * if no query is specified.
816
858
  query = query or "*"
@@ -873,15 +915,16 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
873
915
  self,
874
916
  *,
875
917
  entity_types: Optional[Sequence[str]] = None,
876
- platform: Optional[str] = None,
918
+ platform: Union[None, str, List[str]] = None,
877
919
  platform_instance: Optional[str] = None,
878
920
  env: Optional[str] = None,
879
921
  query: Optional[str] = None,
880
- container: Optional[str] = None,
881
- status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
882
- batch_size: int = 10000,
922
+ container: Union[None, str, List[str]] = None,
923
+ status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
924
+ batch_size: int = 5000,
883
925
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
884
- extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
926
+ extra_or_filters: Optional[RawSearchFilter] = None,
927
+ skip_cache: bool = False,
885
928
  ) -> Iterable[str]:
886
929
  """Fetch all urns that match all of the given filters.
887
930
 
@@ -900,6 +943,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
900
943
  Note that this requires browsePathV2 aspects (added in 0.10.4+).
901
944
  :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
902
945
  :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
946
+ :param skip_cache: Whether to bypass caching. Defaults to False.
903
947
 
904
948
  :return: An iterable of urns that match the filters.
905
949
  """
@@ -927,7 +971,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
927
971
  $query: String!,
928
972
  $orFilters: [AndFilterInput!],
929
973
  $batchSize: Int!,
930
- $scrollId: String) {
974
+ $scrollId: String,
975
+ $skipCache: Boolean!,
976
+ $includeSoftDeleted: Boolean) {
931
977
 
932
978
  scrollAcrossEntities(input: {
933
979
  query: $query,
@@ -938,6 +984,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
938
984
  searchFlags: {
939
985
  skipHighlighting: true
940
986
  skipAggregates: true
987
+ skipCache: $skipCache
988
+ includeSoftDeleted: $includeSoftDeleted
941
989
  }
942
990
  }) {
943
991
  nextScrollId
@@ -956,6 +1004,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
956
1004
  "query": query,
957
1005
  "orFilters": orFilters,
958
1006
  "batchSize": batch_size,
1007
+ "skipCache": skip_cache,
1008
+ "includeSoftDeleted": (
1009
+ None
1010
+ if status is None
1011
+ else status != RemovedStatusFilter.NOT_SOFT_DELETED
1012
+ ),
959
1013
  }
960
1014
 
961
1015
  for entity in self._scroll_across_entities(graphql_query, variables):
@@ -965,15 +1019,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
965
1019
  self,
966
1020
  *,
967
1021
  entity_types: Optional[List[str]] = None,
968
- platform: Optional[str] = None,
1022
+ platform: Union[None, str, List[str]] = None,
969
1023
  platform_instance: Optional[str] = None,
970
1024
  env: Optional[str] = None,
971
1025
  query: Optional[str] = None,
972
- container: Optional[str] = None,
1026
+ container: Union[None, str, List[str]] = None,
973
1027
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
974
- batch_size: int = 10000,
1028
+ batch_size: int = 5000,
975
1029
  extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
976
- extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
1030
+ extra_or_filters: Optional[RawSearchFilter] = None,
977
1031
  extra_source_fields: Optional[List[str]] = None,
978
1032
  skip_cache: bool = False,
979
1033
  ) -> Iterable[dict]:
@@ -1061,7 +1115,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1061
1115
  "query": query,
1062
1116
  "orFilters": or_filters_final,
1063
1117
  "batchSize": batch_size,
1064
- "skipCache": "true" if skip_cache else "false",
1118
+ "skipCache": skip_cache,
1065
1119
  "fetchExtraFields": extra_source_fields,
1066
1120
  }
1067
1121
 
@@ -1126,7 +1180,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1126
1180
  )
1127
1181
 
1128
1182
  types = [
1129
- entity_type_to_graphql(entity_type) for entity_type in entity_types
1183
+ flexible_entity_type_to_graphql(entity_type)
1184
+ for entity_type in entity_types
1130
1185
  ]
1131
1186
  return types
1132
1187
 
@@ -1179,7 +1234,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1179
1234
  operation_name: Optional[str] = None,
1180
1235
  format_exception: bool = True,
1181
1236
  ) -> Dict:
1182
- url = f"{self.config.server}/api/graphql"
1237
+ url = f"{self._gms_server}/api/graphql"
1183
1238
 
1184
1239
  body: Dict = {
1185
1240
  "query": query,
@@ -1404,6 +1459,83 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1404
1459
  related_aspects = response.get("relatedAspects", [])
1405
1460
  return reference_count, related_aspects
1406
1461
 
1462
def get_kafka_consumer_offsets(
    self,
) -> dict:
    """Get Kafka consumer offsets from the DataHub operations API.

    Queries the ``mcp``, ``mcl`` and ``mcl-timeseries`` consumer offset
    endpoints, each with ``skipCache=true`` and ``detailed=true``.

    Returns:
        A dict keyed by ``"mcp"``, ``"mcl"`` and ``"mcl-timeseries"`` whose
        values are the responses returned by ``_get_generic``. A response
        that contains an ``"errors"`` key is logged at error level and is
        still included in the result.
    """
    # Build the URLs from the emitter's server URL, like every other endpoint
    # in this class, rather than from the raw configured value.
    offsets_base = f"{self._gms_server}/openapi/operations/kafka"
    consumers = ("mcp", "mcl", "mcl-timeseries")

    params = {"skipCache": "true", "detailed": "true"}
    results = {}
    for key in consumers:
        response = self._get_generic(
            url=f"{offsets_base}/{key}/consumer/offsets", params=params
        )
        results[key] = response
        if "errors" in response:
            logger.error(f"Error: {response['errors']}")
    return results
1486
+
1487
def _restore_index_call(self, payload_obj: dict) -> None:
    """POST ``payload_obj`` to the ``restoreIndices`` operations action.

    The response is only written to the debug log; nothing is returned.
    """
    endpoint = f"{self._gms_server}/operations?action=restoreIndices"
    result = self._post_generic(endpoint, payload_obj)
    logger.debug(f"Restore indices result: {result}")
1492
+
1493
def restore_indices(
    self,
    urn_pattern: Optional[str] = None,
    aspect: Optional[str] = None,
    start: Optional[int] = None,
    batch_size: Optional[int] = None,
    file: Optional[str] = None,
) -> None:
    """Restore the indices for a given urn or urn-like pattern.

    Args:
        urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs. If not provided, will restore indices from the file.
        aspect: Optional aspect string to restore indices for a specific aspect.
        start: Optional integer to decide which row number of sql store to restore from. Default: 0. Ignored in case file is provided.
        batch_size: Optional integer to decide how many rows to restore. Default: 10. Ignored in case file is provided.
        file: Optional file path to a file containing URNs to restore indices for.

    Returns:
        None. Every request is sent through ``_restore_index_call``: one per
        non-blank line when ``file`` is given, otherwise a single request.
    """
    if file is not None:
        with open(file) as f:
            for line in progressbar.progressbar(f.readlines()):
                urn = line.strip()
                if not urn:
                    # A blank line carries no urn; don't send an empty request.
                    continue

                # Start from a fresh payload for every line so that a
                # "urnLike" key from a wildcard line cannot leak into a
                # later exact-urn request (or the other way around).
                line_payload: Dict[str, Any] = {}
                line_payload["urnLike" if "%" in urn else "urn"] = urn
                if aspect is not None:
                    line_payload["aspect"] = aspect
                self._restore_index_call(line_payload)
        return

    payload_obj: Dict[str, Any] = {}
    if urn_pattern is not None:
        # "%" marks a wildcard pattern; anything else is an exact urn.
        payload_obj["urnLike" if "%" in urn_pattern else "urn"] = urn_pattern
    if aspect is not None:
        payload_obj["aspect"] = aspect
    if start is not None:
        payload_obj["start"] = start
    if batch_size is not None:
        payload_obj["batchSize"] = batch_size
    self._restore_index_call(payload_obj)
1538
+
1407
1539
  @functools.lru_cache
1408
1540
  def _make_schema_resolver(
1409
1541
  self,
@@ -1468,7 +1600,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1468
1600
  env: str = DEFAULT_ENV,
1469
1601
  default_db: Optional[str] = None,
1470
1602
  default_schema: Optional[str] = None,
1471
- default_dialect: Optional[str] = None,
1603
+ override_dialect: Optional[str] = None,
1472
1604
  ) -> "SqlParsingResult":
1473
1605
  from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
1474
1606
 
@@ -1482,7 +1614,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1482
1614
  schema_resolver=schema_resolver,
1483
1615
  default_db=default_db,
1484
1616
  default_schema=default_schema,
1485
- default_dialect=default_dialect,
1617
+ override_dialect=override_dialect,
1486
1618
  )
1487
1619
 
1488
1620
  def create_tag(self, tag_name: str) -> str:
@@ -1691,6 +1823,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1691
1823
 
1692
1824
  return res["runAssertionsForAsset"]
1693
1825
 
1826
+ @deprecated("Use get_entities instead which returns typed aspects")
1694
1827
  def get_entities_v2(
1695
1828
  self,
1696
1829
  entity_name: str,
@@ -1708,7 +1841,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1708
1841
  "Accept": "application/json",
1709
1842
  "Content-Type": "application/json",
1710
1843
  }
1711
- url = f"{self.config.server}/openapi/v2/entity/batch/{entity_name}"
1844
+ url = f"{self._gms_server}/openapi/v2/entity/batch/{entity_name}"
1712
1845
  response = self._session.post(url, data=json.dumps(payload), headers=headers)
1713
1846
  response.raise_for_status()
1714
1847
 
@@ -1730,6 +1863,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1730
1863
  retval[entity_urn][aspect_key] = aspect_value
1731
1864
  return retval
1732
1865
 
1866
def get_entities(
    self,
    entity_name: str,
    urns: List[str],
    aspects: Optional[List[str]] = None,
    with_system_metadata: bool = False,
) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
    """Batch-fetch entities via the OpenAPI v3 endpoint as typed aspect objects.

    Args:
        entity_name: The entity type name, used in the endpoint path.
        urns: List of entity URNs to fetch.
        aspects: Optional list of aspect names to fetch. If None, all aspects will be fetched.
        with_system_metadata: If True, request system metadata and return it
            alongside each aspect.

    Returns:
        A dictionary mapping each URN to a dictionary of aspect name ->
        (typed aspect object, system metadata). The system metadata entry is
        None when ``with_system_metadata`` is False or the response carries
        none for that aspect. Entities without a URN, unknown aspect names and
        aspects without a value are skipped with a warning; entities that end
        up with no aspects are left out of the result.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a failed request.
        Any error hit while deserializing an aspect is logged and re-raised.
    """
    wanted_aspects = aspects or []

    # One request entry per urn: the urn itself plus an empty object for
    # every requested aspect name.
    request_payload = [
        {"urn": urn, **{wanted: {} for wanted in wanted_aspects}} for urn in urns
    ]

    headers: Dict[str, Any] = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }

    query_suffix = "?systemMetadata=true" if with_system_metadata else ""
    url = f"{self._gms_server}/openapi/v3/entity/{entity_name}/batchGet{query_suffix}"

    response = self._session.post(
        url, data=json.dumps(request_payload), headers=headers
    )
    response.raise_for_status()
    entities = response.json()

    result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}

    for entity in entities:
        entity_urn = entity.get("urn")
        if entity_urn is None:
            logger.warning(
                f"Missing URN in entity response: {entity}, skipping deserialization"
            )
            continue

        typed_by_name: Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]] = {}

        for aspect_name, aspect_obj in entity.items():
            # The "urn" key sits next to the aspects in the same object.
            if aspect_name == "urn":
                continue

            aspect_class = ASPECT_NAME_MAP.get(aspect_name)
            if aspect_class is None:
                logger.warning(
                    f"Unknown aspect type {aspect_name}, skipping deserialization"
                )
                continue

            aspect_value = aspect_obj.get("value")
            if aspect_value is None:
                logger.warning(
                    f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
                )
                continue

            try:
                typed_aspect = aspect_class.from_obj(post_json_transform(aspect_value))
                assert isinstance(typed_aspect, aspect_class) and isinstance(
                    typed_aspect, _Aspect
                )

                system_metadata = None
                if with_system_metadata:
                    raw_system_metadata = aspect_obj.get("systemMetadata")
                    if raw_system_metadata:
                        system_metadata = SystemMetadataClass.from_obj(
                            raw_system_metadata
                        )

                typed_by_name[aspect_name] = (typed_aspect, system_metadata)
            except Exception as e:
                logger.error(f"Error deserializing aspect {aspect_name}: {e}")
                raise

        if typed_by_name:
            result[entity_urn] = typed_by_name

    return result
1967
+
1733
1968
  def upsert_custom_assertion(
1734
1969
  self,
1735
1970
  urn: Optional[str],
@@ -1837,13 +2072,215 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1837
2072
 
1838
2073
  return res["reportAssertionResult"]
1839
2074
 
2075
+ def _get_invite_token(self) -> str:
2076
+ """
2077
+ Retrieve an invite token for user creation.
2078
+
2079
+ Returns:
2080
+ Invite token string
2081
+
2082
+ Raises:
2083
+ OperationalError: If invite token retrieval fails
2084
+ """
2085
+ get_invite_token_query = """
2086
+ query getInviteToken($input: GetInviteTokenInput!) {
2087
+ getInviteToken(input: $input) {
2088
+ inviteToken
2089
+ }
2090
+ }
2091
+ """
2092
+
2093
+ try:
2094
+ invite_token_response = self.execute_graphql(
2095
+ query=get_invite_token_query,
2096
+ variables={"input": {}},
2097
+ )
2098
+ invite_token = invite_token_response.get("getInviteToken", {}).get(
2099
+ "inviteToken"
2100
+ )
2101
+ if not invite_token:
2102
+ raise OperationalError(
2103
+ "Failed to retrieve invite token. Ensure you have admin permissions.",
2104
+ {},
2105
+ )
2106
+ return invite_token
2107
+ except Exception as e:
2108
+ raise OperationalError(
2109
+ f"Failed to retrieve invite token: {str(e)}", {}
2110
+ ) from e
2111
+
2112
def _create_user_with_token(
    self,
    user_urn: str,
    email: str,
    display_name: str,
    password: str,
    invite_token: str,
) -> None:
    """Create a user using the signup endpoint.

    Posts the signup payload to ``{frontend_url}/signUp``, where the frontend
    url is derived from the GMS url by ``guess_frontend_url_from_gms_url``.

    Args:
        user_urn: User URN (urn:li:corpuser:{user_id})
        email: User's email address
        display_name: Full display name for the user
        password: User's password
        invite_token: Invite token for user creation

    Raises:
        OperationalError: If user creation fails
    """
    frontend_url = guess_frontend_url_from_gms_url(self._gms_server)
    signup_url = f"{frontend_url}/signUp"
    signup_payload = {
        "userUrn": user_urn,
        "email": email,
        "fullName": display_name,
        "password": password,
        "title": "Other",
        "inviteToken": invite_token,
    }

    logger.debug(
        f"Creating user with URN={user_urn}, email={email} at URL: {signup_url}"
    )
    # The password and the invite token are both credentials; keep them out
    # of the log output.
    redacted_payload = {**signup_payload, "password": "***", "inviteToken": "***"}
    logger.debug(f"Signup payload: {json.dumps(redacted_payload)}")

    try:
        response = self._session.post(signup_url, json=signup_payload)
        logger.debug(f"Response status code: {response.status_code}")
        logger.debug(f"Response headers: {dict(response.headers)}")
        logger.debug(f"Response content length: {len(response.text)}")

        response.raise_for_status()

        # The /signUp endpoint returns 200 with empty body on success
        logger.debug("User created successfully")

    except HTTPError as http_err:
        error_details: Dict[str, Any] = {
            "url": signup_url,
            "status_code": response.status_code,
            "response_text": response.text[:500],
        }
        try:
            error_json = response.json()
        except ValueError:
            # JSON decode errors are ValueError subclasses, so this also
            # covers an empty or non-JSON error body.
            error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
        else:
            error_details["error_response"] = error_json
            # Only a JSON object can carry a "message" field; fall back to
            # the HTTP error text for lists, strings, etc.
            if isinstance(error_json, dict):
                error_msg = error_json.get("message", str(http_err))
            else:
                error_msg = str(http_err)

        raise OperationalError(
            f"Failed to create user: {error_msg}",
            error_details,
        ) from http_err
    except Exception as e:
        raise OperationalError(
            f"Failed to create user: {str(e)}",
            {"url": signup_url, "error_type": type(e).__name__},
        ) from e
2184
+
2185
+ def _assign_role_to_user(self, user_urn: str, role: str) -> None:
2186
+ """
2187
+ Assign a role to a user.
2188
+
2189
+ Args:
2190
+ user_urn: User URN
2191
+ role: Role to assign (Admin, Editor, or Reader)
2192
+
2193
+ Raises:
2194
+ ValueError: If role is invalid
2195
+ """
2196
+ normalized_role = role.capitalize()
2197
+ valid_roles = ["Admin", "Editor", "Reader"]
2198
+ if normalized_role not in valid_roles:
2199
+ raise ValueError(
2200
+ f"Invalid role '{role}'. Must be one of: {', '.join(valid_roles)}"
2201
+ )
2202
+
2203
+ role_urn = f"urn:li:dataHubRole:{normalized_role}"
2204
+
2205
+ batch_assign_role_mutation = """
2206
+ mutation batchAssignRole($input: BatchAssignRoleInput!) {
2207
+ batchAssignRole(input: $input)
2208
+ }
2209
+ """
2210
+
2211
+ try:
2212
+ self.execute_graphql(
2213
+ query=batch_assign_role_mutation,
2214
+ variables={"input": {"roleUrn": role_urn, "actors": [user_urn]}},
2215
+ )
2216
+ except Exception as e:
2217
+ logger.warning(f"Role assignment failed for user {user_urn}: {str(e)}")
2218
+ raise
2219
+
2220
def create_native_user(
    self,
    user_id: str,
    email: str,
    display_name: str,
    password: str,
    role: Optional[str] = None,
) -> str:
    """Create a native DataHub user with email/password authentication.

    The role, when given, is validated before anything is created. The user
    is then signed up with a freshly retrieved invite token, and the role is
    assigned afterwards. A failed role assignment is logged as a warning and
    does not fail the call.

    Args:
        user_id: User identifier (will be used in the URN)
        email: User's email address
        display_name: Full display name for the user
        password: User's password
        role: Optional role to assign (Admin, Editor, or Reader)

    Returns:
        User URN of the created user (urn:li:corpuser:{user_id})

    Raises:
        OperationalError: If user creation fails
        ValueError: If role is invalid
    """
    wants_role = bool(role)

    # Reject a bad role up front, before any user is created.
    if wants_role:
        allowed_roles = ["Admin", "Editor", "Reader"]
        if role.capitalize() not in allowed_roles:
            raise ValueError(
                f"Invalid role '{role}'. Must be one of: {', '.join(allowed_roles)}"
            )

    user_urn = f"urn:li:corpuser:{user_id}"

    token = self._get_invite_token()
    self._create_user_with_token(user_urn, email, display_name, password, token)

    if not wants_role:
        return user_urn

    try:
        self._assign_role_to_user(user_urn, role)
    except Exception as e:
        # Best effort: the user already exists at this point.
        logger.warning(
            f"User {email} created successfully, but role assignment failed: {str(e)}"
        )

    return user_urn
2270
+
1840
2271
  def close(self) -> None:
1841
2272
  self._make_schema_resolver.cache_clear()
1842
2273
  super().close()
1843
2274
 
1844
2275
 
1845
- def get_default_graph() -> DataHubGraph:
2276
+ @functools.lru_cache(maxsize=None)
2277
+ def get_default_graph(
2278
+ client_mode: Optional[ClientMode] = None,
2279
+ datahub_component: Optional[str] = None,
2280
+ ) -> DataHubGraph:
1846
2281
  graph_config = config_utils.load_client_config()
2282
+ graph_config.client_mode = client_mode
2283
+ graph_config.datahub_component = datahub_component
1847
2284
  graph = DataHubGraph(graph_config)
1848
2285
  graph.test_connection()
1849
2286
  telemetry_instance.set_context(server=graph)