acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414) [hide | show]
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@ from typing import (
22
22
  Union,
23
23
  )
24
24
 
25
+ import progressbar
25
26
  from avro.schema import RecordSchema
26
27
  from pydantic import BaseModel
27
28
  from requests.models import HTTPError
@@ -29,6 +30,7 @@ from typing_extensions import deprecated
29
30
 
30
31
  from datahub._codegen.aspect import _Aspect
31
32
  from datahub.cli import config_utils
33
+ from datahub.cli.cli_utils import guess_frontend_url_from_gms_url
32
34
  from datahub.configuration.common import ConfigModel, GraphError, OperationalError
33
35
  from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
34
36
  from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
@@ -75,7 +77,15 @@ from datahub.metadata.schema_classes import (
75
77
  SystemMetadataClass,
76
78
  TelemetryClientIdClass,
77
79
  )
78
- from datahub.metadata.urns import CorpUserUrn, Urn
80
+ from datahub.metadata.urns import (
81
+ CorpUserUrn,
82
+ MlFeatureTableUrn,
83
+ MlFeatureUrn,
84
+ MlModelGroupUrn,
85
+ MlModelUrn,
86
+ MlPrimaryKeyUrn,
87
+ Urn,
88
+ )
79
89
  from datahub.telemetry.telemetry import telemetry_instance
80
90
  from datahub.utilities.perf_timer import PerfTimer
81
91
  from datahub.utilities.str_enum import StrEnum
@@ -117,8 +127,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
117
127
  """Convert the entity types into GraphQL "EntityType" enum values."""
118
128
 
119
129
  # Hard-coded special cases.
120
- if entity_type == CorpUserUrn.ENTITY_TYPE:
121
- return "CORP_USER"
130
+ special_cases = {
131
+ CorpUserUrn.ENTITY_TYPE: "CORP_USER",
132
+ MlModelUrn.ENTITY_TYPE: "MLMODEL",
133
+ MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
134
+ MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
135
+ MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
136
+ MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
137
+ }
138
+ if entity_type in special_cases:
139
+ return special_cases[entity_type]
122
140
 
123
141
  # Convert camelCase to UPPER_UNDERSCORE.
124
142
  entity_type = (
@@ -159,6 +177,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
159
177
  openapi_ingestion=self.config.openapi_ingestion,
160
178
  client_mode=config.client_mode,
161
179
  datahub_component=config.datahub_component,
180
+ server_config_refresh_interval=config.server_config_refresh_interval,
162
181
  )
163
182
  self.server_id: str = _MISSING_SERVER_ID
164
183
 
@@ -189,7 +208,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
189
208
  Note: Only supported with DataHub Cloud.
190
209
  """
191
210
 
192
- if not self.server_config:
211
+ if not hasattr(self, "server_config") or not self.server_config:
193
212
  self.test_connection()
194
213
 
195
214
  base_url = self.server_config.raw_config.get("baseUrl")
@@ -234,6 +253,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
234
253
  client_certificate_path=session_config.client_certificate_path,
235
254
  client_mode=session_config.client_mode,
236
255
  datahub_component=session_config.datahub_component,
256
+ server_config_refresh_interval=emitter._server_config_refresh_interval,
237
257
  )
238
258
  )
239
259
 
@@ -502,7 +522,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
502
522
  "limit": limit,
503
523
  "filter": filter,
504
524
  }
505
- end_point = f"{self.config.server}/aspects?action=getTimeseriesAspectValues"
525
+ end_point = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
506
526
  resp: Dict = self._post_generic(end_point, query_body)
507
527
 
508
528
  values: Optional[List] = resp.get("value", {}).get("values")
@@ -522,7 +542,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
522
542
  def get_entity_raw(
523
543
  self, entity_urn: str, aspects: Optional[List[str]] = None
524
544
  ) -> Dict:
525
- endpoint: str = f"{self.config.server}/entitiesV2/{Urn.url_encode(entity_urn)}"
545
+ endpoint: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}"
526
546
  if aspects is not None:
527
547
  assert aspects, "if provided, aspects must be a non-empty list"
528
548
  endpoint = f"{endpoint}?aspects=List(" + ",".join(aspects) + ")"
@@ -652,15 +672,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
652
672
 
653
673
  @property
654
674
  def _search_endpoint(self):
655
- return f"{self.config.server}/entities?action=search"
675
+ return f"{self._gms_server}/entities?action=search"
656
676
 
657
677
  @property
658
678
  def _relationships_endpoint(self):
659
- return f"{self.config.server}/openapi/relationships/v1/"
679
+ return f"{self._gms_server}/openapi/relationships/v1/"
660
680
 
661
681
  @property
662
682
  def _aspect_count_endpoint(self):
663
- return f"{self.config.server}/aspects?action=getCount"
683
+ return f"{self._gms_server}/aspects?action=getCount"
664
684
 
665
685
  def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]:
666
686
  """Retrieve a domain urn based on its name. Returns None if there is no match found"""
@@ -806,7 +826,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
806
826
  "input": search_query,
807
827
  "entity": "container",
808
828
  "start": 0,
809
- "count": 10000,
829
+ "count": 5000,
810
830
  "filter": {"or": container_filters},
811
831
  }
812
832
  results: Dict = self._post_generic(url, search_body)
@@ -819,11 +839,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
819
839
  def _bulk_fetch_schema_info_by_filter(
820
840
  self,
821
841
  *,
822
- platform: Optional[str] = None,
842
+ platform: Union[None, str, List[str]] = None,
823
843
  platform_instance: Optional[str] = None,
824
844
  env: Optional[str] = None,
825
845
  query: Optional[str] = None,
826
- container: Optional[str] = None,
846
+ container: Union[None, str, List[str]] = None,
827
847
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
828
848
  batch_size: int = 100,
829
849
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
@@ -895,15 +915,16 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
895
915
  self,
896
916
  *,
897
917
  entity_types: Optional[Sequence[str]] = None,
898
- platform: Optional[str] = None,
918
+ platform: Union[None, str, List[str]] = None,
899
919
  platform_instance: Optional[str] = None,
900
920
  env: Optional[str] = None,
901
921
  query: Optional[str] = None,
902
- container: Optional[str] = None,
922
+ container: Union[None, str, List[str]] = None,
903
923
  status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
904
- batch_size: int = 10000,
924
+ batch_size: int = 5000,
905
925
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
906
926
  extra_or_filters: Optional[RawSearchFilter] = None,
927
+ skip_cache: bool = False,
907
928
  ) -> Iterable[str]:
908
929
  """Fetch all urns that match all of the given filters.
909
930
 
@@ -922,6 +943,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
922
943
  Note that this requires browsePathV2 aspects (added in 0.10.4+).
923
944
  :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
924
945
  :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
946
+ :param skip_cache: Whether to bypass caching. Defaults to False.
925
947
 
926
948
  :return: An iterable of urns that match the filters.
927
949
  """
@@ -949,7 +971,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
949
971
  $query: String!,
950
972
  $orFilters: [AndFilterInput!],
951
973
  $batchSize: Int!,
952
- $scrollId: String) {
974
+ $scrollId: String,
975
+ $skipCache: Boolean!,
976
+ $includeSoftDeleted: Boolean) {
953
977
 
954
978
  scrollAcrossEntities(input: {
955
979
  query: $query,
@@ -960,6 +984,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
960
984
  searchFlags: {
961
985
  skipHighlighting: true
962
986
  skipAggregates: true
987
+ skipCache: $skipCache
988
+ includeSoftDeleted: $includeSoftDeleted
963
989
  }
964
990
  }) {
965
991
  nextScrollId
@@ -978,6 +1004,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
978
1004
  "query": query,
979
1005
  "orFilters": orFilters,
980
1006
  "batchSize": batch_size,
1007
+ "skipCache": skip_cache,
1008
+ "includeSoftDeleted": (
1009
+ None
1010
+ if status is None
1011
+ else status != RemovedStatusFilter.NOT_SOFT_DELETED
1012
+ ),
981
1013
  }
982
1014
 
983
1015
  for entity in self._scroll_across_entities(graphql_query, variables):
@@ -987,13 +1019,13 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
987
1019
  self,
988
1020
  *,
989
1021
  entity_types: Optional[List[str]] = None,
990
- platform: Optional[str] = None,
1022
+ platform: Union[None, str, List[str]] = None,
991
1023
  platform_instance: Optional[str] = None,
992
1024
  env: Optional[str] = None,
993
1025
  query: Optional[str] = None,
994
- container: Optional[str] = None,
1026
+ container: Union[None, str, List[str]] = None,
995
1027
  status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
996
- batch_size: int = 10000,
1028
+ batch_size: int = 5000,
997
1029
  extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
998
1030
  extra_or_filters: Optional[RawSearchFilter] = None,
999
1031
  extra_source_fields: Optional[List[str]] = None,
@@ -1083,7 +1115,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1083
1115
  "query": query,
1084
1116
  "orFilters": or_filters_final,
1085
1117
  "batchSize": batch_size,
1086
- "skipCache": "true" if skip_cache else "false",
1118
+ "skipCache": skip_cache,
1087
1119
  "fetchExtraFields": extra_source_fields,
1088
1120
  }
1089
1121
 
@@ -1202,7 +1234,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1202
1234
  operation_name: Optional[str] = None,
1203
1235
  format_exception: bool = True,
1204
1236
  ) -> Dict:
1205
- url = f"{self.config.server}/api/graphql"
1237
+ url = f"{self._gms_server}/api/graphql"
1206
1238
 
1207
1239
  body: Dict = {
1208
1240
  "query": query,
@@ -1427,6 +1459,83 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1427
1459
  related_aspects = response.get("relatedAspects", [])
1428
1460
  return reference_count, related_aspects
1429
1461
 
1462
+ def get_kafka_consumer_offsets(
1463
+ self,
1464
+ ) -> dict:
1465
+ """
1466
+ Get Kafka consumer offsets from the DataHub API.
1467
+
1468
+ Returns:
1469
+ dict: The raw response for each consumer, keyed by "mcp", "mcl" and "mcl-timeseries".
1470
+
1471
+ """
1472
+ urls = {
1473
+ "mcp": f"{self.config.server}/openapi/operations/kafka/mcp/consumer/offsets",
1474
+ "mcl": f"{self.config.server}/openapi/operations/kafka/mcl/consumer/offsets",
1475
+ "mcl-timeseries": f"{self.config.server}/openapi/operations/kafka/mcl-timeseries/consumer/offsets",
1476
+ }
1477
+
1478
+ params = {"skipCache": "true", "detailed": "true"}
1479
+ results = {}
1480
+ for key, url in urls.items():
1481
+ response = self._get_generic(url=url, params=params)
1482
+ results[key] = response
1483
+ if "errors" in response:
1484
+ logger.error(f"Error: {response['errors']}")
1485
+ return results
1486
+
1487
+ def _restore_index_call(self, payload_obj: dict) -> None:
1488
+ result = self._post_generic(
1489
+ f"{self._gms_server}/operations?action=restoreIndices", payload_obj
1490
+ )
1491
+ logger.debug(f"Restore indices result: {result}")
1492
+
1493
+ def restore_indices(
1494
+ self,
1495
+ urn_pattern: Optional[str] = None,
1496
+ aspect: Optional[str] = None,
1497
+ start: Optional[int] = None,
1498
+ batch_size: Optional[int] = None,
1499
+ file: Optional[str] = None,
1500
+ ) -> None:
1501
+ """Restore the indices for a given urn or urn-like pattern.
1502
+
1503
+ Args:
1504
+ urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs. If not provided, will restore indices from the file.
1505
+ aspect: Optional aspect string to restore indices for a specific aspect.
1506
+ start: Optional integer to decide which row number of sql store to restore from. Default: 0. Ignored in case file is provided.
1507
+ batch_size: Optional integer to decide how many rows to restore. Default: 10. Ignored in case file is provided.
1508
+ file: Optional file path to a file containing URNs to restore indices for.
1509
+
1510
+ Returns:
1511
+ None. The result of the restore indices operation is only written to the debug log.
1512
+ """
1513
+ payload_obj = {}
1514
+ if file is not None:
1515
+ with open(file) as f:
1516
+ for urn in progressbar.progressbar(f.readlines()):
1517
+ urn = urn.strip()
1518
+ if "%" in urn:
1519
+ payload_obj["urnLike"] = urn
1520
+ else:
1521
+ payload_obj["urn"] = urn
1522
+ if aspect is not None:
1523
+ payload_obj["aspect"] = aspect
1524
+ self._restore_index_call(payload_obj)
1525
+ else:
1526
+ if urn_pattern is not None:
1527
+ if "%" in urn_pattern:
1528
+ payload_obj["urnLike"] = urn_pattern
1529
+ else:
1530
+ payload_obj["urn"] = urn_pattern
1531
+ if aspect is not None:
1532
+ payload_obj["aspect"] = aspect
1533
+ if start is not None:
1534
+ payload_obj["start"] = start
1535
+ if batch_size is not None:
1536
+ payload_obj["batchSize"] = batch_size
1537
+ self._restore_index_call(payload_obj)
1538
+
1430
1539
  @functools.lru_cache
1431
1540
  def _make_schema_resolver(
1432
1541
  self,
@@ -1491,7 +1600,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1491
1600
  env: str = DEFAULT_ENV,
1492
1601
  default_db: Optional[str] = None,
1493
1602
  default_schema: Optional[str] = None,
1494
- default_dialect: Optional[str] = None,
1603
+ override_dialect: Optional[str] = None,
1495
1604
  ) -> "SqlParsingResult":
1496
1605
  from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
1497
1606
 
@@ -1505,7 +1614,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1505
1614
  schema_resolver=schema_resolver,
1506
1615
  default_db=default_db,
1507
1616
  default_schema=default_schema,
1508
- default_dialect=default_dialect,
1617
+ override_dialect=override_dialect,
1509
1618
  )
1510
1619
 
1511
1620
  def create_tag(self, tag_name: str) -> str:
@@ -1732,7 +1841,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1732
1841
  "Accept": "application/json",
1733
1842
  "Content-Type": "application/json",
1734
1843
  }
1735
- url = f"{self.config.server}/openapi/v2/entity/batch/{entity_name}"
1844
+ url = f"{self._gms_server}/openapi/v2/entity/batch/{entity_name}"
1736
1845
  response = self._session.post(url, data=json.dumps(payload), headers=headers)
1737
1846
  response.raise_for_status()
1738
1847
 
@@ -1789,7 +1898,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1789
1898
  "Content-Type": "application/json",
1790
1899
  }
1791
1900
 
1792
- url = f"{self.config.server}/openapi/v3/entity/{entity_name}/batchGet"
1901
+ url = f"{self._gms_server}/openapi/v3/entity/{entity_name}/batchGet"
1793
1902
  if with_system_metadata:
1794
1903
  url += "?systemMetadata=true"
1795
1904
 
@@ -1963,6 +2072,202 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
1963
2072
 
1964
2073
  return res["reportAssertionResult"]
1965
2074
 
2075
+ def _get_invite_token(self) -> str:
2076
+ """
2077
+ Retrieve an invite token for user creation.
2078
+
2079
+ Returns:
2080
+ Invite token string
2081
+
2082
+ Raises:
2083
+ OperationalError: If invite token retrieval fails
2084
+ """
2085
+ get_invite_token_query = """
2086
+ query getInviteToken($input: GetInviteTokenInput!) {
2087
+ getInviteToken(input: $input) {
2088
+ inviteToken
2089
+ }
2090
+ }
2091
+ """
2092
+
2093
+ try:
2094
+ invite_token_response = self.execute_graphql(
2095
+ query=get_invite_token_query,
2096
+ variables={"input": {}},
2097
+ )
2098
+ invite_token = invite_token_response.get("getInviteToken", {}).get(
2099
+ "inviteToken"
2100
+ )
2101
+ if not invite_token:
2102
+ raise OperationalError(
2103
+ "Failed to retrieve invite token. Ensure you have admin permissions.",
2104
+ {},
2105
+ )
2106
+ return invite_token
2107
+ except Exception as e:
2108
+ raise OperationalError(
2109
+ f"Failed to retrieve invite token: {str(e)}", {}
2110
+ ) from e
2111
+
2112
+ def _create_user_with_token(
2113
+ self,
2114
+ user_urn: str,
2115
+ email: str,
2116
+ display_name: str,
2117
+ password: str,
2118
+ invite_token: str,
2119
+ ) -> None:
2120
+ """
2121
+ Create a user using the signup endpoint.
2122
+
2123
+ Args:
2124
+ user_urn: User URN (urn:li:corpuser:{user_id})
2125
+ email: User's email address
2126
+ display_name: Full display name for the user
2127
+ password: User's password
2128
+ invite_token: Invite token for user creation
2129
+
2130
+ Raises:
2131
+ OperationalError: If user creation fails
2132
+ """
2133
+ frontend_url = guess_frontend_url_from_gms_url(self._gms_server)
2134
+ signup_url = f"{frontend_url}/signUp"
2135
+ signup_payload = {
2136
+ "userUrn": user_urn,
2137
+ "email": email,
2138
+ "fullName": display_name,
2139
+ "password": password,
2140
+ "title": "Other",
2141
+ "inviteToken": invite_token,
2142
+ }
2143
+
2144
+ logger.debug(
2145
+ f"Creating user with URN={user_urn}, email={email} at URL: {signup_url}"
2146
+ )
2147
+ logger.debug(
2148
+ f"Signup payload: {json.dumps({**signup_payload, 'password': '***'})}"
2149
+ )
2150
+
2151
+ try:
2152
+ response = self._session.post(signup_url, json=signup_payload)
2153
+ logger.debug(f"Response status code: {response.status_code}")
2154
+ logger.debug(f"Response headers: {dict(response.headers)}")
2155
+ logger.debug(f"Response content length: {len(response.text)}")
2156
+
2157
+ response.raise_for_status()
2158
+
2159
+ # The /signUp endpoint returns 200 with empty body on success
2160
+ logger.debug("User created successfully")
2161
+
2162
+ except HTTPError as http_err:
2163
+ error_details = {
2164
+ "url": signup_url,
2165
+ "status_code": response.status_code,
2166
+ "response_text": response.text[:500],
2167
+ }
2168
+ try:
2169
+ error_json = response.json()
2170
+ error_details["error_response"] = error_json
2171
+ error_msg = error_json.get("message", str(http_err))
2172
+ except JSONDecodeError:
2173
+ error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
2174
+
2175
+ raise OperationalError(
2176
+ f"Failed to create user: {error_msg}",
2177
+ error_details,
2178
+ ) from http_err
2179
+ except Exception as e:
2180
+ raise OperationalError(
2181
+ f"Failed to create user: {str(e)}",
2182
+ {"url": signup_url, "error_type": type(e).__name__},
2183
+ ) from e
2184
+
2185
+ def _assign_role_to_user(self, user_urn: str, role: str) -> None:
2186
+ """
2187
+ Assign a role to a user.
2188
+
2189
+ Args:
2190
+ user_urn: User URN
2191
+ role: Role to assign (Admin, Editor, or Reader)
2192
+
2193
+ Raises:
2194
+ ValueError: If role is invalid
2195
+ """
2196
+ normalized_role = role.capitalize()
2197
+ valid_roles = ["Admin", "Editor", "Reader"]
2198
+ if normalized_role not in valid_roles:
2199
+ raise ValueError(
2200
+ f"Invalid role '{role}'. Must be one of: {', '.join(valid_roles)}"
2201
+ )
2202
+
2203
+ role_urn = f"urn:li:dataHubRole:{normalized_role}"
2204
+
2205
+ batch_assign_role_mutation = """
2206
+ mutation batchAssignRole($input: BatchAssignRoleInput!) {
2207
+ batchAssignRole(input: $input)
2208
+ }
2209
+ """
2210
+
2211
+ try:
2212
+ self.execute_graphql(
2213
+ query=batch_assign_role_mutation,
2214
+ variables={"input": {"roleUrn": role_urn, "actors": [user_urn]}},
2215
+ )
2216
+ except Exception as e:
2217
+ logger.warning(f"Role assignment failed for user {user_urn}: {str(e)}")
2218
+ raise
2219
+
2220
+ def create_native_user(
2221
+ self,
2222
+ user_id: str,
2223
+ email: str,
2224
+ display_name: str,
2225
+ password: str,
2226
+ role: Optional[str] = None,
2227
+ ) -> str:
2228
+ """
2229
+ Create a native DataHub user with email/password authentication.
2230
+
2231
+ Args:
2232
+ user_id: User identifier (will be used in the URN)
2233
+ email: User's email address
2234
+ display_name: Full display name for the user
2235
+ password: User's password
2236
+ role: Optional role to assign (Admin, Editor, or Reader)
2237
+
2238
+ Returns:
2239
+ User URN of the created user (urn:li:corpuser:{user_id})
2240
+
2241
+ Raises:
2242
+ OperationalError: If user creation fails
2243
+ ValueError: If role is invalid
2244
+ """
2245
+ # Validate role before creating user
2246
+ if role:
2247
+ normalized_role = role.capitalize()
2248
+ valid_roles = ["Admin", "Editor", "Reader"]
2249
+ if normalized_role not in valid_roles:
2250
+ raise ValueError(
2251
+ f"Invalid role '{role}'. Must be one of: {', '.join(valid_roles)}"
2252
+ )
2253
+
2254
+ user_urn = f"urn:li:corpuser:{user_id}"
2255
+
2256
+ invite_token = self._get_invite_token()
2257
+ self._create_user_with_token(
2258
+ user_urn, email, display_name, password, invite_token
2259
+ )
2260
+
2261
+ if role:
2262
+ try:
2263
+ self._assign_role_to_user(user_urn, role)
2264
+ except Exception as e:
2265
+ logger.warning(
2266
+ f"User {email} created successfully, but role assignment failed: {str(e)}"
2267
+ )
2268
+
2269
+ return user_urn
2270
+
1966
2271
  def close(self) -> None:
1967
2272
  self._make_schema_resolver.cache_clear()
1968
2273
  super().close()
@@ -1,8 +1,8 @@
1
- import os
2
1
  from enum import Enum, auto
3
2
  from typing import Dict, List, Optional
4
3
 
5
4
  from datahub.configuration.common import ConfigModel
5
+ from datahub.configuration.env_vars import get_datahub_component
6
6
 
7
7
 
8
8
  class ClientMode(Enum):
@@ -11,7 +11,7 @@ class ClientMode(Enum):
11
11
  SDK = auto()
12
12
 
13
13
 
14
- DATAHUB_COMPONENT_ENV: str = os.getenv("DATAHUB_COMPONENT", "datahub").lower()
14
+ DATAHUB_COMPONENT_ENV: str = get_datahub_component().lower()
15
15
 
16
16
 
17
17
  class DatahubClientConfig(ConfigModel):
@@ -29,6 +29,7 @@ class DatahubClientConfig(ConfigModel):
29
29
  openapi_ingestion: Optional[bool] = None
30
30
  client_mode: Optional[ClientMode] = None
31
31
  datahub_component: Optional[str] = None
32
+ server_config_refresh_interval: Optional[int] = None
32
33
 
33
34
  class Config:
34
35
  extra = "ignore"
@@ -76,11 +76,21 @@ class RemovedStatusFilter(enum.Enum):
76
76
  """Search only soft-deleted entities."""
77
77
 
78
78
 
79
+ def _validate_or_filter_structure(
80
+ or_filters: List[Dict[str, List[SearchFilterRule]]],
81
+ ) -> None:
82
+ for filter_list in or_filters:
83
+ if "and" not in filter_list:
84
+ raise ValueError(f"Invalid or filter: {filter_list}")
85
+ if not isinstance(filter_list["and"], list):
86
+ raise ValueError(f"Invalid or filter: {filter_list}")
87
+
88
+
79
89
  def generate_filter(
80
- platform: Optional[str],
90
+ platform: Union[None, str, List[str]],
81
91
  platform_instance: Optional[str],
82
92
  env: Optional[str],
83
- container: Optional[str],
93
+ container: Union[None, str, List[str]],
84
94
  status: Optional[RemovedStatusFilter],
85
95
  extra_filters: Optional[List[RawSearchFilterRule]],
86
96
  extra_or_filters: Optional[RawSearchFilter] = None,
@@ -93,8 +103,7 @@ def generate_filter(
93
103
  :param container: The container to filter by.
94
104
  :param status: The status to filter by.
95
105
  :param extra_filters: Extra AND filters to apply.
96
- :param extra_or_filters: Extra OR filters to apply. These are combined with
97
- the AND filters using an OR at the top level.
106
+ :param extra_or_filters: Extra OR filters to apply. These are combined with the AND filters using an OR at the top level.
98
107
  """
99
108
  and_filters: List[RawSearchFilterRule] = []
100
109
 
@@ -218,23 +227,31 @@ def _get_status_filter(status: RemovedStatusFilter) -> Optional[SearchFilterRule
218
227
  raise ValueError(f"Invalid status filter: {status}")
219
228
 
220
229
 
221
- def _get_container_filter(container: str) -> SearchFilterRule:
230
+ def _get_container_filter(container: Union[str, List[str]]) -> SearchFilterRule:
231
+ if not isinstance(container, list):
232
+ container = [container]
233
+
222
234
  # Reject any container that is not a fully qualified container urn.
223
235
  # TODO: Change this once we have a first-class container urn type.
224
- if guess_entity_type(container) != "container":
225
- raise ValueError(f"Invalid container urn: {container}")
236
+ for cont in container:
237
+ if guess_entity_type(cont) != "container":
238
+ raise ValueError(f"Invalid container urn: {cont}")
226
239
 
227
240
  return SearchFilterRule(
228
241
  field="browsePathV2",
229
- values=[container],
242
+ values=container,
230
243
  condition="CONTAIN",
231
244
  )
232
245
 
233
246
 
234
247
  def _get_platform_instance_filter(
235
- platform: Optional[str], platform_instance: str
248
+ platform: Union[None, str, List[str]], platform_instance: str
236
249
  ) -> SearchFilterRule:
237
250
  if platform:
251
+ if isinstance(platform, list):
252
+ raise ValueError(
253
+ "Platform instance filter cannot be combined with a multi-value platform filter."
254
+ )
238
255
  # Massage the platform instance into a fully qualified urn, if necessary.
239
256
  platform_instance = make_dataplatform_instance_urn(platform, platform_instance)
240
257
 
@@ -250,9 +267,11 @@ def _get_platform_instance_filter(
250
267
  )
251
268
 
252
269
 
253
- def _get_platform_filter(platform: str) -> SearchFilterRule:
270
+ def _get_platform_filter(platform: Union[str, List[str]]) -> SearchFilterRule:
271
+ if not isinstance(platform, list):
272
+ platform = [platform]
254
273
  return SearchFilterRule(
255
274
  field="platform.keyword",
256
275
  condition="EQUAL",
257
- values=[make_data_platform_urn(platform)],
276
+ values=[make_data_platform_urn(plt) for plt in platform],
258
277
  )