acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -22,25 +22,25 @@ from typing import (
|
|
|
22
22
|
Union,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
+
import progressbar
|
|
25
26
|
from avro.schema import RecordSchema
|
|
26
|
-
from deprecated import deprecated
|
|
27
27
|
from pydantic import BaseModel
|
|
28
28
|
from requests.models import HTTPError
|
|
29
|
+
from typing_extensions import deprecated
|
|
29
30
|
|
|
31
|
+
from datahub._codegen.aspect import _Aspect
|
|
30
32
|
from datahub.cli import config_utils
|
|
33
|
+
from datahub.cli.cli_utils import guess_frontend_url_from_gms_url
|
|
31
34
|
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
|
|
32
35
|
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
33
36
|
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
|
|
34
37
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
35
38
|
from datahub.emitter.rest_emitter import (
|
|
36
|
-
DEFAULT_REST_SINK_ENDPOINT,
|
|
37
|
-
DEFAULT_REST_TRACE_MODE,
|
|
38
39
|
DatahubRestEmitter,
|
|
39
|
-
RestSinkEndpoint,
|
|
40
|
-
RestTraceMode,
|
|
41
40
|
)
|
|
42
41
|
from datahub.emitter.serialization_helper import post_json_transform
|
|
43
42
|
from datahub.ingestion.graph.config import (
|
|
43
|
+
ClientMode,
|
|
44
44
|
DatahubClientConfig as DatahubClientConfig,
|
|
45
45
|
)
|
|
46
46
|
from datahub.ingestion.graph.connections import (
|
|
@@ -49,10 +49,12 @@ from datahub.ingestion.graph.connections import (
|
|
|
49
49
|
)
|
|
50
50
|
from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
|
|
51
51
|
from datahub.ingestion.graph.filters import (
|
|
52
|
+
RawSearchFilter,
|
|
52
53
|
RawSearchFilterRule,
|
|
53
54
|
RemovedStatusFilter,
|
|
54
55
|
generate_filter,
|
|
55
56
|
)
|
|
57
|
+
from datahub.ingestion.graph.links import make_url_for_urn
|
|
56
58
|
from datahub.ingestion.source.state.checkpoint import Checkpoint
|
|
57
59
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
58
60
|
MetadataChangeEvent,
|
|
@@ -75,10 +77,19 @@ from datahub.metadata.schema_classes import (
|
|
|
75
77
|
SystemMetadataClass,
|
|
76
78
|
TelemetryClientIdClass,
|
|
77
79
|
)
|
|
80
|
+
from datahub.metadata.urns import (
|
|
81
|
+
CorpUserUrn,
|
|
82
|
+
MlFeatureTableUrn,
|
|
83
|
+
MlFeatureUrn,
|
|
84
|
+
MlModelGroupUrn,
|
|
85
|
+
MlModelUrn,
|
|
86
|
+
MlPrimaryKeyUrn,
|
|
87
|
+
Urn,
|
|
88
|
+
)
|
|
78
89
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
79
90
|
from datahub.utilities.perf_timer import PerfTimer
|
|
80
91
|
from datahub.utilities.str_enum import StrEnum
|
|
81
|
-
from datahub.utilities.urns.urn import Urn, guess_entity_type
|
|
92
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
82
93
|
|
|
83
94
|
if TYPE_CHECKING:
|
|
84
95
|
from datahub.ingestion.sink.datahub_rest import (
|
|
@@ -116,8 +127,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
116
127
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
117
128
|
|
|
118
129
|
# Hard-coded special cases.
|
|
119
|
-
|
|
120
|
-
|
|
130
|
+
special_cases = {
|
|
131
|
+
CorpUserUrn.ENTITY_TYPE: "CORP_USER",
|
|
132
|
+
MlModelUrn.ENTITY_TYPE: "MLMODEL",
|
|
133
|
+
MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
|
|
134
|
+
MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
|
|
135
|
+
MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
|
|
136
|
+
MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
|
|
137
|
+
}
|
|
138
|
+
if entity_type in special_cases:
|
|
139
|
+
return special_cases[entity_type]
|
|
121
140
|
|
|
122
141
|
# Convert camelCase to UPPER_UNDERSCORE.
|
|
123
142
|
entity_type = (
|
|
@@ -133,6 +152,14 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
133
152
|
return entity_type
|
|
134
153
|
|
|
135
154
|
|
|
155
|
+
def flexible_entity_type_to_graphql(entity_type: str) -> str:
    """Return the GraphQL "EntityType" enum value for ``entity_type``.

    A value that is already entirely upper-case is assumed to be a GraphQL
    EntityType enum value and is returned unchanged; anything else is
    converted with entity_type_to_graphql.
    """
    is_all_caps = entity_type == entity_type.upper()
    if not is_all_caps:
        return entity_type_to_graphql(entity_type)
    # Already looks like an enum value, so no conversion is needed.
    return entity_type
|
|
161
|
+
|
|
162
|
+
|
|
136
163
|
class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
137
164
|
def __init__(self, config: DatahubClientConfig) -> None:
|
|
138
165
|
self.config = config
|
|
@@ -147,11 +174,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
147
174
|
ca_certificate_path=self.config.ca_certificate_path,
|
|
148
175
|
client_certificate_path=self.config.client_certificate_path,
|
|
149
176
|
disable_ssl_verification=self.config.disable_ssl_verification,
|
|
150
|
-
openapi_ingestion=
|
|
151
|
-
|
|
177
|
+
openapi_ingestion=self.config.openapi_ingestion,
|
|
178
|
+
client_mode=config.client_mode,
|
|
179
|
+
datahub_component=config.datahub_component,
|
|
180
|
+
server_config_refresh_interval=config.server_config_refresh_interval,
|
|
152
181
|
)
|
|
153
|
-
|
|
154
|
-
self.server_id = _MISSING_SERVER_ID
|
|
182
|
+
self.server_id: str = _MISSING_SERVER_ID
|
|
155
183
|
|
|
156
184
|
def test_connection(self) -> None:
|
|
157
185
|
super().test_connection()
|
|
@@ -176,20 +204,36 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
176
204
|
"""Get the public-facing base url of the frontend
|
|
177
205
|
|
|
178
206
|
This url can be used to construct links to the frontend. The url will not include a trailing slash.
|
|
207
|
+
|
|
179
208
|
Note: Only supported with DataHub Cloud.
|
|
180
209
|
"""
|
|
181
210
|
|
|
182
|
-
if not self.server_config:
|
|
211
|
+
if not hasattr(self, "server_config") or not self.server_config:
|
|
183
212
|
self.test_connection()
|
|
184
213
|
|
|
185
|
-
base_url = self.server_config.get("baseUrl")
|
|
214
|
+
base_url = self.server_config.raw_config.get("baseUrl")
|
|
186
215
|
if not base_url:
|
|
187
216
|
raise ValueError("baseUrl not found in server config")
|
|
188
217
|
return base_url
|
|
189
218
|
|
|
219
|
+
def url_for(self, entity_urn: Union[str, Urn]) -> str:
    """Build the UI link for an entity.

    Note: Only supported with DataHub Cloud.

    Args:
        entity_urn: The urn of the entity, as a string or a Urn object.

    Returns:
        The public-facing url for the entity.
    """
    base_url = self.frontend_base_url
    # Urn objects are stringified before being handed to the link builder.
    urn_text = str(entity_urn)
    return make_url_for_urn(base_url, urn_text)
|
|
232
|
+
|
|
190
233
|
@classmethod
|
|
191
234
|
def from_emitter(cls, emitter: DatahubRestEmitter) -> "DataHubGraph":
|
|
192
235
|
session_config = emitter._session_config
|
|
236
|
+
|
|
193
237
|
if isinstance(session_config.timeout, tuple):
|
|
194
238
|
# TODO: This is slightly lossy. Eventually, we want to modify the emitter
|
|
195
239
|
# to accept a tuple for timeout_sec, and then we'll be able to remove this.
|
|
@@ -207,6 +251,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
207
251
|
disable_ssl_verification=session_config.disable_ssl_verification,
|
|
208
252
|
ca_certificate_path=session_config.ca_certificate_path,
|
|
209
253
|
client_certificate_path=session_config.client_certificate_path,
|
|
254
|
+
client_mode=session_config.client_mode,
|
|
255
|
+
datahub_component=session_config.datahub_component,
|
|
256
|
+
server_config_refresh_interval=emitter._server_config_refresh_interval,
|
|
210
257
|
)
|
|
211
258
|
)
|
|
212
259
|
|
|
@@ -330,7 +377,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
330
377
|
f"Failed to find {aspect_type_name} in response {response_json}"
|
|
331
378
|
)
|
|
332
379
|
|
|
333
|
-
@deprecated(
|
|
380
|
+
@deprecated("Use get_aspect instead which makes aspect string name optional")
|
|
334
381
|
def get_aspect_v2(
|
|
335
382
|
self,
|
|
336
383
|
entity_urn: str,
|
|
@@ -347,7 +394,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
347
394
|
)
|
|
348
395
|
|
|
349
396
|
def get_config(self) -> Dict[str, Any]:
|
|
350
|
-
return self.
|
|
397
|
+
return self.server_config.raw_config
|
|
351
398
|
|
|
352
399
|
def get_ownership(self, entity_urn: str) -> Optional[OwnershipClass]:
    """Fetch the OwnershipClass aspect of ``entity_urn`` through get_aspect."""
    ownership = self.get_aspect(entity_urn=entity_urn, aspect_type=OwnershipClass)
    return ownership
|
|
@@ -355,7 +402,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
355
402
|
def get_schema_metadata(self, entity_urn: str) -> Optional[SchemaMetadataClass]:
    """Fetch the SchemaMetadataClass aspect of ``entity_urn`` through get_aspect."""
    schema_metadata = self.get_aspect(
        entity_urn=entity_urn,
        aspect_type=SchemaMetadataClass,
    )
    return schema_metadata
|
|
357
404
|
|
|
358
|
-
@deprecated(
|
|
405
|
+
@deprecated("Use get_aspect directly.")
def get_domain_properties(self, entity_urn: str) -> Optional[DomainPropertiesClass]:
    """Fetch the DomainPropertiesClass aspect of ``entity_urn`` through get_aspect."""
    domain_properties = self.get_aspect(
        entity_urn=entity_urn,
        aspect_type=DomainPropertiesClass,
    )
    return domain_properties
|
|
361
408
|
|
|
@@ -376,7 +423,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
376
423
|
def get_domain(self, entity_urn: str) -> Optional[DomainsClass]:
    """Fetch the DomainsClass aspect of ``entity_urn`` through get_aspect."""
    domains = self.get_aspect(entity_urn=entity_urn, aspect_type=DomainsClass)
    return domains
|
|
378
425
|
|
|
379
|
-
@deprecated(
|
|
426
|
+
@deprecated("Use get_aspect directly.")
def get_browse_path(self, entity_urn: str) -> Optional[BrowsePathsClass]:
    """Fetch the BrowsePathsClass aspect of ``entity_urn`` through get_aspect."""
    browse_paths = self.get_aspect(
        entity_urn=entity_urn,
        aspect_type=BrowsePathsClass,
    )
    return browse_paths
|
|
382
429
|
|
|
@@ -447,7 +494,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
447
494
|
filter_criteria_map: Dict[str, str],
|
|
448
495
|
) -> Optional[Aspect]:
|
|
449
496
|
filter_criteria = [
|
|
450
|
-
{"field": k, "
|
|
497
|
+
{"field": k, "values": [v], "condition": "EQUAL"}
|
|
451
498
|
for k, v in filter_criteria_map.items()
|
|
452
499
|
]
|
|
453
500
|
filter = {"or": [{"and": filter_criteria}]}
|
|
@@ -475,7 +522,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
475
522
|
"limit": limit,
|
|
476
523
|
"filter": filter,
|
|
477
524
|
}
|
|
478
|
-
end_point = f"{self.
|
|
525
|
+
end_point = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
|
|
479
526
|
resp: Dict = self._post_generic(end_point, query_body)
|
|
480
527
|
|
|
481
528
|
values: Optional[List] = resp.get("value", {}).get("values")
|
|
@@ -495,7 +542,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
495
542
|
def get_entity_raw(
|
|
496
543
|
self, entity_urn: str, aspects: Optional[List[str]] = None
|
|
497
544
|
) -> Dict:
|
|
498
|
-
endpoint: str = f"{self.
|
|
545
|
+
endpoint: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}"
|
|
499
546
|
if aspects is not None:
|
|
500
547
|
assert aspects, "if provided, aspects must be a non-empty list"
|
|
501
548
|
endpoint = f"{endpoint}?aspects=List(" + ",".join(aspects) + ")"
|
|
@@ -505,7 +552,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
505
552
|
return response.json()
|
|
506
553
|
|
|
507
554
|
@deprecated(
|
|
508
|
-
|
|
555
|
+
"Use get_aspect for a single aspect or get_entity_semityped for a full entity."
|
|
509
556
|
)
|
|
510
557
|
def get_aspects_for_entity(
|
|
511
558
|
self,
|
|
@@ -625,18 +672,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
625
672
|
|
|
626
673
|
@property
|
|
627
674
|
def _search_endpoint(self):
|
|
628
|
-
return f"{self.
|
|
675
|
+
return f"{self._gms_server}/entities?action=search"
|
|
629
676
|
|
|
630
677
|
@property
|
|
631
678
|
def _relationships_endpoint(self):
|
|
632
|
-
return f"{self.
|
|
679
|
+
return f"{self._gms_server}/openapi/relationships/v1/"
|
|
633
680
|
|
|
634
681
|
@property
|
|
635
682
|
def _aspect_count_endpoint(self):
|
|
636
|
-
return f"{self.
|
|
637
|
-
|
|
638
|
-
# def _session(self) -> Session:
|
|
639
|
-
# return super()._session
|
|
683
|
+
return f"{self._gms_server}/aspects?action=getCount"
|
|
640
684
|
|
|
641
685
|
def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]:
|
|
642
686
|
"""Retrieve a domain urn based on its name. Returns None if there is no match found"""
|
|
@@ -645,7 +689,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
645
689
|
filter_criteria = [
|
|
646
690
|
{
|
|
647
691
|
"field": "name",
|
|
648
|
-
"
|
|
692
|
+
"values": [domain_name],
|
|
649
693
|
"condition": "EQUAL",
|
|
650
694
|
}
|
|
651
695
|
]
|
|
@@ -749,9 +793,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
749
793
|
|
|
750
794
|
assert res["upsertConnection"]["urn"] == urn
|
|
751
795
|
|
|
752
|
-
@deprecated(
|
|
753
|
-
reason='Use get_urns_by_filter(entity_types=["container"], ...) instead'
|
|
754
|
-
)
|
|
796
|
+
@deprecated('Use get_urns_by_filter(entity_types=["container"], ...) instead')
|
|
755
797
|
def get_container_urns_by_filter(
|
|
756
798
|
self,
|
|
757
799
|
env: Optional[str] = None,
|
|
@@ -767,7 +809,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
767
809
|
filter_criteria.append(
|
|
768
810
|
{
|
|
769
811
|
"field": "customProperties",
|
|
770
|
-
"
|
|
812
|
+
"values": [f"instance={env}"],
|
|
771
813
|
"condition": "EQUAL",
|
|
772
814
|
}
|
|
773
815
|
)
|
|
@@ -775,7 +817,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
775
817
|
filter_criteria.append(
|
|
776
818
|
{
|
|
777
819
|
"field": "typeNames",
|
|
778
|
-
"
|
|
820
|
+
"values": [container_subtype],
|
|
779
821
|
"condition": "EQUAL",
|
|
780
822
|
}
|
|
781
823
|
)
|
|
@@ -784,7 +826,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
784
826
|
"input": search_query,
|
|
785
827
|
"entity": "container",
|
|
786
828
|
"start": 0,
|
|
787
|
-
"count":
|
|
829
|
+
"count": 5000,
|
|
788
830
|
"filter": {"or": container_filters},
|
|
789
831
|
}
|
|
790
832
|
results: Dict = self._post_generic(url, search_body)
|
|
@@ -797,11 +839,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
797
839
|
def _bulk_fetch_schema_info_by_filter(
|
|
798
840
|
self,
|
|
799
841
|
*,
|
|
800
|
-
platform:
|
|
842
|
+
platform: Union[None, str, List[str]] = None,
|
|
801
843
|
platform_instance: Optional[str] = None,
|
|
802
844
|
env: Optional[str] = None,
|
|
803
845
|
query: Optional[str] = None,
|
|
804
|
-
container:
|
|
846
|
+
container: Union[None, str, List[str]] = None,
|
|
805
847
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
806
848
|
batch_size: int = 100,
|
|
807
849
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
@@ -810,7 +852,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
810
852
|
|
|
811
853
|
:return: An iterable of (urn, schema info) tuple that match the filters.
|
|
812
854
|
"""
|
|
813
|
-
types = [
|
|
855
|
+
types = self._get_types(["dataset"])
|
|
814
856
|
|
|
815
857
|
# Add the query default of * if no query is specified.
|
|
816
858
|
query = query or "*"
|
|
@@ -873,15 +915,16 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
873
915
|
self,
|
|
874
916
|
*,
|
|
875
917
|
entity_types: Optional[Sequence[str]] = None,
|
|
876
|
-
platform:
|
|
918
|
+
platform: Union[None, str, List[str]] = None,
|
|
877
919
|
platform_instance: Optional[str] = None,
|
|
878
920
|
env: Optional[str] = None,
|
|
879
921
|
query: Optional[str] = None,
|
|
880
|
-
container:
|
|
881
|
-
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
882
|
-
batch_size: int =
|
|
922
|
+
container: Union[None, str, List[str]] = None,
|
|
923
|
+
status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
924
|
+
batch_size: int = 5000,
|
|
883
925
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
884
|
-
extra_or_filters: Optional[
|
|
926
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
927
|
+
skip_cache: bool = False,
|
|
885
928
|
) -> Iterable[str]:
|
|
886
929
|
"""Fetch all urns that match all of the given filters.
|
|
887
930
|
|
|
@@ -900,6 +943,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
900
943
|
Note that this requires browsePathV2 aspects (added in 0.10.4+).
|
|
901
944
|
:param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
|
|
902
945
|
:param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
|
|
946
|
+
:param skip_cache: Whether to bypass caching. Defaults to False.
|
|
903
947
|
|
|
904
948
|
:return: An iterable of urns that match the filters.
|
|
905
949
|
"""
|
|
@@ -927,7 +971,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
927
971
|
$query: String!,
|
|
928
972
|
$orFilters: [AndFilterInput!],
|
|
929
973
|
$batchSize: Int!,
|
|
930
|
-
$scrollId: String
|
|
974
|
+
$scrollId: String,
|
|
975
|
+
$skipCache: Boolean!,
|
|
976
|
+
$includeSoftDeleted: Boolean) {
|
|
931
977
|
|
|
932
978
|
scrollAcrossEntities(input: {
|
|
933
979
|
query: $query,
|
|
@@ -938,6 +984,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
938
984
|
searchFlags: {
|
|
939
985
|
skipHighlighting: true
|
|
940
986
|
skipAggregates: true
|
|
987
|
+
skipCache: $skipCache
|
|
988
|
+
includeSoftDeleted: $includeSoftDeleted
|
|
941
989
|
}
|
|
942
990
|
}) {
|
|
943
991
|
nextScrollId
|
|
@@ -956,6 +1004,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
956
1004
|
"query": query,
|
|
957
1005
|
"orFilters": orFilters,
|
|
958
1006
|
"batchSize": batch_size,
|
|
1007
|
+
"skipCache": skip_cache,
|
|
1008
|
+
"includeSoftDeleted": (
|
|
1009
|
+
None
|
|
1010
|
+
if status is None
|
|
1011
|
+
else status != RemovedStatusFilter.NOT_SOFT_DELETED
|
|
1012
|
+
),
|
|
959
1013
|
}
|
|
960
1014
|
|
|
961
1015
|
for entity in self._scroll_across_entities(graphql_query, variables):
|
|
@@ -965,15 +1019,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
965
1019
|
self,
|
|
966
1020
|
*,
|
|
967
1021
|
entity_types: Optional[List[str]] = None,
|
|
968
|
-
platform:
|
|
1022
|
+
platform: Union[None, str, List[str]] = None,
|
|
969
1023
|
platform_instance: Optional[str] = None,
|
|
970
1024
|
env: Optional[str] = None,
|
|
971
1025
|
query: Optional[str] = None,
|
|
972
|
-
container:
|
|
1026
|
+
container: Union[None, str, List[str]] = None,
|
|
973
1027
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
974
|
-
batch_size: int =
|
|
1028
|
+
batch_size: int = 5000,
|
|
975
1029
|
extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
976
|
-
extra_or_filters: Optional[
|
|
1030
|
+
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
977
1031
|
extra_source_fields: Optional[List[str]] = None,
|
|
978
1032
|
skip_cache: bool = False,
|
|
979
1033
|
) -> Iterable[dict]:
|
|
@@ -1061,7 +1115,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1061
1115
|
"query": query,
|
|
1062
1116
|
"orFilters": or_filters_final,
|
|
1063
1117
|
"batchSize": batch_size,
|
|
1064
|
-
"skipCache":
|
|
1118
|
+
"skipCache": skip_cache,
|
|
1065
1119
|
"fetchExtraFields": extra_source_fields,
|
|
1066
1120
|
}
|
|
1067
1121
|
|
|
@@ -1126,7 +1180,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1126
1180
|
)
|
|
1127
1181
|
|
|
1128
1182
|
types = [
|
|
1129
|
-
|
|
1183
|
+
flexible_entity_type_to_graphql(entity_type)
|
|
1184
|
+
for entity_type in entity_types
|
|
1130
1185
|
]
|
|
1131
1186
|
return types
|
|
1132
1187
|
|
|
@@ -1179,7 +1234,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1179
1234
|
operation_name: Optional[str] = None,
|
|
1180
1235
|
format_exception: bool = True,
|
|
1181
1236
|
) -> Dict:
|
|
1182
|
-
url = f"{self.
|
|
1237
|
+
url = f"{self._gms_server}/api/graphql"
|
|
1183
1238
|
|
|
1184
1239
|
body: Dict = {
|
|
1185
1240
|
"query": query,
|
|
@@ -1404,6 +1459,83 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1404
1459
|
related_aspects = response.get("relatedAspects", [])
|
|
1405
1460
|
return reference_count, related_aspects
|
|
1406
1461
|
|
|
1462
|
+
def get_kafka_consumer_offsets(
    self,
) -> dict:
    """Get Kafka consumer offsets from the DataHub API.

    Queries the "mcp", "mcl" and "mcl-timeseries" consumer offset endpoints
    under ``{self.config.server}/openapi/operations/kafka/`` with
    ``skipCache=true`` and ``detailed=true``.

    Returns:
        A dict keyed by "mcp", "mcl" and "mcl-timeseries" whose values are
        the responses returned by ``_get_generic`` for each endpoint. A
        response containing an "errors" key is logged at error level but is
        still included in the result.
    """
    consumer_keys = ("mcp", "mcl", "mcl-timeseries")
    params = {"skipCache": "true", "detailed": "true"}

    results = {}
    for key in consumer_keys:
        # The three endpoints differ only in the consumer name path segment.
        url = f"{self.config.server}/openapi/operations/kafka/{key}/consumer/offsets"
        response = self._get_generic(url=url, params=params)
        results[key] = response
        if "errors" in response:
            logger.error(f"Error: {response['errors']}")
    return results
|
|
1486
|
+
|
|
1487
|
+
def _restore_index_call(self, payload_obj: dict) -> None:
    """POST ``payload_obj`` to the restoreIndices operation and debug-log the reply."""
    endpoint = f"{self._gms_server}/operations?action=restoreIndices"
    result = self._post_generic(endpoint, payload_obj)
    logger.debug(f"Restore indices result: {result}")
|
|
1492
|
+
|
|
1493
|
+
def restore_indices(
    self,
    urn_pattern: Optional[str] = None,
    aspect: Optional[str] = None,
    start: Optional[int] = None,
    batch_size: Optional[int] = None,
    file: Optional[str] = None,
) -> None:
    """Restore the indices for a given urn or urn-like pattern.

    Args:
        urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs. If not provided, will restore indices from the file.
        aspect: Optional aspect string to restore indices for a specific aspect.
        start: Optional integer to decide which row number of sql store to restore from. Ignored in case file is provided.
        batch_size: Optional integer to decide how many rows to restore. Ignored in case file is provided.
        file: Optional file path to a file containing URNs to restore indices for, one per line.

    Returns:
        None. The result of each restore call is only written to the debug log.
    """
    if file is not None:
        # Read everything up front so the file is not held open across the
        # network calls; blank lines are dropped rather than sent as an empty urn.
        with open(file) as f:
            urns = [line.strip() for line in f]
        urns = [urn for urn in urns if urn]

        for urn in progressbar.progressbar(urns):
            # Build a fresh payload for every line. Reusing one dict would let
            # a "urn"/"urnLike" key from an earlier line leak into later requests.
            payload = {("urnLike" if "%" in urn else "urn"): urn}
            if aspect is not None:
                payload["aspect"] = aspect
            self._restore_index_call(payload)
        return

    payload_obj = {}
    if urn_pattern is not None:
        # A "%" marks a wildcard pattern; anything else is an exact urn.
        if "%" in urn_pattern:
            payload_obj["urnLike"] = urn_pattern
        else:
            payload_obj["urn"] = urn_pattern
    if aspect is not None:
        payload_obj["aspect"] = aspect
    if start is not None:
        payload_obj["start"] = start
    if batch_size is not None:
        payload_obj["batchSize"] = batch_size
    self._restore_index_call(payload_obj)
|
|
1538
|
+
|
|
1407
1539
|
@functools.lru_cache
|
|
1408
1540
|
def _make_schema_resolver(
|
|
1409
1541
|
self,
|
|
@@ -1468,7 +1600,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1468
1600
|
env: str = DEFAULT_ENV,
|
|
1469
1601
|
default_db: Optional[str] = None,
|
|
1470
1602
|
default_schema: Optional[str] = None,
|
|
1471
|
-
|
|
1603
|
+
override_dialect: Optional[str] = None,
|
|
1472
1604
|
) -> "SqlParsingResult":
|
|
1473
1605
|
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
|
1474
1606
|
|
|
@@ -1482,7 +1614,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1482
1614
|
schema_resolver=schema_resolver,
|
|
1483
1615
|
default_db=default_db,
|
|
1484
1616
|
default_schema=default_schema,
|
|
1485
|
-
|
|
1617
|
+
override_dialect=override_dialect,
|
|
1486
1618
|
)
|
|
1487
1619
|
|
|
1488
1620
|
def create_tag(self, tag_name: str) -> str:
|
|
@@ -1691,6 +1823,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1691
1823
|
|
|
1692
1824
|
return res["runAssertionsForAsset"]
|
|
1693
1825
|
|
|
1826
|
+
@deprecated("Use get_entities instead which returns typed aspects")
|
|
1694
1827
|
def get_entities_v2(
|
|
1695
1828
|
self,
|
|
1696
1829
|
entity_name: str,
|
|
@@ -1708,7 +1841,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1708
1841
|
"Accept": "application/json",
|
|
1709
1842
|
"Content-Type": "application/json",
|
|
1710
1843
|
}
|
|
1711
|
-
url = f"{self.
|
|
1844
|
+
url = f"{self._gms_server}/openapi/v2/entity/batch/{entity_name}"
|
|
1712
1845
|
response = self._session.post(url, data=json.dumps(payload), headers=headers)
|
|
1713
1846
|
response.raise_for_status()
|
|
1714
1847
|
|
|
@@ -1730,6 +1863,108 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1730
1863
|
retval[entity_urn][aspect_key] = aspect_value
|
|
1731
1864
|
return retval
|
|
1732
1865
|
|
|
1866
|
+
def get_entities(
    self,
    entity_name: str,
    urns: List[str],
    aspects: Optional[List[str]] = None,
    with_system_metadata: bool = False,
) -> Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]]:
    """Batch-fetch entities via the OpenAPI v3 endpoint as typed aspect objects.

    Args:
        entity_name: The entity type name, used in the endpoint path.
        urns: Entity URNs to fetch.
        aspects: Aspect names to request. None (or an empty list) adds no
            aspect keys to the request entries.
        with_system_metadata: When True, request system metadata and return
            it alongside each aspect.

    Returns:
        A dict mapping each URN to a dict of aspect name ->
        (typed aspect, system metadata). The system metadata element is None
        when with_system_metadata is False or the response carries none.
        Entities without a URN, and entities for which no aspect could be
        deserialized, are left out.

    Raises:
        Any exception raised while deserializing an aspect is logged and
        re-raised; HTTP error statuses propagate from raise_for_status().
    """
    requested_aspects = aspects or []

    # One request entry per urn, with each requested aspect mapped to an empty object.
    request_payload = [
        {"urn": urn, **{name: {} for name in requested_aspects}} for urn in urns
    ]

    query_suffix = "?systemMetadata=true" if with_system_metadata else ""
    url = f"{self._gms_server}/openapi/v3/entity/{entity_name}/batchGet{query_suffix}"

    response = self._session.post(
        url,
        data=json.dumps(request_payload),
        headers={
            "Accept": "application/json",
            "Content-Type": "application/json",
        },
    )
    response.raise_for_status()

    result: Dict[str, Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]]] = {}
    for entity in response.json():
        entity_urn = entity.get("urn")
        if entity_urn is None:
            logger.warning(
                f"Missing URN in entity response: {entity}, skipping deserialization"
            )
            continue

        typed_aspects: Dict[str, Tuple[_Aspect, Optional[SystemMetadataClass]]] = {}
        for aspect_name, aspect_obj in entity.items():
            # The "urn" key sits next to the aspect entries but is not an aspect.
            if aspect_name == "urn":
                continue

            aspect_class = ASPECT_NAME_MAP.get(aspect_name)
            if aspect_class is None:
                logger.warning(
                    f"Unknown aspect type {aspect_name}, skipping deserialization"
                )
                continue

            aspect_value = aspect_obj.get("value")
            if aspect_value is None:
                logger.warning(
                    f"Unknown aspect value for aspect {aspect_name}, skipping deserialization"
                )
                continue

            try:
                typed_aspect = aspect_class.from_obj(post_json_transform(aspect_value))
                assert isinstance(typed_aspect, aspect_class) and isinstance(
                    typed_aspect, _Aspect
                )

                system_metadata = None
                if with_system_metadata:
                    raw_system_metadata = aspect_obj.get("systemMetadata")
                    if raw_system_metadata:
                        system_metadata = SystemMetadataClass.from_obj(
                            raw_system_metadata
                        )

                typed_aspects[aspect_name] = (typed_aspect, system_metadata)
            except Exception as e:
                logger.error(f"Error deserializing aspect {aspect_name}: {e}")
                raise

        # Only report entities for which at least one aspect was deserialized.
        if typed_aspects:
            result[entity_urn] = typed_aspects

    return result
|
|
1967
|
+
|
|
1733
1968
|
def upsert_custom_assertion(
|
|
1734
1969
|
self,
|
|
1735
1970
|
urn: Optional[str],
|
|
@@ -1837,13 +2072,215 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1837
2072
|
|
|
1838
2073
|
return res["reportAssertionResult"]
|
|
1839
2074
|
|
|
2075
|
+
def _get_invite_token(self) -> str:
|
|
2076
|
+
"""
|
|
2077
|
+
Retrieve an invite token for user creation.
|
|
2078
|
+
|
|
2079
|
+
Returns:
|
|
2080
|
+
Invite token string
|
|
2081
|
+
|
|
2082
|
+
Raises:
|
|
2083
|
+
OperationalError: If invite token retrieval fails
|
|
2084
|
+
"""
|
|
2085
|
+
get_invite_token_query = """
|
|
2086
|
+
query getInviteToken($input: GetInviteTokenInput!) {
|
|
2087
|
+
getInviteToken(input: $input) {
|
|
2088
|
+
inviteToken
|
|
2089
|
+
}
|
|
2090
|
+
}
|
|
2091
|
+
"""
|
|
2092
|
+
|
|
2093
|
+
try:
|
|
2094
|
+
invite_token_response = self.execute_graphql(
|
|
2095
|
+
query=get_invite_token_query,
|
|
2096
|
+
variables={"input": {}},
|
|
2097
|
+
)
|
|
2098
|
+
invite_token = invite_token_response.get("getInviteToken", {}).get(
|
|
2099
|
+
"inviteToken"
|
|
2100
|
+
)
|
|
2101
|
+
if not invite_token:
|
|
2102
|
+
raise OperationalError(
|
|
2103
|
+
"Failed to retrieve invite token. Ensure you have admin permissions.",
|
|
2104
|
+
{},
|
|
2105
|
+
)
|
|
2106
|
+
return invite_token
|
|
2107
|
+
except Exception as e:
|
|
2108
|
+
raise OperationalError(
|
|
2109
|
+
f"Failed to retrieve invite token: {str(e)}", {}
|
|
2110
|
+
) from e
|
|
2111
|
+
|
|
2112
|
+
def _create_user_with_token(
    self,
    user_urn: str,
    email: str,
    display_name: str,
    password: str,
    invite_token: str,
) -> None:
    """Create a user using the signup endpoint.

    Args:
        user_urn: User URN (urn:li:corpuser:{user_id})
        email: User's email address
        display_name: Full display name for the user
        password: User's password
        invite_token: Invite token for user creation

    Raises:
        OperationalError: If user creation fails
    """
    frontend_url = guess_frontend_url_from_gms_url(self._gms_server)
    signup_url = f"{frontend_url}/signUp"
    signup_payload = {
        "userUrn": user_urn,
        "email": email,
        "fullName": display_name,
        "password": password,
        "title": "Other",
        "inviteToken": invite_token,
    }

    logger.debug(
        f"Creating user with URN={user_urn}, email={email} at URL: {signup_url}"
    )
    # The password and the invite token are both credentials, so neither is
    # written to the log verbatim.
    redacted_payload = {**signup_payload, "password": "***", "inviteToken": "***"}
    logger.debug(f"Signup payload: {json.dumps(redacted_payload)}")

    try:
        response = self._session.post(signup_url, json=signup_payload)
        logger.debug(f"Response status code: {response.status_code}")
        logger.debug(f"Response headers: {dict(response.headers)}")
        logger.debug(f"Response content length: {len(response.text)}")

        response.raise_for_status()

        # The /signUp endpoint returns 200 with empty body on success
        logger.debug("User created successfully")

    except HTTPError as http_err:
        error_details: Dict[str, Any] = {
            "url": signup_url,
            "status_code": response.status_code,
            "response_text": response.text[:500],
        }
        try:
            error_json = response.json()
        except JSONDecodeError:
            error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
        else:
            error_details["error_response"] = error_json
            # A JSON error body is not guaranteed to be an object; fall back
            # to the HTTP error text instead of failing on a missing .get().
            if isinstance(error_json, dict):
                error_msg = error_json.get("message", str(http_err))
            else:
                error_msg = str(http_err)

        raise OperationalError(
            f"Failed to create user: {error_msg}",
            error_details,
        ) from http_err
    except Exception as e:
        raise OperationalError(
            f"Failed to create user: {str(e)}",
            {"url": signup_url, "error_type": type(e).__name__},
        ) from e
|
|
2184
|
+
|
|
2185
|
+
def _assign_role_to_user(self, user_urn: str, role: str) -> None:
|
|
2186
|
+
"""
|
|
2187
|
+
Assign a role to a user.
|
|
2188
|
+
|
|
2189
|
+
Args:
|
|
2190
|
+
user_urn: User URN
|
|
2191
|
+
role: Role to assign (Admin, Editor, or Reader)
|
|
2192
|
+
|
|
2193
|
+
Raises:
|
|
2194
|
+
ValueError: If role is invalid
|
|
2195
|
+
"""
|
|
2196
|
+
normalized_role = role.capitalize()
|
|
2197
|
+
valid_roles = ["Admin", "Editor", "Reader"]
|
|
2198
|
+
if normalized_role not in valid_roles:
|
|
2199
|
+
raise ValueError(
|
|
2200
|
+
f"Invalid role '{role}'. Must be one of: {', '.join(valid_roles)}"
|
|
2201
|
+
)
|
|
2202
|
+
|
|
2203
|
+
role_urn = f"urn:li:dataHubRole:{normalized_role}"
|
|
2204
|
+
|
|
2205
|
+
batch_assign_role_mutation = """
|
|
2206
|
+
mutation batchAssignRole($input: BatchAssignRoleInput!) {
|
|
2207
|
+
batchAssignRole(input: $input)
|
|
2208
|
+
}
|
|
2209
|
+
"""
|
|
2210
|
+
|
|
2211
|
+
try:
|
|
2212
|
+
self.execute_graphql(
|
|
2213
|
+
query=batch_assign_role_mutation,
|
|
2214
|
+
variables={"input": {"roleUrn": role_urn, "actors": [user_urn]}},
|
|
2215
|
+
)
|
|
2216
|
+
except Exception as e:
|
|
2217
|
+
logger.warning(f"Role assignment failed for user {user_urn}: {str(e)}")
|
|
2218
|
+
raise
|
|
2219
|
+
|
|
2220
|
+
def create_native_user(
    self,
    user_id: str,
    email: str,
    display_name: str,
    password: str,
    role: Optional[str] = None,
) -> str:
    """
    Create a native DataHub user that signs in with email and password.

    An invite token is fetched first and then used to create the account.
    When a role is given, it is assigned afterwards on a best-effort basis:
    a failure in that step is logged as a warning and the user URN is still
    returned.

    Args:
        user_id: User identifier; becomes the last segment of the URN.
        email: User's email address.
        display_name: Full display name for the user.
        password: User's password.
        role: Optional role to assign (Admin, Editor, or Reader). A falsy
            value (None or empty string) skips role handling entirely.

    Returns:
        The URN of the created user, ``urn:li:corpuser:{user_id}``.

    Raises:
        OperationalError: If user creation fails.
        ValueError: If a role is given and it is not an accepted name.
    """
    # Reject a bad role up front so no account is created for a request
    # that is already known to be invalid.
    if role:
        allowed = ["Admin", "Editor", "Reader"]
        if role.capitalize() not in allowed:
            options = ", ".join(allowed)
            raise ValueError(f"Invalid role '{role}'. Must be one of: {options}")

    user_urn = f"urn:li:corpuser:{user_id}"

    token = self._get_invite_token()
    self._create_user_with_token(user_urn, email, display_name, password, token)

    if not role:
        return user_urn

    try:
        self._assign_role_to_user(user_urn, role)
    except Exception as err:
        # The account already exists at this point, so only warn.
        logger.warning(
            f"User {email} created successfully, but role assignment failed: {str(err)}"
        )

    return user_urn
|
|
2270
|
+
|
|
1840
2271
|
def close(self) -> None:
    """Clear the cache held on ``_make_schema_resolver``, then close via the parent class."""
    resolver_factory = self._make_schema_resolver
    # Drop cached entries before delegating the rest of the shutdown to the parent.
    resolver_factory.cache_clear()
    super().close()
|
|
1843
2274
|
|
|
1844
2275
|
|
|
1845
|
-
|
|
2276
|
+
@functools.lru_cache(maxsize=None)
|
|
2277
|
+
def get_default_graph(
|
|
2278
|
+
client_mode: Optional[ClientMode] = None,
|
|
2279
|
+
datahub_component: Optional[str] = None,
|
|
2280
|
+
) -> DataHubGraph:
|
|
1846
2281
|
graph_config = config_utils.load_client_config()
|
|
2282
|
+
graph_config.client_mode = client_mode
|
|
2283
|
+
graph_config.datahub_component = datahub_component
|
|
1847
2284
|
graph = DataHubGraph(graph_config)
|
|
1848
2285
|
graph.test_connection()
|
|
1849
2286
|
telemetry_instance.set_context(server=graph)
|