acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -22,6 +22,7 @@ from typing import (
|
|
|
22
22
|
Union,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
+
import progressbar
|
|
25
26
|
from avro.schema import RecordSchema
|
|
26
27
|
from pydantic import BaseModel
|
|
27
28
|
from requests.models import HTTPError
|
|
@@ -29,6 +30,7 @@ from typing_extensions import deprecated
|
|
|
29
30
|
|
|
30
31
|
from datahub._codegen.aspect import _Aspect
|
|
31
32
|
from datahub.cli import config_utils
|
|
33
|
+
from datahub.cli.cli_utils import guess_frontend_url_from_gms_url
|
|
32
34
|
from datahub.configuration.common import ConfigModel, GraphError, OperationalError
|
|
33
35
|
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
34
36
|
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
|
|
@@ -75,7 +77,15 @@ from datahub.metadata.schema_classes import (
|
|
|
75
77
|
SystemMetadataClass,
|
|
76
78
|
TelemetryClientIdClass,
|
|
77
79
|
)
|
|
78
|
-
from datahub.metadata.urns import
|
|
80
|
+
from datahub.metadata.urns import (
|
|
81
|
+
CorpUserUrn,
|
|
82
|
+
MlFeatureTableUrn,
|
|
83
|
+
MlFeatureUrn,
|
|
84
|
+
MlModelGroupUrn,
|
|
85
|
+
MlModelUrn,
|
|
86
|
+
MlPrimaryKeyUrn,
|
|
87
|
+
Urn,
|
|
88
|
+
)
|
|
79
89
|
from datahub.telemetry.telemetry import telemetry_instance
|
|
80
90
|
from datahub.utilities.perf_timer import PerfTimer
|
|
81
91
|
from datahub.utilities.str_enum import StrEnum
|
|
@@ -117,8 +127,16 @@ def entity_type_to_graphql(entity_type: str) -> str:
|
|
|
117
127
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
118
128
|
|
|
119
129
|
# Hard-coded special cases.
|
|
120
|
-
|
|
121
|
-
|
|
130
|
+
special_cases = {
|
|
131
|
+
CorpUserUrn.ENTITY_TYPE: "CORP_USER",
|
|
132
|
+
MlModelUrn.ENTITY_TYPE: "MLMODEL",
|
|
133
|
+
MlModelGroupUrn.ENTITY_TYPE: "MLMODEL_GROUP",
|
|
134
|
+
MlFeatureTableUrn.ENTITY_TYPE: "MLFEATURE_TABLE",
|
|
135
|
+
MlFeatureUrn.ENTITY_TYPE: "MLFEATURE",
|
|
136
|
+
MlPrimaryKeyUrn.ENTITY_TYPE: "MLPRIMARY_KEY",
|
|
137
|
+
}
|
|
138
|
+
if entity_type in special_cases:
|
|
139
|
+
return special_cases[entity_type]
|
|
122
140
|
|
|
123
141
|
# Convert camelCase to UPPER_UNDERSCORE.
|
|
124
142
|
entity_type = (
|
|
@@ -159,6 +177,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
159
177
|
openapi_ingestion=self.config.openapi_ingestion,
|
|
160
178
|
client_mode=config.client_mode,
|
|
161
179
|
datahub_component=config.datahub_component,
|
|
180
|
+
server_config_refresh_interval=config.server_config_refresh_interval,
|
|
162
181
|
)
|
|
163
182
|
self.server_id: str = _MISSING_SERVER_ID
|
|
164
183
|
|
|
@@ -189,7 +208,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
189
208
|
Note: Only supported with DataHub Cloud.
|
|
190
209
|
"""
|
|
191
210
|
|
|
192
|
-
if not self.server_config:
|
|
211
|
+
if not hasattr(self, "server_config") or not self.server_config:
|
|
193
212
|
self.test_connection()
|
|
194
213
|
|
|
195
214
|
base_url = self.server_config.raw_config.get("baseUrl")
|
|
@@ -234,6 +253,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
234
253
|
client_certificate_path=session_config.client_certificate_path,
|
|
235
254
|
client_mode=session_config.client_mode,
|
|
236
255
|
datahub_component=session_config.datahub_component,
|
|
256
|
+
server_config_refresh_interval=emitter._server_config_refresh_interval,
|
|
237
257
|
)
|
|
238
258
|
)
|
|
239
259
|
|
|
@@ -502,7 +522,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
502
522
|
"limit": limit,
|
|
503
523
|
"filter": filter,
|
|
504
524
|
}
|
|
505
|
-
end_point = f"{self.
|
|
525
|
+
end_point = f"{self._gms_server}/aspects?action=getTimeseriesAspectValues"
|
|
506
526
|
resp: Dict = self._post_generic(end_point, query_body)
|
|
507
527
|
|
|
508
528
|
values: Optional[List] = resp.get("value", {}).get("values")
|
|
@@ -522,7 +542,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
522
542
|
def get_entity_raw(
|
|
523
543
|
self, entity_urn: str, aspects: Optional[List[str]] = None
|
|
524
544
|
) -> Dict:
|
|
525
|
-
endpoint: str = f"{self.
|
|
545
|
+
endpoint: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}"
|
|
526
546
|
if aspects is not None:
|
|
527
547
|
assert aspects, "if provided, aspects must be a non-empty list"
|
|
528
548
|
endpoint = f"{endpoint}?aspects=List(" + ",".join(aspects) + ")"
|
|
@@ -652,15 +672,15 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
652
672
|
|
|
653
673
|
@property
|
|
654
674
|
def _search_endpoint(self):
|
|
655
|
-
return f"{self.
|
|
675
|
+
return f"{self._gms_server}/entities?action=search"
|
|
656
676
|
|
|
657
677
|
@property
|
|
658
678
|
def _relationships_endpoint(self):
|
|
659
|
-
return f"{self.
|
|
679
|
+
return f"{self._gms_server}/openapi/relationships/v1/"
|
|
660
680
|
|
|
661
681
|
@property
|
|
662
682
|
def _aspect_count_endpoint(self):
|
|
663
|
-
return f"{self.
|
|
683
|
+
return f"{self._gms_server}/aspects?action=getCount"
|
|
664
684
|
|
|
665
685
|
def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]:
|
|
666
686
|
"""Retrieve a domain urn based on its name. Returns None if there is no match found"""
|
|
@@ -806,7 +826,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
806
826
|
"input": search_query,
|
|
807
827
|
"entity": "container",
|
|
808
828
|
"start": 0,
|
|
809
|
-
"count":
|
|
829
|
+
"count": 5000,
|
|
810
830
|
"filter": {"or": container_filters},
|
|
811
831
|
}
|
|
812
832
|
results: Dict = self._post_generic(url, search_body)
|
|
@@ -819,11 +839,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
819
839
|
def _bulk_fetch_schema_info_by_filter(
|
|
820
840
|
self,
|
|
821
841
|
*,
|
|
822
|
-
platform:
|
|
842
|
+
platform: Union[None, str, List[str]] = None,
|
|
823
843
|
platform_instance: Optional[str] = None,
|
|
824
844
|
env: Optional[str] = None,
|
|
825
845
|
query: Optional[str] = None,
|
|
826
|
-
container:
|
|
846
|
+
container: Union[None, str, List[str]] = None,
|
|
827
847
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
828
848
|
batch_size: int = 100,
|
|
829
849
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
@@ -895,15 +915,16 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
895
915
|
self,
|
|
896
916
|
*,
|
|
897
917
|
entity_types: Optional[Sequence[str]] = None,
|
|
898
|
-
platform:
|
|
918
|
+
platform: Union[None, str, List[str]] = None,
|
|
899
919
|
platform_instance: Optional[str] = None,
|
|
900
920
|
env: Optional[str] = None,
|
|
901
921
|
query: Optional[str] = None,
|
|
902
|
-
container:
|
|
922
|
+
container: Union[None, str, List[str]] = None,
|
|
903
923
|
status: Optional[RemovedStatusFilter] = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
904
|
-
batch_size: int =
|
|
924
|
+
batch_size: int = 5000,
|
|
905
925
|
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
906
926
|
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
927
|
+
skip_cache: bool = False,
|
|
907
928
|
) -> Iterable[str]:
|
|
908
929
|
"""Fetch all urns that match all of the given filters.
|
|
909
930
|
|
|
@@ -922,6 +943,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
922
943
|
Note that this requires browsePathV2 aspects (added in 0.10.4+).
|
|
923
944
|
:param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
|
|
924
945
|
:param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
|
|
946
|
+
:param skip_cache: Whether to bypass caching. Defaults to False.
|
|
925
947
|
|
|
926
948
|
:return: An iterable of urns that match the filters.
|
|
927
949
|
"""
|
|
@@ -949,7 +971,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
949
971
|
$query: String!,
|
|
950
972
|
$orFilters: [AndFilterInput!],
|
|
951
973
|
$batchSize: Int!,
|
|
952
|
-
$scrollId: String
|
|
974
|
+
$scrollId: String,
|
|
975
|
+
$skipCache: Boolean!,
|
|
976
|
+
$includeSoftDeleted: Boolean) {
|
|
953
977
|
|
|
954
978
|
scrollAcrossEntities(input: {
|
|
955
979
|
query: $query,
|
|
@@ -960,6 +984,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
960
984
|
searchFlags: {
|
|
961
985
|
skipHighlighting: true
|
|
962
986
|
skipAggregates: true
|
|
987
|
+
skipCache: $skipCache
|
|
988
|
+
includeSoftDeleted: $includeSoftDeleted
|
|
963
989
|
}
|
|
964
990
|
}) {
|
|
965
991
|
nextScrollId
|
|
@@ -978,6 +1004,12 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
978
1004
|
"query": query,
|
|
979
1005
|
"orFilters": orFilters,
|
|
980
1006
|
"batchSize": batch_size,
|
|
1007
|
+
"skipCache": skip_cache,
|
|
1008
|
+
"includeSoftDeleted": (
|
|
1009
|
+
None
|
|
1010
|
+
if status is None
|
|
1011
|
+
else status != RemovedStatusFilter.NOT_SOFT_DELETED
|
|
1012
|
+
),
|
|
981
1013
|
}
|
|
982
1014
|
|
|
983
1015
|
for entity in self._scroll_across_entities(graphql_query, variables):
|
|
@@ -987,13 +1019,13 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
987
1019
|
self,
|
|
988
1020
|
*,
|
|
989
1021
|
entity_types: Optional[List[str]] = None,
|
|
990
|
-
platform:
|
|
1022
|
+
platform: Union[None, str, List[str]] = None,
|
|
991
1023
|
platform_instance: Optional[str] = None,
|
|
992
1024
|
env: Optional[str] = None,
|
|
993
1025
|
query: Optional[str] = None,
|
|
994
|
-
container:
|
|
1026
|
+
container: Union[None, str, List[str]] = None,
|
|
995
1027
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
996
|
-
batch_size: int =
|
|
1028
|
+
batch_size: int = 5000,
|
|
997
1029
|
extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
998
1030
|
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
999
1031
|
extra_source_fields: Optional[List[str]] = None,
|
|
@@ -1083,7 +1115,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1083
1115
|
"query": query,
|
|
1084
1116
|
"orFilters": or_filters_final,
|
|
1085
1117
|
"batchSize": batch_size,
|
|
1086
|
-
"skipCache":
|
|
1118
|
+
"skipCache": skip_cache,
|
|
1087
1119
|
"fetchExtraFields": extra_source_fields,
|
|
1088
1120
|
}
|
|
1089
1121
|
|
|
@@ -1202,7 +1234,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1202
1234
|
operation_name: Optional[str] = None,
|
|
1203
1235
|
format_exception: bool = True,
|
|
1204
1236
|
) -> Dict:
|
|
1205
|
-
url = f"{self.
|
|
1237
|
+
url = f"{self._gms_server}/api/graphql"
|
|
1206
1238
|
|
|
1207
1239
|
body: Dict = {
|
|
1208
1240
|
"query": query,
|
|
@@ -1427,6 +1459,83 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1427
1459
|
related_aspects = response.get("relatedAspects", [])
|
|
1428
1460
|
return reference_count, related_aspects
|
|
1429
1461
|
|
|
1462
|
+
def get_kafka_consumer_offsets(
    self,
) -> dict:
    """Fetch Kafka consumer offsets from the DataHub OpenAPI operations endpoints.

    Three endpoints under ``self.config.server`` are queried in order
    (``mcp``, ``mcl``, ``mcl-timeseries``), each with ``skipCache=true`` and
    ``detailed=true``.

    Returns:
        A dict keyed by ``"mcp"``, ``"mcl"`` and ``"mcl-timeseries"`` holding
        whatever ``self._get_generic`` returned for that endpoint. A response
        that contains an ``"errors"`` key is logged at error level but is
        still included in the result.
    """
    base = self.config.server
    endpoint_by_name = {
        "mcp": f"{base}/openapi/operations/kafka/mcp/consumer/offsets",
        "mcl": f"{base}/openapi/operations/kafka/mcl/consumer/offsets",
        "mcl-timeseries": f"{base}/openapi/operations/kafka/mcl-timeseries/consumer/offsets",
    }
    query_params = {"skipCache": "true", "detailed": "true"}

    offsets = {}
    for name, endpoint in endpoint_by_name.items():
        payload = self._get_generic(url=endpoint, params=query_params)
        offsets[name] = payload
        # Errors are reported, not raised: callers still get the raw payload.
        if "errors" in payload:
            logger.error(f"Error: {payload['errors']}")
    return offsets
|
|
1486
|
+
|
|
1487
|
+
def _restore_index_call(self, payload_obj: dict) -> None:
    """POST a single restore-indices request and log the reply at debug level.

    Args:
        payload_obj: Request body passed unchanged to ``self._post_generic``
            for the ``restoreIndices`` operations action.
    """
    endpoint = f"{self._gms_server}/operations?action=restoreIndices"
    reply = self._post_generic(endpoint, payload_obj)
    logger.debug(f"Restore indices result: {reply}")
|
|
1492
|
+
|
|
1493
|
+
def restore_indices(
    self,
    urn_pattern: Optional[str] = None,
    aspect: Optional[str] = None,
    start: Optional[int] = None,
    batch_size: Optional[int] = None,
    file: Optional[str] = None,
) -> None:
    """Restore the indices for a given urn or urn-like pattern.

    When ``file`` is given, one request is issued per non-blank line of the
    file; otherwise a single request is built from the remaining arguments.
    A urn containing ``%`` is sent as ``urnLike``; anything else as ``urn``.

    Args:
        urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs.
            Ignored when ``file`` is provided.
        aspect: Optional aspect name to restrict the restore to a single aspect.
        start: Optional row number of the sql store to restore from. Only sent
            when not None. Ignored when ``file`` is provided.
        batch_size: Optional number of rows to restore. Only sent when not
            None. Ignored when ``file`` is provided.
        file: Optional path to a text file containing one URN (or urn-like
            pattern) per line.

    Returns:
        None. The server reply is only logged (at debug level) by
        ``_restore_index_call``.
    """
    if file is not None:
        with open(file) as f:
            # Materialize the lines so the progress bar knows the total.
            urns = [line.strip() for line in f]
        for urn in progressbar.progressbar(urns):
            if not urn:
                # Blank lines would otherwise be sent as an empty URN.
                continue
            # Build a fresh payload per line: reusing one dict would leak a
            # previous line's "urnLike"/"urn" key into this request.
            line_payload: dict = {"urnLike" if "%" in urn else "urn": urn}
            if aspect is not None:
                line_payload["aspect"] = aspect
            self._restore_index_call(line_payload)
        return

    payload_obj: dict = {}
    if urn_pattern is not None:
        payload_obj["urnLike" if "%" in urn_pattern else "urn"] = urn_pattern
    if aspect is not None:
        payload_obj["aspect"] = aspect
    if start is not None:
        payload_obj["start"] = start
    if batch_size is not None:
        payload_obj["batchSize"] = batch_size
    self._restore_index_call(payload_obj)
|
|
1538
|
+
|
|
1430
1539
|
@functools.lru_cache
|
|
1431
1540
|
def _make_schema_resolver(
|
|
1432
1541
|
self,
|
|
@@ -1491,7 +1600,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1491
1600
|
env: str = DEFAULT_ENV,
|
|
1492
1601
|
default_db: Optional[str] = None,
|
|
1493
1602
|
default_schema: Optional[str] = None,
|
|
1494
|
-
|
|
1603
|
+
override_dialect: Optional[str] = None,
|
|
1495
1604
|
) -> "SqlParsingResult":
|
|
1496
1605
|
from datahub.sql_parsing.sqlglot_lineage import sqlglot_lineage
|
|
1497
1606
|
|
|
@@ -1505,7 +1614,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1505
1614
|
schema_resolver=schema_resolver,
|
|
1506
1615
|
default_db=default_db,
|
|
1507
1616
|
default_schema=default_schema,
|
|
1508
|
-
|
|
1617
|
+
override_dialect=override_dialect,
|
|
1509
1618
|
)
|
|
1510
1619
|
|
|
1511
1620
|
def create_tag(self, tag_name: str) -> str:
|
|
@@ -1732,7 +1841,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1732
1841
|
"Accept": "application/json",
|
|
1733
1842
|
"Content-Type": "application/json",
|
|
1734
1843
|
}
|
|
1735
|
-
url = f"{self.
|
|
1844
|
+
url = f"{self._gms_server}/openapi/v2/entity/batch/{entity_name}"
|
|
1736
1845
|
response = self._session.post(url, data=json.dumps(payload), headers=headers)
|
|
1737
1846
|
response.raise_for_status()
|
|
1738
1847
|
|
|
@@ -1789,7 +1898,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1789
1898
|
"Content-Type": "application/json",
|
|
1790
1899
|
}
|
|
1791
1900
|
|
|
1792
|
-
url = f"{self.
|
|
1901
|
+
url = f"{self._gms_server}/openapi/v3/entity/{entity_name}/batchGet"
|
|
1793
1902
|
if with_system_metadata:
|
|
1794
1903
|
url += "?systemMetadata=true"
|
|
1795
1904
|
|
|
@@ -1963,6 +2072,202 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1963
2072
|
|
|
1964
2073
|
return res["reportAssertionResult"]
|
|
1965
2074
|
|
|
2075
|
+
def _get_invite_token(self) -> str:
|
|
2076
|
+
"""
|
|
2077
|
+
Retrieve an invite token for user creation.
|
|
2078
|
+
|
|
2079
|
+
Returns:
|
|
2080
|
+
Invite token string
|
|
2081
|
+
|
|
2082
|
+
Raises:
|
|
2083
|
+
OperationalError: If invite token retrieval fails
|
|
2084
|
+
"""
|
|
2085
|
+
get_invite_token_query = """
|
|
2086
|
+
query getInviteToken($input: GetInviteTokenInput!) {
|
|
2087
|
+
getInviteToken(input: $input) {
|
|
2088
|
+
inviteToken
|
|
2089
|
+
}
|
|
2090
|
+
}
|
|
2091
|
+
"""
|
|
2092
|
+
|
|
2093
|
+
try:
|
|
2094
|
+
invite_token_response = self.execute_graphql(
|
|
2095
|
+
query=get_invite_token_query,
|
|
2096
|
+
variables={"input": {}},
|
|
2097
|
+
)
|
|
2098
|
+
invite_token = invite_token_response.get("getInviteToken", {}).get(
|
|
2099
|
+
"inviteToken"
|
|
2100
|
+
)
|
|
2101
|
+
if not invite_token:
|
|
2102
|
+
raise OperationalError(
|
|
2103
|
+
"Failed to retrieve invite token. Ensure you have admin permissions.",
|
|
2104
|
+
{},
|
|
2105
|
+
)
|
|
2106
|
+
return invite_token
|
|
2107
|
+
except Exception as e:
|
|
2108
|
+
raise OperationalError(
|
|
2109
|
+
f"Failed to retrieve invite token: {str(e)}", {}
|
|
2110
|
+
) from e
|
|
2111
|
+
|
|
2112
|
+
def _create_user_with_token(
    self,
    user_urn: str,
    email: str,
    display_name: str,
    password: str,
    invite_token: str,
) -> None:
    """Create a user by POSTing to the frontend ``/signUp`` endpoint.

    The frontend URL is derived from ``self._gms_server`` via
    ``guess_frontend_url_from_gms_url``.

    Args:
        user_urn: User URN (urn:li:corpuser:{user_id})
        email: User's email address
        display_name: Full display name for the user
        password: User's password
        invite_token: Invite token for user creation

    Raises:
        OperationalError: If user creation fails, either with an HTTP error
            status or with any other exception raised while sending the request.
    """
    frontend_url = guess_frontend_url_from_gms_url(self._gms_server)
    signup_url = f"{frontend_url}/signUp"
    signup_payload = {
        "userUrn": user_urn,
        "email": email,
        "fullName": display_name,
        "password": password,
        "title": "Other",
        "inviteToken": invite_token,
    }

    logger.debug(
        f"Creating user with URN={user_urn}, email={email} at URL: {signup_url}"
    )
    # Both the password and the invite token are credentials: mask them so
    # neither ends up in debug logs.
    redacted_payload = {**signup_payload, "password": "***", "inviteToken": "***"}
    logger.debug(f"Signup payload: {json.dumps(redacted_payload)}")

    try:
        response = self._session.post(signup_url, json=signup_payload)
        logger.debug(f"Response status code: {response.status_code}")
        logger.debug(f"Response headers: {dict(response.headers)}")
        logger.debug(f"Response content length: {len(response.text)}")

        response.raise_for_status()

        # The /signUp endpoint returns 200 with empty body on success
        logger.debug("User created successfully")

    except HTTPError as http_err:
        error_details = {
            "url": signup_url,
            "status_code": response.status_code,
            "response_text": response.text[:500],
        }
        try:
            error_json = response.json()
        except ValueError:
            # Body is not JSON. JSON decode errors (stdlib and requests)
            # are ValueError subclasses, so this covers either flavour.
            error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
        else:
            error_details["error_response"] = error_json
            # Only a JSON object can carry a "message" field; for any other
            # JSON value fall back to the HTTP error text instead of crashing.
            if isinstance(error_json, dict):
                error_msg = error_json.get("message", str(http_err))
            else:
                error_msg = str(http_err)

        raise OperationalError(
            f"Failed to create user: {error_msg}",
            error_details,
        ) from http_err
    except Exception as e:
        raise OperationalError(
            f"Failed to create user: {str(e)}",
            {"url": signup_url, "error_type": type(e).__name__},
        ) from e
|
|
2184
|
+
|
|
2185
|
+
def _assign_role_to_user(self, user_urn: str, role: str) -> None:
|
|
2186
|
+
"""
|
|
2187
|
+
Assign a role to a user.
|
|
2188
|
+
|
|
2189
|
+
Args:
|
|
2190
|
+
user_urn: User URN
|
|
2191
|
+
role: Role to assign (Admin, Editor, or Reader)
|
|
2192
|
+
|
|
2193
|
+
Raises:
|
|
2194
|
+
ValueError: If role is invalid
|
|
2195
|
+
"""
|
|
2196
|
+
normalized_role = role.capitalize()
|
|
2197
|
+
valid_roles = ["Admin", "Editor", "Reader"]
|
|
2198
|
+
if normalized_role not in valid_roles:
|
|
2199
|
+
raise ValueError(
|
|
2200
|
+
f"Invalid role '{role}'. Must be one of: {', '.join(valid_roles)}"
|
|
2201
|
+
)
|
|
2202
|
+
|
|
2203
|
+
role_urn = f"urn:li:dataHubRole:{normalized_role}"
|
|
2204
|
+
|
|
2205
|
+
batch_assign_role_mutation = """
|
|
2206
|
+
mutation batchAssignRole($input: BatchAssignRoleInput!) {
|
|
2207
|
+
batchAssignRole(input: $input)
|
|
2208
|
+
}
|
|
2209
|
+
"""
|
|
2210
|
+
|
|
2211
|
+
try:
|
|
2212
|
+
self.execute_graphql(
|
|
2213
|
+
query=batch_assign_role_mutation,
|
|
2214
|
+
variables={"input": {"roleUrn": role_urn, "actors": [user_urn]}},
|
|
2215
|
+
)
|
|
2216
|
+
except Exception as e:
|
|
2217
|
+
logger.warning(f"Role assignment failed for user {user_urn}: {str(e)}")
|
|
2218
|
+
raise
|
|
2219
|
+
|
|
2220
|
+
def create_native_user(
    self,
    user_id: str,
    email: str,
    display_name: str,
    password: str,
    role: Optional[str] = None,
) -> str:
    """Create a native DataHub user with email/password authentication.

    Steps: validate the role (if any), fetch an invite token, sign the user
    up, then attempt the role assignment. A failed role assignment is only
    logged as a warning; the user URN is still returned.

    Args:
        user_id: User identifier (will be used in the URN)
        email: User's email address
        display_name: Full display name for the user
        password: User's password
        role: Optional role to assign (Admin, Editor, or Reader)

    Returns:
        User URN of the created user (urn:li:corpuser:{user_id})

    Raises:
        OperationalError: If user creation fails
        ValueError: If role is invalid
    """
    # Check the role first so an invalid one never leaves a half-configured
    # user behind.
    if role:
        allowed_roles = ["Admin", "Editor", "Reader"]
        if role.capitalize() not in allowed_roles:
            raise ValueError(
                f"Invalid role '{role}'. Must be one of: {', '.join(allowed_roles)}"
            )

    user_urn = f"urn:li:corpuser:{user_id}"
    token = self._get_invite_token()
    self._create_user_with_token(user_urn, email, display_name, password, token)

    if not role:
        return user_urn

    try:
        self._assign_role_to_user(user_urn, role)
    except Exception as e:
        # The account already exists at this point, so report and carry on.
        logger.warning(
            f"User {email} created successfully, but role assignment failed: {str(e)}"
        )
    return user_urn
|
|
2270
|
+
|
|
1966
2271
|
def close(self) -> None:
|
|
1967
2272
|
self._make_schema_resolver.cache_clear()
|
|
1968
2273
|
super().close()
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from enum import Enum, auto
|
|
3
2
|
from typing import Dict, List, Optional
|
|
4
3
|
|
|
5
4
|
from datahub.configuration.common import ConfigModel
|
|
5
|
+
from datahub.configuration.env_vars import get_datahub_component
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ClientMode(Enum):
|
|
@@ -11,7 +11,7 @@ class ClientMode(Enum):
|
|
|
11
11
|
SDK = auto()
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
DATAHUB_COMPONENT_ENV: str =
|
|
14
|
+
DATAHUB_COMPONENT_ENV: str = get_datahub_component().lower()
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class DatahubClientConfig(ConfigModel):
|
|
@@ -29,6 +29,7 @@ class DatahubClientConfig(ConfigModel):
|
|
|
29
29
|
openapi_ingestion: Optional[bool] = None
|
|
30
30
|
client_mode: Optional[ClientMode] = None
|
|
31
31
|
datahub_component: Optional[str] = None
|
|
32
|
+
server_config_refresh_interval: Optional[int] = None
|
|
32
33
|
|
|
33
34
|
class Config:
|
|
34
35
|
extra = "ignore"
|
|
@@ -76,11 +76,21 @@ class RemovedStatusFilter(enum.Enum):
|
|
|
76
76
|
"""Search only soft-deleted entities."""
|
|
77
77
|
|
|
78
78
|
|
|
79
|
+
def _validate_or_filter_structure(
|
|
80
|
+
or_filters: List[Dict[str, List[SearchFilterRule]]],
|
|
81
|
+
) -> None:
|
|
82
|
+
for filter_list in or_filters:
|
|
83
|
+
if "and" not in filter_list:
|
|
84
|
+
raise ValueError(f"Invalid or filter: {filter_list}")
|
|
85
|
+
if not isinstance(filter_list["and"], list):
|
|
86
|
+
raise ValueError(f"Invalid or filter: {filter_list}")
|
|
87
|
+
|
|
88
|
+
|
|
79
89
|
def generate_filter(
|
|
80
|
-
platform:
|
|
90
|
+
platform: Union[None, str, List[str]],
|
|
81
91
|
platform_instance: Optional[str],
|
|
82
92
|
env: Optional[str],
|
|
83
|
-
container:
|
|
93
|
+
container: Union[None, str, List[str]],
|
|
84
94
|
status: Optional[RemovedStatusFilter],
|
|
85
95
|
extra_filters: Optional[List[RawSearchFilterRule]],
|
|
86
96
|
extra_or_filters: Optional[RawSearchFilter] = None,
|
|
@@ -93,8 +103,7 @@ def generate_filter(
|
|
|
93
103
|
:param container: The container to filter by.
|
|
94
104
|
:param status: The status to filter by.
|
|
95
105
|
:param extra_filters: Extra AND filters to apply.
|
|
96
|
-
:param extra_or_filters: Extra OR filters to apply. These are combined with
|
|
97
|
-
the AND filters using an OR at the top level.
|
|
106
|
+
:param extra_or_filters: Extra OR filters to apply. These are combined with the AND filters using an OR at the top level.
|
|
98
107
|
"""
|
|
99
108
|
and_filters: List[RawSearchFilterRule] = []
|
|
100
109
|
|
|
@@ -218,23 +227,31 @@ def _get_status_filter(status: RemovedStatusFilter) -> Optional[SearchFilterRule
|
|
|
218
227
|
raise ValueError(f"Invalid status filter: {status}")
|
|
219
228
|
|
|
220
229
|
|
|
221
|
-
def _get_container_filter(container: str) -> SearchFilterRule:
|
|
230
|
+
def _get_container_filter(container: Union[str, List[str]]) -> SearchFilterRule:
    """Build a ``browsePathV2`` CONTAIN rule for one or more container urns.

    :param container: A single container urn or a list of them.
    :raises ValueError: If ``guess_entity_type`` does not report
        ``"container"`` for any of the given values.
    """
    containers = container if isinstance(container, list) else [container]

    # Reject anything that is not a fully qualified container urn.
    # TODO: Change this once we have a first-class container urn type.
    for candidate in containers:
        if guess_entity_type(candidate) != "container":
            raise ValueError(f"Invalid container urn: {candidate}")

    return SearchFilterRule(
        field="browsePathV2",
        values=containers,
        condition="CONTAIN",
    )
|
|
232
245
|
|
|
233
246
|
|
|
234
247
|
def _get_platform_instance_filter(
|
|
235
|
-
platform:
|
|
248
|
+
platform: Union[None, str, List[str]], platform_instance: str
|
|
236
249
|
) -> SearchFilterRule:
|
|
237
250
|
if platform:
|
|
251
|
+
if isinstance(platform, list):
|
|
252
|
+
raise ValueError(
|
|
253
|
+
"Platform instance filter cannot be combined with a multi-value platform filter."
|
|
254
|
+
)
|
|
238
255
|
# Massage the platform instance into a fully qualified urn, if necessary.
|
|
239
256
|
platform_instance = make_dataplatform_instance_urn(platform, platform_instance)
|
|
240
257
|
|
|
@@ -250,9 +267,11 @@ def _get_platform_instance_filter(
|
|
|
250
267
|
)
|
|
251
268
|
|
|
252
269
|
|
|
253
|
-
def _get_platform_filter(platform: str) -> SearchFilterRule:
|
|
270
|
+
def _get_platform_filter(platform: Union[str, List[str]]) -> SearchFilterRule:
    """Build a ``platform.keyword`` EQUAL rule for one or more platforms.

    :param platform: A single platform or a list of platforms; each value is
        passed through ``make_data_platform_urn``.
    """
    platforms = platform if isinstance(platform, list) else [platform]
    platform_urns = [make_data_platform_urn(name) for name in platforms]
    return SearchFilterRule(
        field="platform.keyword",
        condition="EQUAL",
        values=platform_urns,
    )
|