acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -2,23 +2,25 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
|
-
from urllib.parse import urlparse
|
|
6
5
|
|
|
7
6
|
import pydantic
|
|
8
7
|
from pydantic import Field
|
|
9
8
|
from typing_extensions import Literal
|
|
10
9
|
|
|
11
|
-
from datahub.configuration.common import
|
|
10
|
+
from datahub.configuration.common import (
|
|
11
|
+
AllowDenyPattern,
|
|
12
|
+
ConfigEnum,
|
|
13
|
+
ConfigModel,
|
|
14
|
+
HiddenFromDocs,
|
|
15
|
+
)
|
|
12
16
|
from datahub.configuration.source_common import (
|
|
13
17
|
DatasetSourceConfigMixin,
|
|
14
18
|
LowerCaseDatasetUrnConfigMixin,
|
|
15
19
|
)
|
|
16
20
|
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
17
21
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
18
|
-
from datahub.ingestion.source.ge_data_profiler import DATABRICKS
|
|
19
22
|
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
|
|
20
23
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
21
|
-
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
22
24
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
23
25
|
StatefulStaleMetadataRemovalConfig,
|
|
24
26
|
)
|
|
@@ -26,6 +28,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
26
28
|
StatefulIngestionConfigBase,
|
|
27
29
|
StatefulProfilingConfigMixin,
|
|
28
30
|
)
|
|
31
|
+
from datahub.ingestion.source.unity.connection import UnityCatalogConnectionConfig
|
|
29
32
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
30
33
|
from datahub.ingestion.source_config.operation_config import (
|
|
31
34
|
OperationConfig,
|
|
@@ -35,6 +38,22 @@ from datahub.utilities.global_warning_util import add_global_warning
|
|
|
35
38
|
|
|
36
39
|
logger = logging.getLogger(__name__)
|
|
37
40
|
|
|
41
|
+
# Configuration default constants
|
|
42
|
+
INCLUDE_TAGS_DEFAULT = True
|
|
43
|
+
INCLUDE_HIVE_METASTORE_DEFAULT = True
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class LineageDataSource(ConfigEnum):
|
|
47
|
+
AUTO = "AUTO"
|
|
48
|
+
SYSTEM_TABLES = "SYSTEM_TABLES"
|
|
49
|
+
API = "API"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class UsageDataSource(ConfigEnum):
|
|
53
|
+
AUTO = "AUTO"
|
|
54
|
+
SYSTEM_TABLES = "SYSTEM_TABLES"
|
|
55
|
+
API = "API"
|
|
56
|
+
|
|
38
57
|
|
|
39
58
|
class UnityCatalogProfilerConfig(ConfigModel):
|
|
40
59
|
method: str = Field(
|
|
@@ -118,6 +137,7 @@ class UnityCatalogGEProfilerConfig(UnityCatalogProfilerConfig, GEProfilingConfig
|
|
|
118
137
|
|
|
119
138
|
|
|
120
139
|
class UnityCatalogSourceConfig(
|
|
140
|
+
UnityCatalogConnectionConfig,
|
|
121
141
|
SQLCommonConfig,
|
|
122
142
|
StatefulIngestionConfigBase,
|
|
123
143
|
BaseUsageConfig,
|
|
@@ -125,23 +145,6 @@ class UnityCatalogSourceConfig(
|
|
|
125
145
|
StatefulProfilingConfigMixin,
|
|
126
146
|
LowerCaseDatasetUrnConfigMixin,
|
|
127
147
|
):
|
|
128
|
-
token: str = pydantic.Field(description="Databricks personal access token")
|
|
129
|
-
workspace_url: str = pydantic.Field(
|
|
130
|
-
description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
|
|
131
|
-
)
|
|
132
|
-
warehouse_id: Optional[str] = pydantic.Field(
|
|
133
|
-
default=None,
|
|
134
|
-
description="SQL Warehouse id, for running queries. If not set, will use the default warehouse.",
|
|
135
|
-
)
|
|
136
|
-
include_hive_metastore: bool = pydantic.Field(
|
|
137
|
-
default=True,
|
|
138
|
-
description="Whether to ingest legacy `hive_metastore` catalog. This requires executing queries on SQL warehouse.",
|
|
139
|
-
)
|
|
140
|
-
workspace_name: Optional[str] = pydantic.Field(
|
|
141
|
-
default=None,
|
|
142
|
-
description="Name of the workspace. Default to deployment name present in workspace_url",
|
|
143
|
-
)
|
|
144
|
-
|
|
145
148
|
include_metastore: bool = pydantic.Field(
|
|
146
149
|
default=False,
|
|
147
150
|
description=(
|
|
@@ -229,6 +232,15 @@ class UnityCatalogSourceConfig(
|
|
|
229
232
|
description="Option to enable/disable ownership generation for metastores, catalogs, schemas, and tables.",
|
|
230
233
|
)
|
|
231
234
|
|
|
235
|
+
include_tags: bool = pydantic.Field(
|
|
236
|
+
default=INCLUDE_TAGS_DEFAULT,
|
|
237
|
+
description=(
|
|
238
|
+
"Option to enable/disable column/table tag extraction. "
|
|
239
|
+
"Requires warehouse_id to be set since tag extraction needs to query system.information_schema.tags. "
|
|
240
|
+
"If warehouse_id is not provided, this will be automatically disabled to allow ingestion to continue."
|
|
241
|
+
),
|
|
242
|
+
)
|
|
243
|
+
|
|
232
244
|
_rename_table_ownership = pydantic_renamed_field(
|
|
233
245
|
"include_table_ownership", "include_ownership"
|
|
234
246
|
)
|
|
@@ -238,15 +250,40 @@ class UnityCatalogSourceConfig(
|
|
|
238
250
|
description="Option to enable/disable lineage generation. Currently we have to call a rest call per column to get column level lineage due to the Databrick api which can slow down ingestion. ",
|
|
239
251
|
)
|
|
240
252
|
|
|
253
|
+
lineage_data_source: LineageDataSource = pydantic.Field(
|
|
254
|
+
default=LineageDataSource.AUTO,
|
|
255
|
+
description=(
|
|
256
|
+
"Source for lineage data extraction. Options: "
|
|
257
|
+
f"'{LineageDataSource.AUTO.value}' - Use system tables when SQL warehouse is available, fallback to API; "
|
|
258
|
+
f"'{LineageDataSource.SYSTEM_TABLES.value}' - Force use of system.access.table_lineage and system.access.column_lineage tables (requires SQL warehouse); "
|
|
259
|
+
f"'{LineageDataSource.API.value}' - Force use of REST API endpoints for lineage data"
|
|
260
|
+
),
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
ignore_start_time_lineage: bool = pydantic.Field(
|
|
264
|
+
default=False,
|
|
265
|
+
description="Option to ignore the start_time and retrieve all available lineage. When enabled, the start_time filter will be set to zero to extract all lineage events regardless of the configured time window.",
|
|
266
|
+
)
|
|
267
|
+
|
|
241
268
|
column_lineage_column_limit: int = pydantic.Field(
|
|
242
269
|
default=300,
|
|
243
270
|
description="Limit the number of columns to get column level lineage. ",
|
|
244
271
|
)
|
|
245
272
|
|
|
246
|
-
lineage_max_workers: int = pydantic.Field(
|
|
273
|
+
lineage_max_workers: HiddenFromDocs[int] = pydantic.Field(
|
|
247
274
|
default=5 * (os.cpu_count() or 4),
|
|
248
275
|
description="Number of worker threads to use for column lineage thread pool executor. Set to 1 to disable.",
|
|
249
|
-
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
databricks_api_page_size: int = pydantic.Field(
|
|
279
|
+
default=0,
|
|
280
|
+
ge=0,
|
|
281
|
+
description=(
|
|
282
|
+
"Page size for Databricks API calls when listing resources (catalogs, schemas, tables, etc.). "
|
|
283
|
+
"When set to 0 (default), uses server-side configured page length (recommended). "
|
|
284
|
+
"When set to a positive value, the page length is the minimum of this value and the server configured value. "
|
|
285
|
+
"Must be a non-negative integer."
|
|
286
|
+
),
|
|
250
287
|
)
|
|
251
288
|
|
|
252
289
|
include_usage_statistics: bool = Field(
|
|
@@ -254,6 +291,17 @@ class UnityCatalogSourceConfig(
|
|
|
254
291
|
description="Generate usage statistics.",
|
|
255
292
|
)
|
|
256
293
|
|
|
294
|
+
usage_data_source: UsageDataSource = pydantic.Field(
|
|
295
|
+
default=UsageDataSource.AUTO,
|
|
296
|
+
description=(
|
|
297
|
+
"Source for usage/query history data extraction. Options: "
|
|
298
|
+
f"'{UsageDataSource.AUTO.value}' (default) - Automatically use system.query.history table when SQL warehouse is configured, otherwise fall back to REST API. "
|
|
299
|
+
"This provides better performance for multi-workspace setups and large query volumes when warehouse_id is set. "
|
|
300
|
+
f"'{UsageDataSource.SYSTEM_TABLES.value}' - Force use of system.query.history table (requires SQL warehouse and SELECT permission on system.query.history). "
|
|
301
|
+
f"'{UsageDataSource.API.value}' - Force use of REST API endpoints for query history (legacy method, may have limitations with multiple workspaces)."
|
|
302
|
+
),
|
|
303
|
+
)
|
|
304
|
+
|
|
257
305
|
# TODO: Remove `type:ignore` by refactoring config
|
|
258
306
|
profiling: Union[
|
|
259
307
|
UnityCatalogGEProfilerConfig, UnityCatalogAnalyzeProfilerConfig
|
|
@@ -273,19 +321,68 @@ class UnityCatalogSourceConfig(
|
|
|
273
321
|
description="Details about the delta lake, incase to emit siblings",
|
|
274
322
|
)
|
|
275
323
|
|
|
276
|
-
|
|
324
|
+
include_ml_model_aliases: bool = pydantic.Field(
|
|
325
|
+
default=False,
|
|
326
|
+
description="Whether to include ML model aliases in the ingestion.",
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
ml_model_max_results: int = pydantic.Field(
|
|
330
|
+
default=1000,
|
|
331
|
+
ge=0,
|
|
332
|
+
description="Maximum number of ML models to ingest.",
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
_forced_disable_tag_extraction: bool = pydantic.PrivateAttr(default=False)
|
|
336
|
+
_forced_disable_hive_metastore_extraction = pydantic.PrivateAttr(default=False)
|
|
337
|
+
|
|
338
|
+
include_hive_metastore: bool = pydantic.Field(
|
|
339
|
+
default=INCLUDE_HIVE_METASTORE_DEFAULT,
|
|
340
|
+
description="Whether to ingest legacy `hive_metastore` catalog. This requires executing queries on SQL warehouse.",
|
|
341
|
+
)
|
|
277
342
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
343
|
+
workspace_name: Optional[str] = pydantic.Field(
|
|
344
|
+
default=None,
|
|
345
|
+
description="Name of the workspace. Default to deployment name present in workspace_url",
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
def __init__(self, **data):
    """Build the config, then switch off SQL-backed features when no warehouse is set.

    Field parsing and root validators run first via the parent constructor,
    so ``self.warehouse_id`` is inspected only after they have finished.
    The user's request for each feature is read from the raw ``data``
    keyword arguments, falling back to the module-level defaults.

    When a feature was requested but ``warehouse_id`` is empty, the matching
    model field is set to ``False``, a warning is logged, and the
    corresponding private ``_forced_disable_*`` flag records that the
    feature was turned off by this constructor.
    """
    super().__init__(**data)

    # What the caller asked for, before any auto-disabling below.
    requested_tags = data.get("include_tags", INCLUDE_TAGS_DEFAULT)
    requested_hive_metastore = data.get(
        "include_hive_metastore", INCLUDE_HIVE_METASTORE_DEFAULT
    )

    # Tag extraction was requested but there is no warehouse to query.
    tags_forced_off = bool(requested_tags) and not self.warehouse_id
    if tags_forced_off:
        self.include_tags = False
        logger.warning(
            "warehouse_id is not set but include_tags=True. "
            "Automatically disabling tag extraction since it requires SQL queries. "
            "Set warehouse_id to enable tag extraction."
        )

    # Same check for the legacy hive_metastore catalog.
    hive_metastore_forced_off = (
        bool(requested_hive_metastore) and not self.warehouse_id
    )
    if hive_metastore_forced_off:
        self.include_hive_metastore = False
        logger.warning(
            "warehouse_id is not set but include_hive_metastore=True. "
            "Automatically disabling hive metastore extraction since it requires SQL queries. "
            "Set warehouse_id to enable hive metastore extraction."
        )

    # Record which features were disabled here.
    self._forced_disable_tag_extraction = tags_forced_off
    self._forced_disable_hive_metastore_extraction = hive_metastore_forced_off
|
|
290
387
|
|
|
291
388
|
def is_profiling_enabled(self) -> bool:
|
|
@@ -344,11 +441,6 @@ class UnityCatalogSourceConfig(
|
|
|
344
441
|
"When `warehouse_id` is set, it must match the `warehouse_id` in `profiling`."
|
|
345
442
|
)
|
|
346
443
|
|
|
347
|
-
if values.get("include_hive_metastore") and not values.get("warehouse_id"):
|
|
348
|
-
raise ValueError(
|
|
349
|
-
"When `include_hive_metastore` is set, `warehouse_id` must be set."
|
|
350
|
-
)
|
|
351
|
-
|
|
352
444
|
if values.get("warehouse_id") and profiling and not profiling.warehouse_id:
|
|
353
445
|
profiling.warehouse_id = values["warehouse_id"]
|
|
354
446
|
|
|
@@ -357,6 +449,34 @@ class UnityCatalogSourceConfig(
|
|
|
357
449
|
|
|
358
450
|
return values
|
|
359
451
|
|
|
452
|
+
@pydantic.root_validator(skip_on_failure=True)
def validate_lineage_data_source_with_warehouse(
    cls, values: Dict[str, Any]
) -> Dict[str, Any]:
    """Reject system-table lineage when no SQL warehouse is configured.

    Reads ``lineage_data_source`` (defaulting to ``LineageDataSource.AUTO``)
    and ``warehouse_id`` from the validated values.

    Raises:
        ValueError: if ``lineage_data_source`` is ``SYSTEM_TABLES`` and
            ``warehouse_id`` is missing or empty.

    Returns:
        The ``values`` dict, unchanged.
    """
    wants_system_tables = (
        values.get("lineage_data_source", LineageDataSource.AUTO)
        == LineageDataSource.SYSTEM_TABLES
    )
    has_warehouse = bool(values.get("warehouse_id"))

    if wants_system_tables and not has_warehouse:
        raise ValueError(
            f"lineage_data_source='{LineageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
        )

    return values
|
|
465
|
+
|
|
466
|
+
@pydantic.root_validator(skip_on_failure=True)
def validate_usage_data_source_with_warehouse(
    cls, values: Dict[str, Any]
) -> Dict[str, Any]:
    """Reject system-table usage extraction when no SQL warehouse is configured.

    Reads ``usage_data_source`` (defaulting to ``UsageDataSource.AUTO``) and
    ``warehouse_id`` from the validated values.

    Raises:
        ValueError: if ``usage_data_source`` is ``SYSTEM_TABLES`` and
            ``warehouse_id`` is missing or empty.

    Returns:
        The ``values`` dict, unchanged.
    """
    selected_source = values.get("usage_data_source", UsageDataSource.AUTO)
    configured_warehouse = values.get("warehouse_id")

    # Any other source, or a configured warehouse, needs no further checks.
    if selected_source != UsageDataSource.SYSTEM_TABLES or configured_warehouse:
        return values

    raise ValueError(
        f"usage_data_source='{UsageDataSource.SYSTEM_TABLES.value}' requires warehouse_id to be set"
    )
|
|
479
|
+
|
|
360
480
|
@pydantic.validator("schema_pattern", always=True)
|
|
361
481
|
def schema_pattern_should__always_deny_information_schema(
|
|
362
482
|
cls, v: AllowDenyPattern
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Databricks Unity Catalog connection configuration."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
from urllib.parse import urlparse
|
|
5
|
+
|
|
6
|
+
import pydantic
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from datahub.configuration.common import ConfigModel
|
|
10
|
+
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
11
|
+
|
|
12
|
+
DATABRICKS = "databricks"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class UnityCatalogConnectionConfig(ConfigModel):
    """
    Configuration for connecting to Databricks Unity Catalog.
    Contains only connection-related fields that can be reused across different sources.
    """

    # URL scheme handed to make_sqlalchemy_uri when building the connection URL.
    scheme: str = DATABRICKS
    token: str = pydantic.Field(description="Databricks personal access token")
    workspace_url: str = pydantic.Field(
        description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
    )
    warehouse_id: Optional[str] = pydantic.Field(
        default=None,
        description=(
            "SQL Warehouse id, for running queries. Must be explicitly provided to enable SQL-based features. "
            "Required for the following features that need SQL access: "
            "1) Tag extraction (include_tags=True) - queries system.information_schema.tags "
            "2) Hive Metastore catalog (include_hive_metastore=True) - queries legacy hive_metastore catalog "
            "3) System table lineage (lineage_data_source=SYSTEM_TABLES) - queries system.access.table_lineage/column_lineage "
            "4) Data profiling (profiling.enabled=True) - runs SELECT/ANALYZE queries on tables. "
            "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
        ),
    )

    extra_client_options: Dict[str, Any] = Field(
        default={},
        description="Additional options to pass to Databricks SQLAlchemy client.",
    )

    def __init__(self, **data: Any):
        # Explicit keyword-only constructor; all work is delegated to the parent.
        super().__init__(**data)

    def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
        """Return the SQLAlchemy URL for the configured SQL warehouse.

        Args:
            database: Optional catalog name. When given it is passed both as
                the URL database and as the ``catalog`` URI option.

        Returns:
            The URL produced by ``make_sqlalchemy_uri`` with username
            ``"token"``, the access token as password, and the network
            location of ``workspace_url`` as host.

        Raises:
            ValueError: if ``warehouse_id`` is not set. Without it the HTTP
                path would be built as ``/sql/1.0/warehouses/None``, which
                only fails later, at connection time, with a less clear error.
        """
        if not self.warehouse_id:
            raise ValueError(
                "warehouse_id must be set to build a Databricks SQL connection URL"
            )

        uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
        if database:
            uri_opts["catalog"] = database
        return make_sqlalchemy_uri(
            scheme=self.scheme,
            username="token",
            password=self.token,
            # Only the host[:port] part of the workspace URL is used.
            at=urlparse(self.workspace_url).netloc,
            db=database,
            uri_opts=uri_opts,
        )

    def get_options(self) -> dict:
        """Return the user-supplied ``extra_client_options`` dict as-is."""
        return self.extra_client_options
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from datahub.api.entities.external.external_entities import (
|
|
4
|
+
PlatformResourceRepository,
|
|
5
|
+
)
|
|
6
|
+
from datahub.ingestion.source.unity.tag_entities import (
|
|
7
|
+
UnityCatalogTagPlatformResource,
|
|
8
|
+
UnityCatalogTagPlatformResourceId,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class UnityCatalogPlatformResourceRepository(
|
|
15
|
+
PlatformResourceRepository[
|
|
16
|
+
UnityCatalogTagPlatformResourceId, UnityCatalogTagPlatformResource
|
|
17
|
+
]
|
|
18
|
+
):
|
|
19
|
+
"""Unity Catalog-specific platform resource repository with tag-related operations."""
|