acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""
|
|
2
|
+
External Tags Module
|
|
3
|
+
|
|
4
|
+
This module provides tag types that integrate with external systems like DataHub and Unity Catalog.
|
|
5
|
+
It builds on top of RestrictedText to provide sanitized, truncated tag handling with original value preservation.
|
|
6
|
+
|
|
7
|
+
Classes:
|
|
8
|
+
- ExternalTag: DataHub-compatible tag with key/value parsing from URNs
|
|
9
|
+
|
|
10
|
+
Example Usage:
|
|
11
|
+
# DataHub Tags
|
|
12
|
+
tag = ExternalTag.from_urn("urn:li:tag:environment:production")
|
|
13
|
+
datahub_urn = tag.to_datahub_tag_urn()  # Returns a TagUrn object
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import Any, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from pydantic import BaseModel
|
|
22
|
+
|
|
23
|
+
from datahub.api.entities.external.restricted_text import RestrictedText
|
|
24
|
+
from datahub.metadata.urns import TagUrn
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ExternalTag(BaseModel):
    """A tag made of a key and an optional value, both stored as RestrictedText.

    Instances can be created from an explicit key/value pair or parsed from a
    DataHub Tag URN whose name has the form ``key`` or ``key:value``.
    """

    key: RestrictedText
    value: Optional[RestrictedText] = None

    def __init__(
        self,
        key: Optional[Union[str, RestrictedText]] = None,
        value: Optional[Union[str, RestrictedText]] = None,
        **data: Any,
    ) -> None:
        """
        Initialize an ExternalTag from an explicit key/value or from Pydantic data.

        Args:
            key: Tag key as a plain string or RestrictedText. When omitted,
                initialization is delegated to Pydantic using ``data`` only.
            value: Optional tag value as a plain string or RestrictedText.
                Only used when ``key`` is provided.
            **data: Additional Pydantic field data
        """
        if key is None:
            # Standard pydantic initialization; note that `value` is not
            # forwarded in this branch.
            super().__init__(**data)
            return

        # Direct initialization: wrap plain values, pass RestrictedText through.
        processed_key = (
            key if isinstance(key, RestrictedText) else RestrictedText(raw_text=key)
        )
        processed_value = None
        if value is not None:
            processed_value = (
                value
                if isinstance(value, RestrictedText)
                else RestrictedText(raw_text=value)
            )

        super().__init__(
            key=processed_key,
            value=processed_value,
            **data,
        )

    @staticmethod
    def _parse_tag_name(tag_name: str) -> Tuple[str, Optional[str]]:
        """
        Parse a tag name into a key and an optional value.

        The name is split on the first ':' only, so the value may itself
        contain ':' characters. Without a ':' the whole name is the key.

        Args:
            tag_name: The tag name portion from the URN

        Returns:
            Tuple of (key, value) where value is None if there is no ':'
        """
        key, separator, value = tag_name.partition(":")
        if separator:
            return key, value
        return tag_name, None

    def to_datahub_tag_urn(self) -> TagUrn:
        """
        Build a DataHub TagUrn from the key and value.

        The tag name is assembled from the ``raw_text`` of the key and value,
        not from their string representations.

        Returns:
            TagUrn named 'key:value' if a value exists, otherwise 'key'
        """
        if self.value is not None:
            tag_name = f"{self.key.raw_text}:{self.value.raw_text}"
        else:
            tag_name = self.key.raw_text

        return TagUrn(name=tag_name)

    @classmethod
    def from_urn(cls, tag_urn: Union[str, "TagUrn"]) -> "ExternalTag":
        """
        Create an ExternalTag from a DataHub Tag URN.

        Args:
            tag_urn: DataHub Tag URN string or TagUrn object

        Returns:
            ExternalTag instance (of the class this is called on)
        """
        if isinstance(tag_urn, str):
            tag_urn = TagUrn.from_string(tag_urn)
        key, value = cls._parse_tag_name(tag_urn.name)
        return cls(key=key, value=value)

    @classmethod
    def from_key_value(cls, key: str, value: Optional[str] = None) -> "ExternalTag":
        """
        Create an ExternalTag from an explicit key and value.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            ExternalTag instance (of the class this is called on)
        """
        return cls(key=key, value=value)

    def __str__(self) -> str:
        """Return 'key:value' when a value is set, otherwise just the key."""
        if self.value is not None:
            return f"{self.key}:{self.value}"
        return str(self.key)

    def __repr__(self) -> str:
        """Return a debug representation that names the actual (sub)class."""
        # Use the runtime class name so subclasses inheriting this method do
        # not report themselves as "ExternalTag".
        class_name = type(self).__name__
        if self.value is not None:
            return f"{class_name}(key={self.key!r}, value={self.value!r})"
        return f"{class_name}(key={self.key!r})"
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# Import RestrictedText from your existing module
|
|
2
|
+
# Uncomment and adjust the import path as needed:
|
|
3
|
+
# from your_restricted_text_module import RestrictedText
|
|
4
|
+
# The following is a list of tag constraints:
|
|
5
|
+
# You can assign a maximum of 50 tags to a single securable object.
|
|
6
|
+
# The maximum length of a tag key is 255 characters.
|
|
7
|
+
# The maximum length of a tag value is 1000 characters.
|
|
8
|
+
# The following characters are not allowed in tag keys:
|
|
9
|
+
# . , - = / :
|
|
10
|
+
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
|
|
11
|
+
# Tag search requires exact term matching.
|
|
12
|
+
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
|
|
13
|
+
from typing import Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
from pydantic import validator
|
|
16
|
+
from typing_extensions import ClassVar
|
|
17
|
+
|
|
18
|
+
from datahub.api.entities.external.external_tag import ExternalTag
|
|
19
|
+
from datahub.api.entities.external.restricted_text import RestrictedText
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LakeFormationTagKeyText(RestrictedText):
    """RestrictedText subclass carrying the defaults used for Lake Formation tag keys."""

    # Lake Formation tag key settings.
    # Truncated keys get no suffix so they remain clean identifiers.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = ""
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    DEFAULT_MAX_LENGTH: ClassVar[int] = 50
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class LakeFormationTagValueText(RestrictedText):
    """RestrictedText subclass carrying the defaults used for Lake Formation tag values."""

    # Lake Formation tag value settings.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
    DEFAULT_MAX_LENGTH: ClassVar[int] = 50
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class LakeFormationTag(ExternalTag):
|
|
41
|
+
"""
|
|
42
|
+
A tag type specifically designed for LakeFormation tag restrictions.
|
|
43
|
+
|
|
44
|
+
LakeFormation Tag Restrictions:
|
|
45
|
+
- Key: Max 127 characters, alphanumeric + hyphens, underscores, periods only
|
|
46
|
+
- Value: Max 256 characters, more permissive but no control characters
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
key: LakeFormationTagKeyText
|
|
50
|
+
value: Optional[LakeFormationTagValueText] = None
|
|
51
|
+
catalog: Optional[str] = None
|
|
52
|
+
|
|
53
|
+
# Pydantic v1 validators
|
|
54
|
+
@validator("key", pre=True)
@classmethod
def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
    """Pre-validator that coerces the ``key`` field to LakeFormationTagKeyText (Pydantic v1)."""
    if isinstance(v, LakeFormationTagKeyText):
        # Already the target type: return it unchanged.
        return v

    # An object exposing `raw_text` (e.g. a RestrictedText produced by the
    # parent class) is re-wrapped from that raw text; any other input is
    # used as the raw text directly.
    raw = v.raw_text if hasattr(v, "raw_text") else v
    return LakeFormationTagKeyText(raw_text=raw)
|
|
66
|
+
|
|
67
|
+
@validator("value", pre=True)
@classmethod
def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
    """Pre-validator that coerces the ``value`` field to LakeFormationTagValueText (Pydantic v1).

    Returns None when the input is None or its text is an empty string.
    """
    if v is None or isinstance(v, LakeFormationTagValueText):
        # Nothing to convert: None stays None, the target type passes through.
        return v

    # An object exposing `raw_text` (e.g. a RestrictedText produced by the
    # parent class) contributes that raw text; any other input is the text.
    text_value = v.raw_text if hasattr(v, "raw_text") else v

    # An empty string becomes None so no empty value ends up in the DataHub tag.
    if not str(text_value):
        return None

    return LakeFormationTagValueText(raw_text=text_value)
|
|
90
|
+
|
|
91
|
+
def __eq__(self, other: object) -> bool:
    """Compare two tags by the string form of their key and value.

    Objects that are not LakeFormationTag instances never compare equal.
    A falsy value is treated as ``None`` for the comparison.
    """
    if not isinstance(other, LakeFormationTag):
        return False

    # Keys are compared first; a mismatch short-circuits the value check.
    if str(self.key) != str(other.key):
        return False

    own_value = str(self.value) if self.value else None
    their_value = str(other.value) if other.value else None
    return own_value == their_value
|
|
98
|
+
|
|
99
|
+
def __hash__(self) -> int:
|
|
100
|
+
"""Make LakeFormationTag hashable based on key and value."""
|
|
101
|
+
return hash((str(self.key), str(self.value) if self.value else None))
|
|
102
|
+
|
|
103
|
+
@classmethod
|
|
104
|
+
def from_dict(cls, tag_dict: Dict[str, Any]) -> "LakeFormationTag":
    """
    Build a tag from a mapping holding 'key' and, optionally, 'value'.

    Args:
        tag_dict: Mapping with a mandatory 'key' entry and an optional
            'value' entry; a missing 'value' is passed on as None.

    Returns:
        Instance of the class this method is called on

    Raises:
        KeyError: If 'key' is absent from tag_dict.
    """
    tag_key = tag_dict["key"]
    tag_value = tag_dict.get("value")
    return cls(key=tag_key, value=tag_value)
|
|
115
|
+
|
|
116
|
+
@classmethod
|
|
117
|
+
def from_key_value(
    cls, key: str, value: Optional[str] = None
) -> "LakeFormationTag":
    """
    Create a LakeFormationTag from explicit key and value.

    Overrides the parent method to return the correct type.

    Args:
        key: Tag key
        value: Optional tag value

    Returns:
        LakeFormationTag instance
    """
    return cls(key=key, value=value)
|
|
133
|
+
|
|
134
|
+
def to_dict(self) -> Dict[str, str]:
    """
    Export the tag's raw (unprocessed) key and value as a dictionary.

    Returns:
        Dictionary holding 'key' and, when a value is set, 'value'
    """
    payload: Dict[str, str] = {"key": self.key.raw_text}
    tag_value = self.value
    if tag_value is None:
        # No value set: the 'value' entry is left out entirely.
        return payload
    payload["value"] = tag_value.raw_text
    return payload
|
|
145
|
+
|
|
146
|
+
def to_display_dict(self) -> Dict[str, str]:
    """
    Export the tag's key and value in their ``str()`` (processed) form.

    Returns:
        Dictionary holding the processed 'key' and, when a value is set,
        the processed 'value'
    """
    shown: Dict[str, str] = {"key": str(self.key)}
    if self.value is not None:
        shown.update(value=str(self.value))
    return shown
|
|
157
|
+
|
|
158
|
+
def __repr__(self) -> str:
|
|
159
|
+
if self.value:
|
|
160
|
+
return f"LakeFormationTag(key={self.key!r}, value={self.value!r})"
|
|
161
|
+
else:
|
|
162
|
+
return f"LakeFormationTag(key={self.key!r})"
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""The `RestrictedText` module provides a custom Pydantic type that stores the original
|
|
2
|
+
value but returns a truncated and sanitized version when accessed.
|
|
3
|
+
|
|
4
|
+
Features:
|
|
5
|
+
- Configurable maximum length with truncation
|
|
6
|
+
- Character replacement (default replaces with underscore)
|
|
7
|
+
- Preserves original value internally
|
|
8
|
+
- Customizable truncation suffix
|
|
9
|
+
- Compatible with both Pydantic v1 and v2
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import ClassVar, Optional, Set
|
|
15
|
+
|
|
16
|
+
from datahub.configuration.common import ConfigModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RestrictedText(ConfigModel):
    """A string type that stores the original value but returns a truncated and sanitized version.

    This type allows you to:
    - Set a maximum length for the displayed value
    - Replace specific characters with a replacement character
    - Access both the original and processed values

    ```python
    # Basic usage with default settings
    name = RestrictedText(
        raw_text="This is a very long string with special characters!"
    )

    # Custom max length and character replacement
    custom = RestrictedText(
        raw_text="hello-world.test",
        max_length=10,
        forbidden_chars={' ', '-', '.'},
        replacement_char='_'
    )

    print(name)  # Truncated and sanitized version
    print(name.raw_text)  # Original value
    print(custom)  # "hello_w..."
    ```

    Per-instance settings left as ``None`` fall back to the ``DEFAULT_*``
    class attributes, which subclasses may override.
    """

    # Default configuration
    DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."

    raw_text: str
    max_length: Optional[int] = None
    forbidden_chars: Optional[Set[str]] = None
    replacement_char: Optional[str] = None
    truncation_suffix: Optional[str] = None
    _processed_value: Optional[str] = None

    def __init__(self, **data):
        """Build the model, then compute the processed value from ``raw_text``."""
        super().__init__(**data)
        self.validate_text()

    @classmethod
    def __get_validators__(cls):
        """Yield the validator chain: accept raw input, build, then re-process."""
        yield cls.pydantic_accept_raw_text
        yield cls.validate
        yield cls.pydantic_validate_text

    @classmethod
    def pydantic_accept_raw_text(cls, v):
        """Normalize the input so that ``validate`` receives an instance or a dict.

        Instances and dicts are passed through unchanged; a plain string is
        wrapped into the keyword dict used to construct the model.
        """
        if isinstance(v, (RestrictedText, dict)):
            return v
        assert isinstance(v, str), "text must be a string"
        # The model field is ``raw_text``; any other key would make the
        # ``cls(**v)`` call in ``validate`` fail for every plain string.
        return {"raw_text": v}

    @classmethod
    def pydantic_validate_text(cls, v):
        """Recompute the processed value on the instance and return it."""
        assert isinstance(v, RestrictedText)
        # Called outside of an assert so that the processing still runs when
        # Python is started with -O (which strips assert statements).
        v.validate_text()
        return v

    @classmethod
    def validate(cls, v):
        """Validate and create a RestrictedText instance.

        Raises:
            ValueError: If ``v`` is neither a RestrictedText nor a dict.
        """
        if isinstance(v, RestrictedText):
            return v

        # This should be a dict at this point from pydantic_accept_raw_text
        if isinstance(v, dict):
            # __init__ already runs validate_text(), so no second call is needed.
            return cls(**v)

        raise ValueError(f"Unable to validate RestrictedText from {type(v)}")

    def validate_text(self) -> bool:
        """Apply the restrictions to ``raw_text`` and cache the processed value.

        Returns:
            Always ``True``.
        """
        # Set defaults if not provided
        max_length = (
            self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
        )
        forbidden_chars = (
            self.forbidden_chars
            if self.forbidden_chars is not None
            else self.DEFAULT_FORBIDDEN_CHARS
        )
        replacement_char = (
            self.replacement_char
            if self.replacement_char is not None
            else self.DEFAULT_REPLACEMENT_CHAR
        )
        truncation_suffix = (
            self.truncation_suffix
            if self.truncation_suffix is not None
            else self.DEFAULT_TRUNCATION_SUFFIX
        )

        # Store processed value
        self._processed_value = self._process_value(
            self.raw_text,
            max_length,
            forbidden_chars,
            replacement_char,
            truncation_suffix,
        )
        return True

    def _process_value(
        self,
        value: str,
        max_length: Optional[int],
        forbidden_chars: Set[str],
        replacement_char: str,
        truncation_suffix: str,
    ) -> str:
        """Process the value by replacing characters and truncating.

        Args:
            value: Text to process.
            max_length: Maximum length of the result; ``None`` disables truncation.
            forbidden_chars: Substrings that are each replaced by ``replacement_char``.
            replacement_char: Replacement for every forbidden substring.
            truncation_suffix: Appended after truncation when it is shorter
                than ``max_length``; otherwise the text is cut without a suffix.

        Returns:
            The sanitized text, at most ``max_length`` characters long when a
            limit is given.
        """
        # Replace specified characters
        processed = value
        for char in forbidden_chars:
            processed = processed.replace(char, replacement_char)

        # Truncate if necessary
        if max_length is not None and len(processed) > max_length:
            if len(truncation_suffix) >= max_length:
                # If suffix is too long, just truncate without suffix
                processed = processed[:max_length]
            else:
                # Truncate and add suffix
                truncate_length = max_length - len(truncation_suffix)
                processed = processed[:truncate_length] + truncation_suffix

        return processed

    def __str__(self) -> str:
        """Return the processed (truncated and sanitized) value."""
        return self._processed_value or ""

    def __repr__(self) -> str:
        """Return ``ClassName(<raw_text repr>)``, showing the original text."""
        return f"{self.__class__.__name__}({self.raw_text!r})"

    @property
    def processed(self) -> str:
        """Get the processed (truncated and sanitized) value."""
        return self._processed_value or ""
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Tag types for Unity Catalog, built on RestrictedText (imported below from
|
|
2
|
+
# datahub.api.entities.external.restricted_text).
|
|
3
|
+
#
|
|
4
|
+
# The following is a list of tag constraints:
|
|
5
|
+
# You can assign a maximum of 50 tags to a single securable object.
|
|
6
|
+
# The maximum length of a tag key is 255 characters.
|
|
7
|
+
# The maximum length of a tag value is 1000 characters.
|
|
8
|
+
# The following characters are not allowed in tag keys:
|
|
9
|
+
# . , - = / :
|
|
10
|
+
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
|
|
11
|
+
# Tag search requires exact term matching.
|
|
12
|
+
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
|
|
13
|
+
from typing import Any, Dict, Optional, Set
|
|
14
|
+
|
|
15
|
+
# Import validator for Pydantic v1 (always needed since we removed conditional logic)
|
|
16
|
+
from pydantic import validator
|
|
17
|
+
from typing_extensions import ClassVar
|
|
18
|
+
|
|
19
|
+
from datahub.api.entities.external.external_tag import ExternalTag
|
|
20
|
+
from datahub.api.entities.external.restricted_text import RestrictedText
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UnityCatalogTagKeyText(RestrictedText):
    """RestrictedText preset holding the defaults used for Unity Catalog tag keys."""

    # Maximum length of a processed key.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 255
    # Tab/newline/carriage return plus the characters that are not allowed
    # in tag keys:  . , - = / :
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = set("\t\n\r") | set(".,-=/:")
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
    # Empty suffix: a truncated key is simply cut, keeping it a clean identifier.
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class UnityCatalogTagValueText(RestrictedText):
    """RestrictedText preset holding the defaults used for Unity Catalog tag values."""

    # Maximum length of a processed value.
    DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
    # Values are more permissive than keys: only tab, newline and carriage
    # return are replaced (by a space).
    DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = set("\t\n\r")
    DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
    DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class UnityCatalogTag(ExternalTag):
    """
    A tag type specifically designed for Unity Catalog tag restrictions.

    Unity Catalog Tag Restrictions (as configured on the key/value text types):
    - Key: Max 255 characters; the characters . , - = / : as well as tab,
      newline and carriage return are replaced with underscores
    - Value: Max 1000 characters; tab, newline and carriage return are
      replaced with spaces
    """

    key: UnityCatalogTagKeyText
    value: Optional[UnityCatalogTagValueText] = None

    # Pydantic v1 validators
    @validator("key", pre=True)
    @classmethod
    def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
        """Validate and convert key field for Pydantic v1."""
        if isinstance(v, UnityCatalogTagKeyText):
            return v

        # If we get a RestrictedText object from parent class validation, use its raw_text value
        if hasattr(v, "raw_text"):
            return UnityCatalogTagKeyText(raw_text=v.raw_text)

        return UnityCatalogTagKeyText(raw_text=v)

    @validator("value", pre=True)
    @classmethod
    def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
        """Validate and convert value field for Pydantic v1.

        Returns ``None`` for ``None`` input and for input whose text is empty.
        """
        if v is None:
            return None

        if isinstance(v, UnityCatalogTagValueText):
            return v

        # If we get a RestrictedText object from parent class validation, use its raw_text value
        text_value = v.raw_text if hasattr(v, "raw_text") else v

        # If value is an empty string, set it to None to not generate empty value in DataHub tag
        if not str(text_value):
            return None

        return UnityCatalogTagValueText(raw_text=text_value)

    def __eq__(self, other: object) -> bool:
        """Check equality based on the processed (``str()``) key and value."""
        if not isinstance(other, UnityCatalogTag):
            return False
        return str(self.key) == str(other.key) and (
            str(self.value) if self.value else None
        ) == (str(other.value) if other.value else None)

    def __hash__(self) -> int:
        """Make UnityCatalogTag hashable based on key and value."""
        return hash((str(self.key), str(self.value) if self.value else None))

    @classmethod
    def from_dict(cls, tag_dict: Dict[str, Any]) -> "UnityCatalogTag":
        """
        Create a UnityCatalogTag from a dictionary with 'key' and optional 'value'.

        The dictionary entries are passed to the constructor as keyword
        arguments.

        Args:
            tag_dict: Dictionary with 'key' and optional 'value' keys

        Returns:
            UnityCatalogTag instance
        """
        return cls(**tag_dict)

    @classmethod
    def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
        """
        Create a UnityCatalogTag from explicit key and value.

        Overrides the parent method to return the correct type.

        Args:
            key: Tag key
            value: Optional tag value

        Returns:
            UnityCatalogTag instance
        """
        return cls(key=key, value=value)

    def to_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format suitable for Unity Catalog API.

        The raw (unprocessed) key and value texts are used.

        Returns:
            Dictionary with 'key' and optionally 'value'
        """
        result: Dict[str, str] = {"key": self.key.raw_text}
        if self.value is not None:
            result["value"] = self.value.raw_text
        return result

    def to_display_dict(self) -> Dict[str, str]:
        """
        Convert to dictionary format showing processed values.

        Returns:
            Dictionary with processed 'key' and optional 'value'
        """
        result: Dict[str, str] = {"key": str(self.key)}
        if self.value is not None:
            result["value"] = str(self.value)
        return result

    def __repr__(self) -> str:
        """Return a debug representation; the value is shown only when truthy."""
        if self.value:
            return f"UnityCatalogTag(key={self.key!r}, value={self.value!r})"
        else:
            return f"UnityCatalogTag(key={self.key!r})"
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from typing import List, Optional, Union
|
|
6
6
|
|
|
7
7
|
import yaml
|
|
8
|
-
from pydantic import validator
|
|
8
|
+
from pydantic import Field, validator
|
|
9
9
|
from ruamel.yaml import YAML
|
|
10
10
|
from typing_extensions import Literal
|
|
11
11
|
|
|
@@ -67,7 +67,7 @@ class Prompt(ConfigModel):
|
|
|
67
67
|
description: Optional[str] = None
|
|
68
68
|
type: str
|
|
69
69
|
structured_property_id: Optional[str] = None
|
|
70
|
-
structured_property_urn: Optional[str] = None
|
|
70
|
+
structured_property_urn: Optional[str] = Field(default=None, validate_default=True)
|
|
71
71
|
required: Optional[bool] = None
|
|
72
72
|
|
|
73
73
|
@validator("structured_property_urn", pre=True, always=True)
|
|
@@ -111,7 +111,7 @@ class Actors(ConfigModel):
|
|
|
111
111
|
|
|
112
112
|
class Forms(ConfigModel):
|
|
113
113
|
id: Optional[str] = None
|
|
114
|
-
urn: Optional[str] = None
|
|
114
|
+
urn: Optional[str] = Field(default=None, validate_default=True)
|
|
115
115
|
name: str
|
|
116
116
|
description: Optional[str] = None
|
|
117
117
|
prompts: List[Prompt] = []
|