acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
+
import json
|
|
4
5
|
from typing import (
|
|
6
|
+
TYPE_CHECKING,
|
|
7
|
+
Annotated,
|
|
5
8
|
Any,
|
|
6
9
|
ClassVar,
|
|
7
10
|
Iterator,
|
|
@@ -15,7 +18,10 @@ from typing import (
|
|
|
15
18
|
import pydantic
|
|
16
19
|
|
|
17
20
|
from datahub.configuration.common import ConfigModel
|
|
18
|
-
from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2
|
|
21
|
+
from datahub.configuration.pydantic_migration_helpers import (
|
|
22
|
+
PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
|
|
23
|
+
PYDANTIC_VERSION_2,
|
|
24
|
+
)
|
|
19
25
|
from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
|
|
20
26
|
from datahub.ingestion.graph.filters import (
|
|
21
27
|
FilterOperator,
|
|
@@ -24,7 +30,14 @@ from datahub.ingestion.graph.filters import (
|
|
|
24
30
|
_get_status_filter,
|
|
25
31
|
)
|
|
26
32
|
from datahub.metadata.schema_classes import EntityTypeName
|
|
27
|
-
from datahub.metadata.urns import DataPlatformUrn, DomainUrn
|
|
33
|
+
from datahub.metadata.urns import (
|
|
34
|
+
ContainerUrn,
|
|
35
|
+
CorpGroupUrn,
|
|
36
|
+
CorpUserUrn,
|
|
37
|
+
DataPlatformUrn,
|
|
38
|
+
DomainUrn,
|
|
39
|
+
)
|
|
40
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
28
41
|
|
|
29
42
|
_AndSearchFilterRule = TypedDict(
|
|
30
43
|
"_AndSearchFilterRule", {"and": List[SearchFilterRule]}
|
|
@@ -33,21 +46,32 @@ _OrFilters = List[_AndSearchFilterRule]
|
|
|
33
46
|
|
|
34
47
|
|
|
35
48
|
class _BaseFilter(ConfigModel):
|
|
36
|
-
class Config:
|
|
37
|
-
# We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
|
|
38
|
-
# doesn't recognize it properly. So unfortunately we'll need to live
|
|
39
|
-
# with the deprecation warning w/ pydantic v2.
|
|
40
|
-
allow_population_by_field_name = True
|
|
41
|
-
if PYDANTIC_VERSION_2:
|
|
42
|
-
populate_by_name = True
|
|
49
|
+
model_config = pydantic.ConfigDict(populate_by_name=True)
|
|
43
50
|
|
|
44
51
|
@abc.abstractmethod
|
|
45
|
-
def compile(self) -> _OrFilters:
|
|
46
|
-
pass
|
|
52
|
+
def compile(self) -> _OrFilters: ...
|
|
47
53
|
|
|
48
54
|
def dfs(self) -> Iterator[_BaseFilter]:
|
|
49
55
|
yield self
|
|
50
56
|
|
|
57
|
+
@classmethod
|
|
58
|
+
def _field_discriminator(cls) -> str:
|
|
59
|
+
if cls is _BaseFilter:
|
|
60
|
+
raise ValueError("Cannot get discriminator for _BaseFilter")
|
|
61
|
+
if PYDANTIC_VERSION_2:
|
|
62
|
+
fields: dict = cls.model_fields # type: ignore
|
|
63
|
+
else:
|
|
64
|
+
fields = cls.__fields__ # type: ignore
|
|
65
|
+
|
|
66
|
+
# Assumes that there's only one field name per filter.
|
|
67
|
+
# If that's not the case, this method should be overridden.
|
|
68
|
+
if len(fields.keys()) != 1:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
f"Found multiple fields that could be the discriminator for this filter: {list(fields.keys())}"
|
|
71
|
+
)
|
|
72
|
+
name, field = next(iter(fields.items()))
|
|
73
|
+
return field.alias or name # type: ignore
|
|
74
|
+
|
|
51
75
|
|
|
52
76
|
class _EntityTypeFilter(_BaseFilter):
|
|
53
77
|
"""Filter for specific entity types.
|
|
@@ -59,7 +83,7 @@ class _EntityTypeFilter(_BaseFilter):
|
|
|
59
83
|
ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
|
|
60
84
|
|
|
61
85
|
entity_type: List[str] = pydantic.Field(
|
|
62
|
-
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
|
|
86
|
+
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
|
|
63
87
|
)
|
|
64
88
|
|
|
65
89
|
def _build_rule(self) -> SearchFilterRule:
|
|
@@ -74,15 +98,19 @@ class _EntityTypeFilter(_BaseFilter):
|
|
|
74
98
|
|
|
75
99
|
|
|
76
100
|
class _EntitySubtypeFilter(_BaseFilter):
|
|
77
|
-
entity_subtype: str = pydantic.Field(
|
|
101
|
+
entity_subtype: List[str] = pydantic.Field(
|
|
78
102
|
description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
|
|
79
103
|
)
|
|
80
104
|
|
|
105
|
+
@pydantic.validator("entity_subtype", pre=True)
|
|
106
|
+
def validate_entity_subtype(cls, v: str) -> List[str]:
|
|
107
|
+
return [v] if not isinstance(v, list) else v
|
|
108
|
+
|
|
81
109
|
def _build_rule(self) -> SearchFilterRule:
|
|
82
110
|
return SearchFilterRule(
|
|
83
111
|
field="typeNames",
|
|
84
112
|
condition="EQUAL",
|
|
85
|
-
values=[self.entity_subtype],
|
|
113
|
+
values=self.entity_subtype,
|
|
86
114
|
)
|
|
87
115
|
|
|
88
116
|
def compile(self) -> _OrFilters:
|
|
@@ -148,6 +176,39 @@ class _DomainFilter(_BaseFilter):
|
|
|
148
176
|
return [{"and": [self._build_rule()]}]
|
|
149
177
|
|
|
150
178
|
|
|
179
|
+
class _ContainerFilter(_BaseFilter):
|
|
180
|
+
container: List[str]
|
|
181
|
+
direct_descendants_only: bool = pydantic.Field(
|
|
182
|
+
default=False,
|
|
183
|
+
description="If true, only entities that are direct descendants of the container will be returned.",
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
@pydantic.validator("container", each_item=True)
|
|
187
|
+
def validate_container(cls, v: str) -> str:
|
|
188
|
+
return str(ContainerUrn.from_string(v))
|
|
189
|
+
|
|
190
|
+
@classmethod
|
|
191
|
+
def _field_discriminator(cls) -> str:
|
|
192
|
+
return "container"
|
|
193
|
+
|
|
194
|
+
def _build_rule(self) -> SearchFilterRule:
|
|
195
|
+
if self.direct_descendants_only:
|
|
196
|
+
return SearchFilterRule(
|
|
197
|
+
field="container",
|
|
198
|
+
condition="EQUAL",
|
|
199
|
+
values=self.container,
|
|
200
|
+
)
|
|
201
|
+
else:
|
|
202
|
+
return SearchFilterRule(
|
|
203
|
+
field="browsePathV2",
|
|
204
|
+
condition="CONTAIN",
|
|
205
|
+
values=self.container,
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
def compile(self) -> _OrFilters:
|
|
209
|
+
return [{"and": [self._build_rule()]}]
|
|
210
|
+
|
|
211
|
+
|
|
151
212
|
class _EnvFilter(_BaseFilter):
|
|
152
213
|
# Note that not all entity types have an env (e.g. dashboards / charts).
|
|
153
214
|
# If the env filter is specified, these will be excluded.
|
|
@@ -181,6 +242,94 @@ class _EnvFilter(_BaseFilter):
|
|
|
181
242
|
]
|
|
182
243
|
|
|
183
244
|
|
|
245
|
+
class _OwnerFilter(_BaseFilter):
|
|
246
|
+
"""Filter for entities owned by specific users or groups."""
|
|
247
|
+
|
|
248
|
+
owner: List[str] = pydantic.Field(
|
|
249
|
+
description="The owner to filter on. Should be user or group URNs.",
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
@pydantic.validator("owner", each_item=True)
|
|
253
|
+
def validate_owner(cls, v: str) -> str:
|
|
254
|
+
if not v.startswith("urn:li:"):
|
|
255
|
+
raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
|
|
256
|
+
_type = guess_entity_type(v)
|
|
257
|
+
if _type == CorpUserUrn.ENTITY_TYPE:
|
|
258
|
+
return str(CorpUserUrn.from_string(v))
|
|
259
|
+
elif _type == CorpGroupUrn.ENTITY_TYPE:
|
|
260
|
+
return str(CorpGroupUrn.from_string(v))
|
|
261
|
+
else:
|
|
262
|
+
raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")
|
|
263
|
+
|
|
264
|
+
def _build_rule(self) -> SearchFilterRule:
|
|
265
|
+
return SearchFilterRule(
|
|
266
|
+
field="owners",
|
|
267
|
+
condition="EQUAL",
|
|
268
|
+
values=self.owner,
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
def compile(self) -> _OrFilters:
|
|
272
|
+
return [{"and": [self._build_rule()]}]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class _GlossaryTermFilter(_BaseFilter):
|
|
276
|
+
"""Filter for entities associated with specific glossary terms."""
|
|
277
|
+
|
|
278
|
+
glossary_term: List[str] = pydantic.Field(
|
|
279
|
+
description="The glossary term to filter on. Should be glossary term URNs.",
|
|
280
|
+
)
|
|
281
|
+
|
|
282
|
+
@pydantic.validator("glossary_term", each_item=True)
|
|
283
|
+
def validate_glossary_term(cls, v: str) -> str:
|
|
284
|
+
if not v.startswith("urn:li:"):
|
|
285
|
+
raise ValueError(f"Glossary term must be a valid URN, got: {v}")
|
|
286
|
+
# Validate that it's a glossary term URN
|
|
287
|
+
_type = guess_entity_type(v)
|
|
288
|
+
if _type != "glossaryTerm":
|
|
289
|
+
raise ValueError(
|
|
290
|
+
f"Glossary term must be a valid glossary term URN, got: {v}"
|
|
291
|
+
)
|
|
292
|
+
return v
|
|
293
|
+
|
|
294
|
+
def _build_rule(self) -> SearchFilterRule:
|
|
295
|
+
return SearchFilterRule(
|
|
296
|
+
field="glossaryTerms",
|
|
297
|
+
condition="EQUAL",
|
|
298
|
+
values=self.glossary_term,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
def compile(self) -> _OrFilters:
|
|
302
|
+
return [{"and": [self._build_rule()]}]
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class _TagFilter(_BaseFilter):
    """Filter for entities associated with specific tags."""

    # Each entry is checked individually by `validate_tag` below.
    tag: List[str] = pydantic.Field(
        description="The tag to filter on. Should be tag URNs.",
    )

    @pydantic.validator("tag", each_item=True)
    def validate_tag(cls, v: str) -> str:
        """Accept ``v`` only if it is a URN whose entity type is ``tag``.

        Raises:
            ValueError: if ``v`` lacks the ``urn:li:`` prefix, or if
                ``guess_entity_type`` reports a type other than ``tag``.
        """
        has_urn_prefix = v.startswith("urn:li:")
        if not has_urn_prefix:
            raise ValueError(f"Tag must be a valid URN, got: {v}")

        # Only URNs of the tag entity type pass through unchanged.
        if guess_entity_type(v) == "tag":
            return v
        raise ValueError(f"Tag must be a valid tag URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        """Build an ``EQUAL`` search rule on the ``tags`` field."""
        tag_rule = SearchFilterRule(field="tags", condition="EQUAL", values=self.tag)
        return tag_rule

    def compile(self) -> _OrFilters:
        """Compile this filter into one AND clause that holds only the tag rule."""
        and_clause = {"and": [self._build_rule()]}
        return [and_clause]
|
|
331
|
+
|
|
332
|
+
|
|
184
333
|
class _CustomCondition(_BaseFilter):
|
|
185
334
|
"""Represents a single field condition."""
|
|
186
335
|
|
|
@@ -196,6 +345,10 @@ class _CustomCondition(_BaseFilter):
|
|
|
196
345
|
)
|
|
197
346
|
return [{"and": [rule]}]
|
|
198
347
|
|
|
348
|
+
@classmethod
def _field_discriminator(cls) -> str:
    """Return the fixed discriminator tag used for custom conditions."""
    discriminator_tag = "_custom"
    return discriminator_tag
|
|
351
|
+
|
|
199
352
|
|
|
200
353
|
class _And(_BaseFilter):
|
|
201
354
|
"""Represents an AND conjunction of filters."""
|
|
@@ -302,31 +455,116 @@ class _Not(_BaseFilter):
|
|
|
302
455
|
yield from self.not_.dfs()
|
|
303
456
|
|
|
304
457
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
458
|
+
def _filter_discriminator(v: Any) -> Optional[str]:
    """Pick the discriminator tag for a filter value, or ``None`` if it cannot be determined.

    Filter instances report their own tag. A dict with exactly one key uses
    that key as the tag. Larger dicts are recognised as container filters
    (they have a ``container`` key) or custom conditions (they have both
    ``field`` and ``condition``), checked in that order.
    """
    if isinstance(v, _BaseFilter):
        return v._field_discriminator()
    if not isinstance(v, dict):
        return None

    if len(v) == 1:
        # The sole key of a single-entry dict is itself the tag.
        return next(iter(v))

    present_keys = set(v)
    if "container" in present_keys:
        return _ContainerFilter._field_discriminator()
    if {"field", "condition"} <= present_keys:
        return _CustomCondition._field_discriminator()
    return None
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _parse_and_like_filter(value: Any) -> Any:
    """Expand a multi-key dict into an explicit ``{"and": [...]}`` filter.

    The value is returned untouched when it is not a dict, has fewer than two
    keys, or already contains one of the structural keys (and/or/not, a custom
    condition's ``field``/``condition``, or a container filter's
    ``direct_descendants_only``).
    """
    structural_keys = {
        "and",
        "or",
        "not",
        "field",
        "condition",
        "direct_descendants_only",
    }

    if not isinstance(value, dict):
        return value
    if len(value) <= 1:
        return value
    if not structural_keys.isdisjoint(value):
        return value

    # One single-key dict per original entry, in the original key order.
    return {"and": [{key: item} for key, item in value.items()]}
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
    # Fallback definition: a plain union of every filter model, with no
    # discriminator and no pre-validation hooks.
    # The `TYPE_CHECKING` bit of the condition is required to make the linter
    # happy, since we currently only run mypy with pydantic v1.
    Filter = Union[
        _And,
        _Or,
        _Not,
        _EntityTypeFilter,
        _EntitySubtypeFilter,
        _StatusFilter,
        _PlatformFilter,
        _DomainFilter,
        _ContainerFilter,
        _EnvFilter,
        _OwnerFilter,
        _GlossaryTermFilter,
        _TagFilter,
        _CustomCondition,
    ]

    # Resolve the forward references now that `Filter` exists.
    _And.update_forward_refs()
    _Or.update_forward_refs()
    _Not.update_forward_refs()
else:
    from pydantic import Discriminator, Tag

    def _parse_json_from_string(value: Any) -> Any:
        """Decode ``value`` as JSON when it is a string; otherwise return it unchanged.

        Strings that are not valid JSON are also returned unchanged.
        """
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                return value
        else:
            return value

    # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
    # That way we'd be able to attach methods to the Filter type.
    # e.g. replace load_filters(...) with Filter.load(...)
    #
    # Each union member is tagged with its own `_field_discriminator()` value,
    # and `_filter_discriminator` computes the tag for an incoming value.
    # NOTE(review): pydantic applies "before" validators from last to first, so
    # presumably JSON-string decoding runs before and-like expansion - confirm.
    Filter = Annotated[
        Annotated[
            Union[
                Annotated[_And, Tag(_And._field_discriminator())],
                Annotated[_Or, Tag(_Or._field_discriminator())],
                Annotated[_Not, Tag(_Not._field_discriminator())],
                Annotated[
                    _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
                ],
                Annotated[
                    _EntitySubtypeFilter,
                    Tag(_EntitySubtypeFilter._field_discriminator()),
                ],
                Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
                Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
                Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
                Annotated[
                    _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
                ],
                Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
                Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
                Annotated[
                    _GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
                ],
                Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
                Annotated[
                    _CustomCondition, Tag(_CustomCondition._field_discriminator())
                ],
            ],
            Discriminator(_filter_discriminator),
        ],
        pydantic.BeforeValidator(_parse_and_like_filter),
        pydantic.BeforeValidator(_parse_json_from_string),
    ]

    # Required to resolve forward references to "Filter"
    _And.model_rebuild()  # type: ignore
    _Or.model_rebuild()  # type: ignore
    _Not.model_rebuild()  # type: ignore
|
|
330
568
|
|
|
331
569
|
|
|
332
570
|
def load_filters(obj: Any) -> Filter:
|
|
@@ -400,10 +638,40 @@ class FilterDsl:
|
|
|
400
638
|
def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
    """Create a domain filter from one domain or a sequence of domains."""
    # A lone string is wrapped so the filter always receives a sequence.
    domains = [domain] if isinstance(domain, str) else domain
    return _DomainFilter(domain=domains)
|
|
402
640
|
|
|
641
|
+
@staticmethod
def container(
    container: Union[str, Sequence[str]],
    /,
    *,
    direct_descendants_only: bool = False,
) -> _ContainerFilter:
    """Create a container filter from one container or a sequence of containers.

    ``direct_descendants_only`` is passed through to the filter unchanged.
    """
    # A lone string is wrapped so the filter always receives a sequence.
    if isinstance(container, str):
        containers = [container]
    else:
        containers = container
    return _ContainerFilter(
        container=containers,
        direct_descendants_only=direct_descendants_only,
    )
|
|
652
|
+
|
|
403
653
|
@staticmethod
def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
    """Create an environment filter from one env or a sequence of envs."""
    # A lone string is wrapped so the filter always receives a sequence.
    envs = [env] if isinstance(env, str) else env
    return _EnvFilter(env=envs)
|
|
406
656
|
|
|
657
|
+
@staticmethod
def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
    """Create an owner filter from one owner or a sequence of owners."""
    # A lone string is wrapped so the filter always receives a sequence.
    if isinstance(owner, str):
        owners = [owner]
    else:
        owners = owner
    return _OwnerFilter(owner=owners)
|
|
660
|
+
|
|
661
|
+
@staticmethod
def glossary_term(
    glossary_term: Union[str, Sequence[str]], /
) -> _GlossaryTermFilter:
    """Create a glossary term filter from one term or a sequence of terms."""
    # A lone string is wrapped so the filter always receives a sequence.
    terms = [glossary_term] if isinstance(glossary_term, str) else glossary_term
    return _GlossaryTermFilter(glossary_term=terms)
|
|
670
|
+
|
|
671
|
+
@staticmethod
def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
    """Create a tag filter from one tag or a sequence of tags."""
    # A lone string is wrapped so the filter always receives a sequence.
    if isinstance(tag, str):
        tags = [tag]
    else:
        tags = tag
    return _TagFilter(tag=tags)
|
|
674
|
+
|
|
407
675
|
@staticmethod
|
|
408
676
|
def has_custom_property(key: str, value: str) -> _CustomCondition:
|
|
409
677
|
return _CustomCondition(
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, List, Union
|
|
3
|
+
|
|
4
|
+
from datahub.secret.secret_store import SecretStore
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Simple SecretStore implementation that fetches Secret values from the local environment.
|
|
8
|
+
class EnvironmentSecretStore(SecretStore):
    """SecretStore that looks secrets up in the current process environment."""

    def __init__(self, config):
        """Accept ``config`` for interface compatibility; it is not used."""

    def close(self) -> None:
        """Nothing to release."""
        return None

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map every requested name to its environment value, or ``None`` when unset."""
        return {name: os.getenv(name) for name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Return the environment value for ``secret_name``, or ``None`` when unset."""
        env_value = os.getenv(secret_name)
        return env_value

    def get_id(self) -> str:
        """Return the identifier of this store type."""
        return "env"

    @classmethod
    def create(cls, config: Dict) -> "EnvironmentSecretStore":
        """Construct the store from ``config``."""
        store = cls(config)
        return store
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from datahub.secret.secret_store import SecretStore
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FileSecretStoreConfig(BaseModel):
    # Configuration for FileSecretStore.

    # Directory that secret names are joined onto to locate each secret's file.
    basedir: str = "/mnt/secrets"
    # Maximum number of characters kept from a secret file; longer contents
    # are truncated to this length by FileSecretStore.
    max_length: int = 1024768
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Simple SecretStore implementation that fetches Secret values from the local files.
|
|
18
|
+
class FileSecretStore(SecretStore):
    """SecretStore that reads each secret from a file under ``config.basedir``.

    The secret name is used as the file path relative to ``basedir``. Names
    that would point outside of ``basedir`` are rejected.
    """

    def __init__(self, config: FileSecretStoreConfig):
        self.config = config

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map every requested name to its secret value, or ``None`` when unavailable."""
        return {name: self.get_secret_value(name) for name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Read one secret from its file.

        Returns:
            The file contents, truncated to ``config.max_length`` characters
            and with trailing whitespace stripped. ``None`` is returned when
            the name escapes ``basedir`` or no regular file exists at the path.
        """
        base_dir = os.path.abspath(self.config.basedir)
        secret_path = os.path.abspath(os.path.join(base_dir, secret_name))

        # Lexical containment check: a name such as "../x" or an absolute path
        # must not be able to read files outside the secrets directory.
        # Symlinks are deliberately not resolved, so mounted secret volumes
        # that rely on them keep working.
        try:
            is_contained = os.path.commonpath([base_dir, secret_path]) == base_dir
        except ValueError:
            # Raised when the paths cannot be compared, e.g. different drives.
            is_contained = False
        if not is_contained:
            logger.warning(
                "Secret name %r resolves outside of %s and will be ignored.",
                secret_name,
                base_dir,
            )
            return None

        # isfile (rather than exists) so a directory yields None instead of
        # making open() raise.
        if not os.path.isfile(secret_path):
            return None

        with open(secret_path, "r") as f:
            # Read one extra character so over-long secrets can be detected.
            secret_value = f.read(self.config.max_length + 1)
        if len(secret_value) > self.config.max_length:
            logger.warning(
                f"Secret {secret_name} is longer than {self.config.max_length} and will be truncated."
            )
        return secret_value[: self.config.max_length].rstrip()

    def get_id(self) -> str:
        """Return the identifier of this store type."""
        return "file"

    def close(self) -> None:
        """Nothing to release; files are closed right after each read."""
        return

    @classmethod
    def create(cls, config: Any) -> "FileSecretStore":
        """Validate ``config`` into a FileSecretStoreConfig and build the store."""
        config = FileSecretStoreConfig.parse_obj(config)
        return cls(config)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Self
|
|
5
|
+
|
|
6
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal, PatchPath
|
|
7
|
+
from datahub.metadata.schema_classes import (
|
|
8
|
+
FineGrainedLineageClass as FineGrainedLineage,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class HasFineGrainedLineagePatch(MetadataPatchProposal):
    """Patch-builder mixin for adding, removing and setting fine-grained lineage."""

    @abstractmethod
    def _fine_grained_lineage_location(self) -> Tuple[str, PatchPath]:
        """Return the aspect name and base patch path where fine-grained lineage is stored."""
        raise NotImplementedError("Subclasses must implement this method.")

    @staticmethod
    def _get_fine_grained_key(
        fine_grained_lineage: FineGrainedLineage,
    ) -> Tuple[str, str, str]:
        """Derive the ``(transform operation, downstream URN, query id)`` key.

        A missing transform operation or query is represented as ``"NONE"``.

        Raises:
            TypeError: if the lineage does not have exactly one downstream.
        """
        downstream_urns = fine_grained_lineage.downstreams or []
        if len(downstream_urns) != 1:
            raise TypeError("Cannot patch with more or less than one downstream.")
        return (
            fine_grained_lineage.transformOperation or "NONE",
            downstream_urns[0],
            fine_grained_lineage.query or "NONE",
        )

    def add_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one ``add`` patch per upstream of ``fine_grained_lineage``.

        Each patch carries the lineage's confidence score as its value.
        """
        aspect_name, base_path = self._fine_grained_lineage_location()
        key_parts = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            self._add_patch(
                aspect_name,
                "add",
                path=(*base_path, *key_parts, upstream_urn),
                value={"confidenceScore": fine_grained_lineage.confidenceScore},
            )
        return self

    def remove_fine_grained_lineage(
        self, fine_grained_lineage: FineGrainedLineage
    ) -> Self:
        """Queue one ``remove`` patch per upstream of ``fine_grained_lineage``."""
        aspect_name, base_path = self._fine_grained_lineage_location()
        key_parts = self._get_fine_grained_key(fine_grained_lineage)
        for upstream_urn in fine_grained_lineage.upstreams or []:
            self._add_patch(
                aspect_name,
                "remove",
                path=(*base_path, *key_parts, upstream_urn),
                value={},
            )
        return self

    def set_fine_grained_lineages(
        self, fine_grained_lineages: List[FineGrainedLineage]
    ) -> Self:
        """Queue a single ``add`` patch at the base path carrying the whole list."""
        aspect_name, base_path = self._fine_grained_lineage_location()
        self._add_patch(aspect_name, "add", path=base_path, value=fine_grained_lineages)
        return self
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Self
|
|
4
|
+
|
|
5
|
+
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
6
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HasSiblingsPatch(MetadataPatchProposal):
    """Patch-builder mixin for editing an entity's siblings aspect."""

    def add_sibling(self, sibling_urn: str, primary: bool = False) -> Self:
        """Add a sibling relationship to the entity.

        Args:
            sibling_urn: The URN of the sibling entity to add.
            primary: When truthy, an extra patch also sets the ``primary`` flag.
                When falsy, the flag is left untouched.

        Returns:
            The patch builder instance.
        """
        sibling_path = ("siblings", sibling_urn)
        self._add_patch(
            Siblings.ASPECT_NAME, "add", path=sibling_path, value=sibling_urn
        )

        if not primary:
            return self

        # Only touch the primary flag when the caller asked for it.
        self._add_patch(Siblings.ASPECT_NAME, "add", path=("primary",), value=primary)
        return self

    def remove_sibling(self, sibling_urn: str) -> Self:
        """Remove a sibling relationship from the entity.

        Args:
            sibling_urn: The URN of the sibling entity to remove.

        Returns:
            The patch builder instance.
        """
        sibling_path = ("siblings", sibling_urn)
        self._add_patch(Siblings.ASPECT_NAME, "remove", path=sibling_path, value={})
        return self

    def set_siblings(self, sibling_urns: List[str], primary: bool = False) -> Self:
        """Set the complete list of siblings for the entity.

        Queues an ``add`` patch at the ``siblings`` path carrying the whole
        list, followed by an ``add`` patch that always writes ``primary``.

        Args:
            sibling_urns: The list of sibling URNs to set.
            primary: Whether this entity should be marked as primary.

        Returns:
            The patch builder instance.
        """
        aspect = Siblings.ASPECT_NAME
        self._add_patch(aspect, "add", path=("siblings",), value=sibling_urns)
        self._add_patch(aspect, "add", path=("primary",), value=primary)
        return self
|