acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/search_filters.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
+
import json
|
|
4
5
|
from typing import (
|
|
6
|
+
TYPE_CHECKING,
|
|
7
|
+
Annotated,
|
|
5
8
|
Any,
|
|
9
|
+
ClassVar,
|
|
10
|
+
Iterator,
|
|
6
11
|
List,
|
|
12
|
+
Optional,
|
|
7
13
|
Sequence,
|
|
8
14
|
TypedDict,
|
|
9
15
|
Union,
|
|
@@ -12,11 +18,26 @@ from typing import (
|
|
|
12
18
|
import pydantic
|
|
13
19
|
|
|
14
20
|
from datahub.configuration.common import ConfigModel
|
|
15
|
-
from datahub.configuration.pydantic_migration_helpers import
|
|
16
|
-
|
|
17
|
-
|
|
21
|
+
from datahub.configuration.pydantic_migration_helpers import (
|
|
22
|
+
PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR,
|
|
23
|
+
PYDANTIC_VERSION_2,
|
|
24
|
+
)
|
|
25
|
+
from datahub.ingestion.graph.client import flexible_entity_type_to_graphql
|
|
26
|
+
from datahub.ingestion.graph.filters import (
|
|
27
|
+
FilterOperator,
|
|
28
|
+
RemovedStatusFilter,
|
|
29
|
+
SearchFilterRule,
|
|
30
|
+
_get_status_filter,
|
|
31
|
+
)
|
|
18
32
|
from datahub.metadata.schema_classes import EntityTypeName
|
|
19
|
-
from datahub.metadata.urns import
|
|
33
|
+
from datahub.metadata.urns import (
|
|
34
|
+
ContainerUrn,
|
|
35
|
+
CorpGroupUrn,
|
|
36
|
+
CorpUserUrn,
|
|
37
|
+
DataPlatformUrn,
|
|
38
|
+
DomainUrn,
|
|
39
|
+
)
|
|
40
|
+
from datahub.utilities.urns.urn import guess_entity_type
|
|
20
41
|
|
|
21
42
|
_AndSearchFilterRule = TypedDict(
|
|
22
43
|
"_AndSearchFilterRule", {"and": List[SearchFilterRule]}
|
|
@@ -25,37 +46,51 @@ _OrFilters = List[_AndSearchFilterRule]
|
|
|
25
46
|
|
|
26
47
|
|
|
27
48
|
class _BaseFilter(ConfigModel):
|
|
28
|
-
|
|
29
|
-
# We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
|
|
30
|
-
# doesn't recognize it properly. So unfortunately we'll need to live
|
|
31
|
-
# with the deprecation warning w/ pydantic v2.
|
|
32
|
-
allow_population_by_field_name = True
|
|
33
|
-
if PYDANTIC_VERSION_2:
|
|
34
|
-
populate_by_name = True
|
|
49
|
+
model_config = pydantic.ConfigDict(populate_by_name=True)
|
|
35
50
|
|
|
36
51
|
@abc.abstractmethod
|
|
37
|
-
def compile(self) -> _OrFilters:
|
|
38
|
-
|
|
52
|
+
def compile(self) -> _OrFilters: ...
|
|
53
|
+
|
|
54
|
+
def dfs(self) -> Iterator[_BaseFilter]:
|
|
55
|
+
yield self
|
|
39
56
|
|
|
57
|
+
@classmethod
|
|
58
|
+
def _field_discriminator(cls) -> str:
|
|
59
|
+
if cls is _BaseFilter:
|
|
60
|
+
raise ValueError("Cannot get discriminator for _BaseFilter")
|
|
61
|
+
if PYDANTIC_VERSION_2:
|
|
62
|
+
fields: dict = cls.model_fields # type: ignore
|
|
63
|
+
else:
|
|
64
|
+
fields = cls.__fields__ # type: ignore
|
|
40
65
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
66
|
+
# Assumes that there's only one field name per filter.
|
|
67
|
+
# If that's not the case, this method should be overridden.
|
|
68
|
+
if len(fields.keys()) != 1:
|
|
69
|
+
raise ValueError(
|
|
70
|
+
f"Found multiple fields that could be the discriminator for this filter: {list(fields.keys())}"
|
|
71
|
+
)
|
|
72
|
+
name, field = next(iter(fields.items()))
|
|
73
|
+
return field.alias or name # type: ignore
|
|
47
74
|
|
|
48
75
|
|
|
49
76
|
class _EntityTypeFilter(_BaseFilter):
|
|
77
|
+
"""Filter for specific entity types.
|
|
78
|
+
|
|
79
|
+
If no entity type filter is specified, we will search all entity types in the
|
|
80
|
+
default search set, mirroring the behavior of the DataHub UI.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
ENTITY_TYPE_FIELD: ClassVar[str] = "_entityType"
|
|
84
|
+
|
|
50
85
|
entity_type: List[str] = pydantic.Field(
|
|
51
|
-
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', etc.",
|
|
86
|
+
description="The entity type to filter on. Can be 'dataset', 'chart', 'dashboard', 'corpuser', 'dataProduct', etc.",
|
|
52
87
|
)
|
|
53
88
|
|
|
54
89
|
def _build_rule(self) -> SearchFilterRule:
    """Build an EQUAL rule on `ENTITY_TYPE_FIELD` from the configured entity types."""
    # Every configured name goes through flexible_entity_type_to_graphql first.
    graphql_types = [flexible_entity_type_to_graphql(t) for t in self.entity_type]
    return SearchFilterRule(
        field=self.ENTITY_TYPE_FIELD,
        condition="EQUAL",
        values=graphql_types,
    )
|
|
60
95
|
|
|
61
96
|
def compile(self) -> _OrFilters:
|
|
@@ -63,25 +98,43 @@ class _EntityTypeFilter(_BaseFilter):
|
|
|
63
98
|
|
|
64
99
|
|
|
65
100
|
class _EntitySubtypeFilter(_BaseFilter):
    # Filters on the `typeNames` search field. No class docstring is added on
    # purpose, so the model's generated schema stays as it was.
    entity_subtype: List[str] = pydantic.Field(
        description="The entity subtype to filter on. Can be 'Table', 'View', 'Source', etc. depending on the native platform's concepts.",
    )

    @pydantic.validator("entity_subtype", pre=True)
    def validate_entity_subtype(cls, v: Any) -> List[str]:
        """Normalize the raw input to a list before field validation runs."""
        if isinstance(v, list):
            return v
        if isinstance(v, tuple):
            # A tuple is a sequence of subtypes, not a single subtype; wrapping
            # it whole would produce one invalid element.
            return list(v)
        # Everything else (notably a bare string) is treated as one subtype.
        return [v]

    def _build_rule(self) -> SearchFilterRule:
        """Build an EQUAL rule on `typeNames` for the configured subtypes."""
        return SearchFilterRule(
            field="typeNames",
            condition="EQUAL",
            values=self.entity_subtype,
        )

    def compile(self) -> _OrFilters:
        """Return a single AND clause containing the subtype rule."""
        return [{"and": [self._build_rule()]}]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class _StatusFilter(_BaseFilter):
    """Filter for the status of entities during search.

    If not explicitly specified, the NOT_SOFT_DELETED status filter will be applied.
    """

    status: RemovedStatusFilter

    def _build_rule(self) -> Optional[SearchFilterRule]:
        """Translate `status` through `_get_status_filter`; the result may be None."""
        return _get_status_filter(self.status)

    def compile(self) -> _OrFilters:
        """Return one AND clause with the status rule, or the compiled `FilterDsl.true()`."""
        status_rule = self._build_rule()
        if not status_rule:
            # Our boolean algebra logic requires something here - returning [] would cause errors.
            return FilterDsl.true().compile()
        return [{"and": [status_rule]}]
|
|
85
138
|
|
|
86
139
|
|
|
87
140
|
class _PlatformFilter(_BaseFilter):
|
|
@@ -123,6 +176,39 @@ class _DomainFilter(_BaseFilter):
|
|
|
123
176
|
return [{"and": [self._build_rule()]}]
|
|
124
177
|
|
|
125
178
|
|
|
179
|
+
class _ContainerFilter(_BaseFilter):
    # Container URNs to match; each entry is normalized by validate_container.
    container: List[str]
    direct_descendants_only: bool = pydantic.Field(
        default=False,
        description="If true, only entities that are direct descendants of the container will be returned.",
    )

    @pydantic.validator("container", each_item=True)
    def validate_container(cls, v: str) -> str:
        """Parse each entry as a `ContainerUrn` and store its string form."""
        container_urn = ContainerUrn.from_string(v)
        return str(container_urn)

    @classmethod
    def _field_discriminator(cls) -> str:
        """Return the fixed key "container"; this model declares more than one field."""
        return "container"

    def _build_rule(self) -> SearchFilterRule:
        """Build the rule: EQUAL on `container` when direct-only, else CONTAIN on `browsePathV2`."""
        direct_only = self.direct_descendants_only
        return SearchFilterRule(
            field="container" if direct_only else "browsePathV2",
            condition="EQUAL" if direct_only else "CONTAIN",
            values=self.container,
        )

    def compile(self) -> _OrFilters:
        """Return a single AND clause containing the container rule."""
        rule = self._build_rule()
        return [{"and": [rule]}]
|
|
210
|
+
|
|
211
|
+
|
|
126
212
|
class _EnvFilter(_BaseFilter):
|
|
127
213
|
# Note that not all entity types have an env (e.g. dashboards / charts).
|
|
128
214
|
# If the env filter is specified, these will be excluded.
|
|
@@ -156,11 +242,99 @@ class _EnvFilter(_BaseFilter):
|
|
|
156
242
|
]
|
|
157
243
|
|
|
158
244
|
|
|
245
|
+
class _OwnerFilter(_BaseFilter):
    """Filter for entities owned by specific users or groups."""

    owner: List[str] = pydantic.Field(
        description="The owner to filter on. Should be user or group URNs.",
    )

    @pydantic.validator("owner", each_item=True)
    def validate_owner(cls, v: str) -> str:
        """Accept only corp user / corp group URNs and return their normalized string form.

        Raises:
            ValueError: If the value does not start with "urn:li:" or is
                neither a user nor a group URN.
        """
        if v.startswith("urn:li:"):
            owner_type = guess_entity_type(v)
            if owner_type == CorpUserUrn.ENTITY_TYPE:
                return str(CorpUserUrn.from_string(v))
            if owner_type == CorpGroupUrn.ENTITY_TYPE:
                return str(CorpGroupUrn.from_string(v))
        # Both rejection paths share the same message.
        raise ValueError(f"Owner must be a valid User or Group URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        """Build an EQUAL rule on `owners` for the configured URNs."""
        return SearchFilterRule(field="owners", condition="EQUAL", values=self.owner)

    def compile(self) -> _OrFilters:
        """Return a single AND clause containing the owner rule."""
        rule = self._build_rule()
        return [{"and": [rule]}]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class _GlossaryTermFilter(_BaseFilter):
    """Filter for entities associated with specific glossary terms."""

    glossary_term: List[str] = pydantic.Field(
        description="The glossary term to filter on. Should be glossary term URNs.",
    )

    @pydantic.validator("glossary_term", each_item=True)
    def validate_glossary_term(cls, v: str) -> str:
        """Check that each entry is a glossary term URN and return it unchanged.

        Raises:
            ValueError: If the value does not start with "urn:li:" or its
                guessed entity type is not "glossaryTerm".
        """
        if not v.startswith("urn:li:"):
            raise ValueError(f"Glossary term must be a valid URN, got: {v}")
        # Validate that it's a glossary term URN
        if guess_entity_type(v) == "glossaryTerm":
            return v
        raise ValueError(f"Glossary term must be a valid glossary term URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        """Build an EQUAL rule on `glossaryTerms` for the configured URNs."""
        return SearchFilterRule(
            field="glossaryTerms", condition="EQUAL", values=self.glossary_term
        )

    def compile(self) -> _OrFilters:
        """Return a single AND clause containing the glossary term rule."""
        rule = self._build_rule()
        return [{"and": [rule]}]
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class _TagFilter(_BaseFilter):
    """Filter for entities associated with specific tags."""

    tag: List[str] = pydantic.Field(
        description="The tag to filter on. Should be tag URNs.",
    )

    @pydantic.validator("tag", each_item=True)
    def validate_tag(cls, v: str) -> str:
        """Check that each entry is a tag URN and return it unchanged.

        Raises:
            ValueError: If the value does not start with "urn:li:" or its
                guessed entity type is not "tag".
        """
        if not v.startswith("urn:li:"):
            raise ValueError(f"Tag must be a valid URN, got: {v}")
        # Validate that it's a tag URN
        if guess_entity_type(v) == "tag":
            return v
        raise ValueError(f"Tag must be a valid tag URN, got: {v}")

    def _build_rule(self) -> SearchFilterRule:
        """Build an EQUAL rule on `tags` for the configured URNs."""
        return SearchFilterRule(field="tags", condition="EQUAL", values=self.tag)

    def compile(self) -> _OrFilters:
        """Return a single AND clause containing the tag rule."""
        rule = self._build_rule()
        return [{"and": [rule]}]
|
|
331
|
+
|
|
332
|
+
|
|
159
333
|
class _CustomCondition(_BaseFilter):
|
|
160
|
-
"""Represents a single field condition"""
|
|
334
|
+
"""Represents a single field condition."""
|
|
161
335
|
|
|
162
336
|
field: str
|
|
163
|
-
condition:
|
|
337
|
+
condition: FilterOperator
|
|
164
338
|
values: List[str]
|
|
165
339
|
|
|
166
340
|
def compile(self) -> _OrFilters:
|
|
@@ -171,9 +345,13 @@ class _CustomCondition(_BaseFilter):
|
|
|
171
345
|
)
|
|
172
346
|
return [{"and": [rule]}]
|
|
173
347
|
|
|
348
|
+
@classmethod
def _field_discriminator(cls) -> str:
    """Return the fixed tag "_custom" that identifies custom conditions."""
    discriminator = "_custom"
    return discriminator
|
|
351
|
+
|
|
174
352
|
|
|
175
353
|
class _And(_BaseFilter):
|
|
176
|
-
"""Represents an AND conjunction of filters"""
|
|
354
|
+
"""Represents an AND conjunction of filters."""
|
|
177
355
|
|
|
178
356
|
and_: Sequence["Filter"] = pydantic.Field(alias="and")
|
|
179
357
|
# TODO: Add validator to ensure that the "and" field is not empty
|
|
@@ -219,9 +397,14 @@ class _And(_BaseFilter):
|
|
|
219
397
|
]
|
|
220
398
|
}
|
|
221
399
|
|
|
400
|
+
def dfs(self) -> Iterator[_BaseFilter]:
    """Yield this filter, then walk each member of `and_` depth-first."""
    yield self
    for child in self.and_:
        yield from child.dfs()
|
|
404
|
+
|
|
222
405
|
|
|
223
406
|
class _Or(_BaseFilter):
|
|
224
|
-
"""Represents an OR conjunction of filters"""
|
|
407
|
+
"""Represents an OR conjunction of filters."""
|
|
225
408
|
|
|
226
409
|
or_: Sequence["Filter"] = pydantic.Field(alias="or")
|
|
227
410
|
# TODO: Add validator to ensure that the "or" field is not empty
|
|
@@ -232,9 +415,14 @@ class _Or(_BaseFilter):
|
|
|
232
415
|
merged_filter.extend(filter.compile())
|
|
233
416
|
return merged_filter
|
|
234
417
|
|
|
418
|
+
def dfs(self) -> Iterator[_BaseFilter]:
    """Yield this filter, then walk each member of `or_` depth-first."""
    yield self
    for child in self.or_:
        yield from child.dfs()
|
|
422
|
+
|
|
235
423
|
|
|
236
424
|
class _Not(_BaseFilter):
|
|
237
|
-
"""Represents a NOT filter"""
|
|
425
|
+
"""Represents a NOT filter."""
|
|
238
426
|
|
|
239
427
|
not_: "Filter" = pydantic.Field(alias="not")
|
|
240
428
|
|
|
@@ -262,31 +450,121 @@ class _Not(_BaseFilter):
|
|
|
262
450
|
|
|
263
451
|
return final_filters
|
|
264
452
|
|
|
453
|
+
def dfs(self) -> Iterator[_BaseFilter]:
    """Yield this filter, then walk the negated filter depth-first."""
    yield self
    negated = self.not_
    yield from negated.dfs()
|
|
265
456
|
|
|
266
|
-
# TODO: With pydantic 2, we can use a RootModel with a
|
|
267
|
-
# discriminated union to make the error messages more informative.
|
|
268
|
-
Filter = Union[
|
|
269
|
-
_And,
|
|
270
|
-
_Or,
|
|
271
|
-
_Not,
|
|
272
|
-
_EntityTypeFilter,
|
|
273
|
-
_EntitySubtypeFilter,
|
|
274
|
-
_PlatformFilter,
|
|
275
|
-
_DomainFilter,
|
|
276
|
-
_EnvFilter,
|
|
277
|
-
_CustomCondition,
|
|
278
|
-
]
|
|
279
457
|
|
|
458
|
+
def _filter_discriminator(v: Any) -> Optional[str]:
    """Pick the union tag for a raw filter value.

    Filter instances report their own discriminator. A dict with a single
    key is tagged by that key; a multi-key dict is tagged as a container
    filter when it has a "container" key, or as a custom condition when it
    has both "field" and "condition". Anything else yields None.
    """
    if isinstance(v, _BaseFilter):
        return v._field_discriminator()
    if not isinstance(v, dict):
        return None

    key_set = set(v)
    if len(key_set) == 1:
        return next(iter(v))
    if "container" in key_set:
        return _ContainerFilter._field_discriminator()
    if {"field", "condition"} <= key_set:
        return _CustomCondition._field_discriminator()
    return None
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _parse_and_like_filter(value: Any) -> Any:
    """Expand a multi-key dict into an explicit {"and": [...]} of single-key dicts.

    Values that are not dicts, dicts with at most one key, and dicts that
    contain any reserved key are returned unchanged.
    """
    if not isinstance(value, dict) or len(value) <= 1:
        return value

    # Do not parse if filter is already of type and/or/not or a custom condition
    # also do not parse container filter if direct_descendants_only is specified
    reserved_keys = {"and", "or", "not", "field", "condition", "direct_descendants_only"}
    if reserved_keys.intersection(value):
        return value

    return {"and": [{key: item} for key, item in value.items()]}
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
if TYPE_CHECKING or not PYDANTIC_SUPPORTS_CALLABLE_DISCRIMINATOR:
    # The `TYPE_CHECKING` check is required to make the linter happy,
    # since we currently only run mypy with pydantic v1.
    Filter = Union[
        _And,
        _Or,
        _Not,
        _EntityTypeFilter,
        _EntitySubtypeFilter,
        _StatusFilter,
        _PlatformFilter,
        _DomainFilter,
        _ContainerFilter,
        _EnvFilter,
        _OwnerFilter,
        _GlossaryTermFilter,
        _TagFilter,
        _CustomCondition,
    ]

    # Required to resolve forward references to "Filter"
    _And.update_forward_refs()
    _Or.update_forward_refs()
    _Not.update_forward_refs()
else:
    from pydantic import Discriminator, Tag

    def _parse_json_from_string(value: Any) -> Any:
        """Decode `value` as JSON when it is a string; return it unchanged otherwise.

        Strings that are not valid JSON are also returned unchanged.
        """
        if not isinstance(value, str):
            return value
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            return value

    # TODO: Once we're fully on pydantic 2, we can use a RootModel here.
    # That way we'd be able to attach methods to the Filter type.
    # e.g. replace load_filters(...) with Filter.load(...)
    Filter = Annotated[
        Annotated[
            Union[
                Annotated[_And, Tag(_And._field_discriminator())],
                Annotated[_Or, Tag(_Or._field_discriminator())],
                Annotated[_Not, Tag(_Not._field_discriminator())],
                Annotated[
                    _EntityTypeFilter, Tag(_EntityTypeFilter._field_discriminator())
                ],
                Annotated[
                    _EntitySubtypeFilter,
                    Tag(_EntitySubtypeFilter._field_discriminator()),
                ],
                Annotated[_StatusFilter, Tag(_StatusFilter._field_discriminator())],
                Annotated[_PlatformFilter, Tag(_PlatformFilter._field_discriminator())],
                Annotated[_DomainFilter, Tag(_DomainFilter._field_discriminator())],
                Annotated[
                    _ContainerFilter, Tag(_ContainerFilter._field_discriminator())
                ],
                Annotated[_EnvFilter, Tag(_EnvFilter._field_discriminator())],
                Annotated[_OwnerFilter, Tag(_OwnerFilter._field_discriminator())],
                Annotated[
                    _GlossaryTermFilter, Tag(_GlossaryTermFilter._field_discriminator())
                ],
                Annotated[_TagFilter, Tag(_TagFilter._field_discriminator())],
                Annotated[
                    _CustomCondition, Tag(_CustomCondition._field_discriminator())
                ],
            ],
            Discriminator(_filter_discriminator),
        ],
        # NOTE(review): pydantic documents that "before" validators in Annotated
        # run right-to-left, which would decode JSON strings first and then
        # apply the and-like expansion - confirm before reordering these.
        pydantic.BeforeValidator(_parse_and_like_filter),
        pydantic.BeforeValidator(_parse_json_from_string),
    ]

    # Required to resolve forward references to "Filter"
    _And.model_rebuild()  # type: ignore
    _Or.model_rebuild()  # type: ignore
    _Not.model_rebuild()  # type: ignore
|
|
290
568
|
|
|
291
569
|
|
|
292
570
|
def load_filters(obj: Any) -> Filter:
|
|
@@ -318,6 +596,18 @@ class FilterDsl:
|
|
|
318
596
|
def not_(arg: "Filter") -> _Not:
|
|
319
597
|
return _Not(not_=arg)
|
|
320
598
|
|
|
599
|
+
@staticmethod
def true() -> "Filter":
    """Return the DSL's `true` filter: a custom condition that `urn` EXISTS."""
    urn_exists = _CustomCondition(field="urn", condition="EXISTS", values=[])
    return urn_exists
|
|
606
|
+
|
|
607
|
+
@staticmethod
def false() -> "Filter":
    """Return the negation of `FilterDsl.true()`."""
    always_true = FilterDsl.true()
    return FilterDsl.not_(always_true)
|
|
610
|
+
|
|
321
611
|
@staticmethod
|
|
322
612
|
def entity_type(
|
|
323
613
|
entity_type: Union[EntityTypeName, Sequence[EntityTypeName]],
|
|
@@ -329,14 +619,15 @@ class FilterDsl:
|
|
|
329
619
|
)
|
|
330
620
|
|
|
331
621
|
@staticmethod
def entity_subtype(
    entity_subtype: Union[str, Sequence[str]],
) -> _EntitySubtypeFilter:
    """Build an `_EntitySubtypeFilter` from one subtype or a sequence of subtypes.

    A single string is wrapped in a list. Any other sequence is copied into
    a list, so tuples are not mistaken for a single value by the model's
    pre-validator (which wraps every non-list input).
    """
    subtypes = (
        [entity_subtype] if isinstance(entity_subtype, str) else list(entity_subtype)
    )
    return _EntitySubtypeFilter(
        entity_subtype=subtypes,
    )
|
|
337
628
|
|
|
338
629
|
@staticmethod
def platform(platform: Union[str, Sequence[str]], /) -> _PlatformFilter:
    """Build a `_PlatformFilter`; a single string is wrapped in a list, other sequences pass through."""
    platforms = [platform] if isinstance(platform, str) else platform
    return _PlatformFilter(platform=platforms)
|
|
@@ -344,13 +635,43 @@ class FilterDsl:
|
|
|
344
635
|
# TODO: Add a platform_instance filter
|
|
345
636
|
|
|
346
637
|
@staticmethod
def domain(domain: Union[str, Sequence[str]], /) -> _DomainFilter:
    """Build a `_DomainFilter`; a single string is wrapped in a list, other sequences pass through."""
    domains = [domain] if isinstance(domain, str) else domain
    return _DomainFilter(domain=domains)
|
|
349
640
|
|
|
350
641
|
@staticmethod
def container(
    container: Union[str, Sequence[str]],
    /,
    *,
    direct_descendants_only: bool = False,
) -> _ContainerFilter:
    """Build a `_ContainerFilter`.

    A single string is wrapped in a list; other sequences pass through.
    `direct_descendants_only` is forwarded to the filter unchanged.
    """
    containers = [container] if isinstance(container, str) else container
    return _ContainerFilter(
        container=containers,
        direct_descendants_only=direct_descendants_only,
    )
|
|
652
|
+
|
|
653
|
+
@staticmethod
def env(env: Union[str, Sequence[str]], /) -> _EnvFilter:
    """Build an `_EnvFilter`; a single string is wrapped in a list, other sequences pass through."""
    envs = [env] if isinstance(env, str) else env
    return _EnvFilter(env=envs)
|
|
353
656
|
|
|
657
|
+
@staticmethod
def owner(owner: Union[str, Sequence[str]], /) -> _OwnerFilter:
    """Build an `_OwnerFilter`; a single string is wrapped in a list, other sequences pass through."""
    owners = [owner] if isinstance(owner, str) else owner
    return _OwnerFilter(owner=owners)
|
|
660
|
+
|
|
661
|
+
@staticmethod
def glossary_term(
    glossary_term: Union[str, Sequence[str]], /
) -> _GlossaryTermFilter:
    """Build a `_GlossaryTermFilter`; a single string is wrapped in a list, other sequences pass through."""
    terms = [glossary_term] if isinstance(glossary_term, str) else glossary_term
    return _GlossaryTermFilter(glossary_term=terms)
|
|
670
|
+
|
|
671
|
+
@staticmethod
def tag(tag: Union[str, Sequence[str]], /) -> _TagFilter:
    """Build a `_TagFilter`; a single string is wrapped in a list, other sequences pass through."""
    tags = [tag] if isinstance(tag, str) else tag
    return _TagFilter(tag=tags)
|
|
674
|
+
|
|
354
675
|
@staticmethod
|
|
355
676
|
def has_custom_property(key: str, value: str) -> _CustomCondition:
|
|
356
677
|
return _CustomCondition(
|
|
@@ -359,13 +680,17 @@ class FilterDsl:
|
|
|
359
680
|
values=[f"{key}={value}"],
|
|
360
681
|
)
|
|
361
682
|
|
|
683
|
+
@staticmethod
def soft_deleted(status: RemovedStatusFilter) -> _StatusFilter:
    """Build a `_StatusFilter` for the given removal status."""
    status_filter = _StatusFilter(status=status)
    return status_filter
|
|
686
|
+
|
|
362
687
|
# NOTE: soft-deletion status filtering is available via soft_deleted() above.
|
|
363
688
|
# NOTE: container / browse path filtering is available via container() above.
|
|
364
689
|
# TODO add shortcut for custom filters
|
|
365
690
|
|
|
366
691
|
@staticmethod
|
|
367
692
|
def custom_filter(
|
|
368
|
-
field: str, condition:
|
|
693
|
+
field: str, condition: FilterOperator, values: Sequence[str]
|
|
369
694
|
) -> _CustomCondition:
|
|
370
695
|
return _CustomCondition(
|
|
371
696
|
field=field,
|
|
@@ -3,7 +3,8 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, validator
|
|
5
5
|
|
|
6
|
-
from datahub.ingestion.graph.client import
|
|
6
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
7
|
+
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
7
8
|
from datahub.secret.datahub_secrets_client import DataHubSecretsClient
|
|
8
9
|
from datahub.secret.secret_store import SecretStore
|
|
9
10
|
|
|
@@ -64,3 +65,6 @@ class DataHubSecretStore(SecretStore):
|
|
|
64
65
|
def create(cls, config: Any) -> "DataHubSecretStore":
|
|
65
66
|
config = DataHubSecretStoreConfig.parse_obj(config)
|
|
66
67
|
return cls(config)
|
|
68
|
+
|
|
69
|
+
def close(self) -> None:
    """Close the graph held by this store's client."""
    graph = self.client.graph
    graph.close()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, List, Union
|
|
3
|
+
|
|
4
|
+
from datahub.secret.secret_store import SecretStore
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Simple SecretStore implementation that fetches Secret values from the local environment.
|
|
8
|
+
class EnvironmentSecretStore(SecretStore):
    """SecretStore that resolves each secret name as an environment variable."""

    def __init__(self, config):
        # The config is accepted to match the create() signature but is not used.
        pass

    def close(self) -> None:
        """Do nothing; this store holds no resources."""
        return None

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map every requested name to its environment value, or None when unset."""
        return {secret_name: os.getenv(secret_name) for secret_name in secret_names}

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Return the environment value for `secret_name`, or None when unset."""
        return os.environ.get(secret_name)

    def get_id(self) -> str:
        """Return the store identifier, "env"."""
        return "env"

    @classmethod
    def create(cls, config: Dict) -> "EnvironmentSecretStore":
        """Construct the store from `config`."""
        return cls(config)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from datahub.secret.secret_store import SecretStore
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FileSecretStoreConfig(BaseModel):
    # Directory that secret names are joined onto when looking up a secret file.
    basedir: str = "/mnt/secrets"
    # Maximum number of characters returned for one secret; longer content is truncated.
    max_length: int = 1024768
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Simple SecretStore implementation that fetches Secret values from the local files.
|
|
18
|
+
class FileSecretStore(SecretStore):
    """SecretStore that reads each secret from a file named after it under `config.basedir`."""

    def __init__(self, config: FileSecretStoreConfig):
        self.config = config

    def get_secret_values(self, secret_names: List[str]) -> Dict[str, Union[str, None]]:
        """Map every requested name to the result of `get_secret_value`."""
        return {
            secret_name: self.get_secret_value(secret_name)
            for secret_name in secret_names
        }

    def get_secret_value(self, secret_name: str) -> Union[str, None]:
        """Return the content of the secret's file, or None when there is no such file.

        At most `config.max_length` characters are returned; longer content
        is truncated with a warning. Trailing whitespace is stripped.
        """
        # NOTE(review): secret_name is joined without normalization, so a name
        # containing path separators or ".." can resolve outside basedir -
        # confirm callers only pass trusted names.
        secret_path = os.path.join(self.config.basedir, secret_name)

        # isfile() rather than exists(): a directory (e.g. an empty secret name
        # resolves to basedir itself) is reported as "no secret" instead of
        # making open() raise IsADirectoryError.
        if not os.path.isfile(secret_path):
            return None

        max_length = self.config.max_length
        try:
            with open(secret_path, "r") as f:
                # Read one extra character so over-long content can be detected.
                secret_value = f.read(max_length + 1)
        except FileNotFoundError:
            # The file was removed between the check above and the open.
            return None

        if len(secret_value) > max_length:
            logger.warning(
                f"Secret {secret_name} is longer than {self.config.max_length} and will be truncated."
            )
        return secret_value[:max_length].rstrip()

    def get_id(self) -> str:
        """Return the store identifier, "file"."""
        return "file"

    def close(self) -> None:
        """Do nothing; files are closed as soon as they are read."""
        return

    @classmethod
    def create(cls, config: Any) -> "FileSecretStore":
        """Parse `config` into a `FileSecretStoreConfig` and construct the store."""
        config = FileSecretStoreConfig.parse_obj(config)
        return cls(config)
|