acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/cli/specific/user_cli.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import click
|
|
6
7
|
from click_default_group import DefaultGroup
|
|
7
8
|
|
|
8
9
|
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
|
|
9
10
|
from datahub.cli.specific.file_loader import load_file
|
|
10
|
-
from datahub.ingestion.graph.client import get_default_graph
|
|
11
|
-
from datahub.telemetry import telemetry
|
|
11
|
+
from datahub.configuration.common import OperationalError
|
|
12
|
+
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
13
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
12
14
|
from datahub.upgrade import upgrade
|
|
13
15
|
|
|
14
16
|
logger = logging.getLogger(__name__)
|
|
@@ -32,13 +34,12 @@ def user() -> None:
|
|
|
32
34
|
help="Use this flag to overwrite the information that is set via the UI",
|
|
33
35
|
)
|
|
34
36
|
@upgrade.check_upgrade
|
|
35
|
-
@telemetry.with_telemetry()
|
|
36
37
|
def upsert(file: Path, override_editable: bool) -> None:
|
|
37
38
|
"""Create or Update a User in DataHub"""
|
|
38
39
|
|
|
39
40
|
config_dict = load_file(pathlib.Path(file))
|
|
40
41
|
user_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
41
|
-
with get_default_graph() as emitter:
|
|
42
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
42
43
|
for user_config in user_configs:
|
|
43
44
|
try:
|
|
44
45
|
datahub_user: CorpUser = CorpUser.parse_obj(user_config)
|
|
@@ -56,3 +57,172 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
56
57
|
f"Update failed for id {user_config.get('id')}. due to {e}",
|
|
57
58
|
fg="red",
|
|
58
59
|
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def validate_user_id_options(
|
|
63
|
+
user_id: Optional[str], email_as_id: bool, email: str
|
|
64
|
+
) -> str:
|
|
65
|
+
"""
|
|
66
|
+
Validate user ID options and return the final user ID to use.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
user_id: Optional explicit user ID
|
|
70
|
+
email_as_id: Whether to use email as the user ID
|
|
71
|
+
email: User's email address
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
The final user ID to use for the URN
|
|
75
|
+
|
|
76
|
+
Raises:
|
|
77
|
+
ValueError: If validation fails (neither or both options provided)
|
|
78
|
+
"""
|
|
79
|
+
if not user_id and not email_as_id:
|
|
80
|
+
raise ValueError("Must specify either --id or --email-as-id flag")
|
|
81
|
+
|
|
82
|
+
if user_id and email_as_id:
|
|
83
|
+
raise ValueError("Cannot specify both --id and --email-as-id flag")
|
|
84
|
+
|
|
85
|
+
if email_as_id:
|
|
86
|
+
return email
|
|
87
|
+
|
|
88
|
+
assert user_id is not None
|
|
89
|
+
return user_id
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def create_native_user_in_datahub(
|
|
93
|
+
graph: DataHubGraph,
|
|
94
|
+
user_id: str,
|
|
95
|
+
email: str,
|
|
96
|
+
display_name: str,
|
|
97
|
+
password: str,
|
|
98
|
+
role: Optional[str] = None,
|
|
99
|
+
) -> str:
|
|
100
|
+
"""
|
|
101
|
+
Create a native DataHub user.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
graph: DataHubGraph client
|
|
105
|
+
user_id: User identifier (used in URN)
|
|
106
|
+
email: User's email address
|
|
107
|
+
display_name: User's full display name
|
|
108
|
+
password: User's password
|
|
109
|
+
role: Optional role to assign (Admin, Editor, or Reader)
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
The created user's URN
|
|
113
|
+
|
|
114
|
+
Raises:
|
|
115
|
+
ValueError: If user already exists or role is invalid
|
|
116
|
+
OperationalError: If user creation fails due to API/network errors
|
|
117
|
+
"""
|
|
118
|
+
user_urn = f"urn:li:corpuser:{user_id}"
|
|
119
|
+
|
|
120
|
+
if graph.exists(user_urn):
|
|
121
|
+
raise ValueError(f"User with ID {user_id} already exists (urn: {user_urn})")
|
|
122
|
+
|
|
123
|
+
created_user_urn = graph.create_native_user(
|
|
124
|
+
user_id=user_id,
|
|
125
|
+
email=email,
|
|
126
|
+
display_name=display_name,
|
|
127
|
+
password=password,
|
|
128
|
+
role=role,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
return created_user_urn
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@user.command(name="add")
@click.option("--id", "user_id", type=str, help="User identifier (used in URN)")
@click.option("--email", required=True, type=str, help="User's email address")
@click.option(
    "--email-as-id",
    is_flag=True,
    default=False,
    help="Use email address as user ID (alternative to --id)",
)
@click.option(
    "--display-name", required=True, type=str, help="User's full display name"
)
@click.option(
    "--password",
    is_flag=True,
    default=False,
    help="Prompt for password (hidden input)",
)
@click.option(
    "--role",
    required=False,
    type=click.Choice(
        ["Admin", "Editor", "Reader", "admin", "editor", "reader"], case_sensitive=False
    ),
    help="Optional role to assign (Admin, Editor, or Reader)",
)
@upgrade.check_upgrade
def add(
    user_id: Optional[str],
    email: str,
    email_as_id: bool,
    display_name: str,
    password: bool,
    role: Optional[str],
) -> None:
    """Create a native DataHub user with email/password authentication"""
    # NOTE: the docstring above is the command's --help text; keep it stable.
    # `user_id` and `role` are None when their options are omitted, hence
    # the Optional annotations.

    # Resolve which identifier to use (--id vs --email-as-id); exactly one
    # of the two must be given.
    try:
        final_user_id = validate_user_id_options(user_id, email_as_id, email)
    except ValueError as e:
        click.secho(f"Error: {e}", fg="red")
        raise SystemExit(1) from e

    # The password is never accepted on the command line; the flag only opts
    # in to the interactive, hidden prompt below.
    if not password:
        click.secho(
            "Error: --password flag is required to prompt for password input",
            fg="red",
        )
        raise SystemExit(1)

    password_value = click.prompt(
        "Enter password", hide_input=True, confirmation_prompt=True
    )

    with get_default_graph(ClientMode.CLI) as graph:
        try:
            created_user_urn = create_native_user_in_datahub(
                graph, final_user_id, email, display_name, password_value, role
            )

            if role:
                click.secho(
                    f"Successfully created user {final_user_id} with role {role.capitalize()} (URN: {created_user_urn})",
                    fg="green",
                )
            else:
                click.secho(
                    f"Successfully created user {final_user_id} (URN: {created_user_urn})",
                    fg="green",
                )
        except ValueError as e:
            click.secho(f"Error: {e}", fg="red")
            raise SystemExit(1) from e
        except OperationalError as e:
            # Prefer the structured message; fall back to args, and finally
            # to str(e) so an exception without args cannot raise IndexError
            # from inside this handler.
            if hasattr(e, "message"):
                error_msg = e.message
            elif e.args:
                error_msg = str(e.args[0])
            else:
                error_msg = str(e)
            click.secho(f"Error: {error_msg}", fg="red")

            error_info = getattr(e, "info", None)
            if error_info:
                logger.debug(f"Error details: {error_info}")
                if "status_code" in error_info:
                    click.secho(
                        f"  HTTP Status: {error_info['status_code']}", fg="red"
                    )
                if "response_text" in error_info:
                    # str() guards against a non-string payload (e.g. None or
                    # bytes), which would otherwise fail on slicing.
                    click.secho(
                        f"  Response: {str(error_info['response_text'])[:200]}",
                        fg="red",
                    )

            click.secho(
                "\nTip: Run with DATAHUB_DEBUG=1 environment variable for detailed logs",
                fg="yellow",
            )
            raise SystemExit(1) from e
        except Exception as e:
            # Top-level CLI boundary: report, log the traceback, exit non-zero.
            click.secho(f"Unexpected error: {e}", fg="red")
            logger.exception("Unexpected error during user creation")
            raise SystemExit(1) from e
|
datahub/cli/state_cli.py
CHANGED
|
@@ -5,7 +5,7 @@ import click
|
|
|
5
5
|
from click_default_group import DefaultGroup
|
|
6
6
|
|
|
7
7
|
from datahub.ingestion.graph.client import get_default_graph
|
|
8
|
-
from datahub.
|
|
8
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
9
9
|
from datahub.upgrade import upgrade
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
@@ -21,14 +21,13 @@ def state() -> None:
|
|
|
21
21
|
@click.option("--pipeline-name", required=True, type=str)
|
|
22
22
|
@click.option("--platform", required=True, type=str)
|
|
23
23
|
@upgrade.check_upgrade
|
|
24
|
-
@telemetry.with_telemetry()
|
|
25
24
|
def inspect(pipeline_name: str, platform: str) -> None:
|
|
26
25
|
"""
|
|
27
26
|
Get the latest stateful ingestion state for a given pipeline.
|
|
28
27
|
Only works for state entity removal for now.
|
|
29
28
|
"""
|
|
30
29
|
|
|
31
|
-
datahub_graph = get_default_graph()
|
|
30
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
32
31
|
checkpoint = datahub_graph.get_latest_pipeline_checkpoint(pipeline_name, platform)
|
|
33
32
|
if not checkpoint:
|
|
34
33
|
click.secho("No ingestion state found.", fg="red")
|
datahub/cli/timeline_cli.py
CHANGED
|
@@ -9,7 +9,7 @@ from requests import Response
|
|
|
9
9
|
|
|
10
10
|
from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
|
|
11
11
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
12
|
-
from datahub.
|
|
12
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
13
13
|
from datahub.upgrade import upgrade
|
|
14
14
|
from datahub.utilities.urns.urn import Urn
|
|
15
15
|
|
|
@@ -63,7 +63,7 @@ def get_timeline(
|
|
|
63
63
|
diff: bool,
|
|
64
64
|
graph: Optional[DataHubGraph] = None,
|
|
65
65
|
) -> Any:
|
|
66
|
-
client = graph if graph else get_default_graph()
|
|
66
|
+
client = graph if graph else get_default_graph(ClientMode.CLI)
|
|
67
67
|
session = client._session
|
|
68
68
|
host = client.config.server
|
|
69
69
|
if urn.startswith("urn%3A"):
|
|
@@ -129,7 +129,6 @@ def get_timeline(
|
|
|
129
129
|
@click.option("--raw", type=bool, is_flag=True, help="Show the raw diff")
|
|
130
130
|
@click.pass_context
|
|
131
131
|
@upgrade.check_upgrade
|
|
132
|
-
@telemetry.with_telemetry()
|
|
133
132
|
def timeline(
|
|
134
133
|
ctx: Any,
|
|
135
134
|
urn: str,
|
datahub/configuration/common.py
CHANGED
|
@@ -1,20 +1,25 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import re
|
|
2
3
|
import unittest.mock
|
|
3
4
|
from abc import ABC, abstractmethod
|
|
4
5
|
from enum import auto
|
|
5
6
|
from typing import (
|
|
6
7
|
IO,
|
|
8
|
+
TYPE_CHECKING,
|
|
9
|
+
Annotated,
|
|
7
10
|
Any,
|
|
8
11
|
ClassVar,
|
|
9
12
|
Dict,
|
|
10
13
|
List,
|
|
11
14
|
Optional,
|
|
12
15
|
Type,
|
|
16
|
+
TypeVar,
|
|
13
17
|
Union,
|
|
14
18
|
runtime_checkable,
|
|
15
19
|
)
|
|
16
20
|
|
|
17
21
|
import pydantic
|
|
22
|
+
import pydantic_core
|
|
18
23
|
from cached_property import cached_property
|
|
19
24
|
from pydantic import BaseModel, Extra, ValidationError
|
|
20
25
|
from pydantic.fields import Field
|
|
@@ -33,10 +38,15 @@ REDACT_KEYS = {
|
|
|
33
38
|
}
|
|
34
39
|
REDACT_SUFFIXES = {
|
|
35
40
|
"_password",
|
|
41
|
+
"-password",
|
|
36
42
|
"_secret",
|
|
43
|
+
"-secret",
|
|
37
44
|
"_token",
|
|
45
|
+
"-token",
|
|
38
46
|
"_key",
|
|
47
|
+
"-key",
|
|
39
48
|
"_key_id",
|
|
49
|
+
"-key-id",
|
|
40
50
|
}
|
|
41
51
|
|
|
42
52
|
|
|
@@ -78,6 +88,29 @@ def redact_raw_config(obj: Any) -> Any:
|
|
|
78
88
|
return obj
|
|
79
89
|
|
|
80
90
|
|
|
91
|
+
if TYPE_CHECKING:
|
|
92
|
+
AnyType = TypeVar("AnyType")
|
|
93
|
+
HiddenFromDocs = Annotated[AnyType, ...]
|
|
94
|
+
else:
|
|
95
|
+
HiddenFromDocs = pydantic.json_schema.SkipJsonSchema
|
|
96
|
+
|
|
97
|
+
LaxStr = Annotated[str, pydantic.BeforeValidator(lambda v: str(v))]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclasses.dataclass(frozen=True)
class SupportedSources:
    """Marker that injects a list of source names into a pydantic JSON schema.

    The names are written under ``schema_extra["supported_sources"]`` of the
    schema produced for whatever this marker is attached to.
    """

    # Source names to publish in the generated JSON schema.
    sources: List[str]

    def __get_pydantic_json_schema__(
        self,
        core_schema: pydantic_core.core_schema.CoreSchema,
        handler: pydantic.GetJsonSchemaHandler,
    ) -> pydantic.json_schema.JsonSchemaValue:
        """Extend the schema built by ``handler`` with the supported sources.

        Args:
            core_schema: Core schema to delegate to ``handler``.
            handler: Callable that produces the base JSON schema.

        Returns:
            The handler's schema with ``schema_extra.supported_sources`` set;
            any existing ``schema_extra`` entries are preserved.
        """
        json_schema = handler(core_schema)
        # Publish a copy: the instance is frozen, but its list is still
        # mutable, so sharing it would let schema consumers alter this marker.
        json_schema.setdefault("schema_extra", {})["supported_sources"] = list(
            self.sources
        )
        return json_schema
|
|
112
|
+
|
|
113
|
+
|
|
81
114
|
class ConfigModel(BaseModel):
|
|
82
115
|
class Config:
|
|
83
116
|
@staticmethod
|
|
@@ -136,6 +169,17 @@ class PermissiveConfigModel(ConfigModel):
|
|
|
136
169
|
extra = Extra.allow
|
|
137
170
|
|
|
138
171
|
|
|
172
|
+
class ConnectionModel(BaseModel):
    """Represents the config associated with a connection"""

    class Config:
        # Unknown fields are allowed; the value is spelled differently
        # depending on which pydantic major version is in use.
        extra = "allow" if PYDANTIC_VERSION_2 else Extra.allow
        underscore_attrs_are_private = True
|
|
181
|
+
|
|
182
|
+
|
|
139
183
|
class TransformerSemantics(ConfigEnum):
|
|
140
184
|
"""Describes semantics for aspect changes"""
|
|
141
185
|
|
|
@@ -317,7 +361,7 @@ class KeyValuePattern(ConfigModel):
|
|
|
317
361
|
return KeyValuePattern()
|
|
318
362
|
|
|
319
363
|
def value(self, string: str) -> List[str]:
|
|
320
|
-
matching_keys = [key for key in self.rules
|
|
364
|
+
matching_keys = [key for key in self.rules if re.match(key, string)]
|
|
321
365
|
if not matching_keys:
|
|
322
366
|
return []
|
|
323
367
|
elif self.first_match_only:
|
|
@@ -329,4 +373,4 @@ class KeyValuePattern(ConfigModel):
|
|
|
329
373
|
|
|
330
374
|
|
|
331
375
|
class VersionedConfig(ConfigModel):
|
|
332
|
-
version:
|
|
376
|
+
version: LaxStr = "1"
|
|
@@ -1,13 +1,16 @@
|
|
|
1
|
-
from typing import Type
|
|
1
|
+
from typing import TYPE_CHECKING, Type
|
|
2
2
|
|
|
3
3
|
import pydantic
|
|
4
4
|
|
|
5
5
|
from datahub.ingestion.api.global_context import get_graph_context
|
|
6
6
|
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from pydantic.deprecated.class_validators import V1RootValidator
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
def auto_connection_resolver(
|
|
9
12
|
connection_field: str = "connection",
|
|
10
|
-
) ->
|
|
13
|
+
) -> "V1RootValidator":
|
|
11
14
|
def _resolve_connection(cls: Type, values: dict) -> dict:
|
|
12
15
|
if connection_field in values:
|
|
13
16
|
connection_urn = values.pop(connection_field)
|