acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from typing import (
|
|
6
7
|
TYPE_CHECKING,
|
|
7
8
|
Callable,
|
|
9
|
+
Dict,
|
|
8
10
|
List,
|
|
9
11
|
Optional,
|
|
10
12
|
Sequence,
|
|
@@ -14,6 +16,7 @@ from typing import (
|
|
|
14
16
|
|
|
15
17
|
from typing_extensions import TypeAlias, assert_never
|
|
16
18
|
|
|
19
|
+
import datahub.emitter.mce_builder as builder
|
|
17
20
|
import datahub.metadata.schema_classes as models
|
|
18
21
|
from datahub.emitter.mce_builder import (
|
|
19
22
|
make_ts_millis,
|
|
@@ -24,33 +27,77 @@ from datahub.emitter.mce_builder import (
|
|
|
24
27
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
25
28
|
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
26
29
|
from datahub.metadata.urns import (
|
|
30
|
+
ChartUrn,
|
|
27
31
|
ContainerUrn,
|
|
28
32
|
CorpGroupUrn,
|
|
29
33
|
CorpUserUrn,
|
|
34
|
+
DashboardUrn,
|
|
35
|
+
DataFlowUrn,
|
|
30
36
|
DataJobUrn,
|
|
31
37
|
DataPlatformInstanceUrn,
|
|
32
38
|
DataPlatformUrn,
|
|
39
|
+
DataProcessInstanceUrn,
|
|
33
40
|
DatasetUrn,
|
|
34
41
|
DomainUrn,
|
|
35
42
|
GlossaryTermUrn,
|
|
36
43
|
OwnershipTypeUrn,
|
|
44
|
+
StructuredPropertyUrn,
|
|
37
45
|
TagUrn,
|
|
38
46
|
Urn,
|
|
47
|
+
VersionSetUrn,
|
|
39
48
|
)
|
|
40
|
-
from datahub.sdk._utils import add_list_unique, remove_list_unique
|
|
49
|
+
from datahub.sdk._utils import DEFAULT_ACTOR_URN, add_list_unique, remove_list_unique
|
|
41
50
|
from datahub.sdk.entity import Entity
|
|
42
51
|
from datahub.utilities.urns.error import InvalidUrnError
|
|
43
52
|
|
|
44
53
|
if TYPE_CHECKING:
|
|
45
54
|
from datahub.sdk.container import Container
|
|
46
|
-
|
|
47
55
|
UrnOrStr: TypeAlias = Union[Urn, str]
|
|
56
|
+
ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
|
|
48
57
|
DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
|
|
49
58
|
DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
59
|
+
DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
|
|
60
|
+
DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
|
|
61
|
+
DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
|
|
62
|
+
DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
|
|
50
63
|
|
|
51
64
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
65
|
+
ActorUrnOrStr: TypeAlias = Union[str, ActorUrn]
|
|
66
|
+
StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
|
|
67
|
+
StructuredPropertyValueType: TypeAlias = Union[str, float, int]
|
|
68
|
+
StructuredPropertyInputType: TypeAlias = Dict[
|
|
69
|
+
StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
TrainingMetricsInputType: TypeAlias = Union[
|
|
73
|
+
List[models.MLMetricClass], Dict[str, Optional[str]]
|
|
74
|
+
]
|
|
75
|
+
HyperParamsInputType: TypeAlias = Union[
|
|
76
|
+
List[models.MLHyperParamClass], Dict[str, Optional[str]]
|
|
77
|
+
]
|
|
78
|
+
MLTrainingJobInputType: TypeAlias = Union[Sequence[Union[str, DataProcessInstanceUrn]]]
|
|
52
79
|
|
|
53
|
-
|
|
80
|
+
|
|
81
|
+
def convert_training_metrics(
    metrics: TrainingMetricsInputType,
) -> List[models.MLMetricClass]:
    """Normalize training metrics into a list of ``MLMetricClass`` objects.

    Args:
        metrics: Either a list of ``MLMetricClass`` objects, or a mapping of
            metric name to an optional metric value.

    Returns:
        The input list unchanged when a list is given. When a mapping is
        given, one ``MLMetricClass`` per entry: present values are
        stringified, and a ``None`` value stays ``None``.
    """
    if isinstance(metrics, dict):
        return [
            models.MLMetricClass(
                name=name,
                # str(None) would produce the text "None"; keep a missing
                # value missing instead of inventing a string for it.
                value=None if value is None else str(value),
            )
            for name, value in metrics.items()
        ]
    return metrics
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def convert_hyper_params(
    params: HyperParamsInputType,
) -> List[models.MLHyperParamClass]:
    """Normalise hyper-parameters into a list of ``MLHyperParamClass`` objects.

    Args:
        params: Either a list of ``MLHyperParamClass`` objects, or a mapping
            of parameter name to value (the value may be ``None``).

    Returns:
        For a mapping, a new list with one ``MLHyperParamClass`` per entry, in
        the mapping's iteration order. Any other input is returned unchanged
        (the same object, not a copy).
    """
    if isinstance(params, dict):
        return [
            models.MLHyperParamClass(
                name=name,
                # Keep a missing value as None; str(None) would otherwise
                # record the literal text "None" as the parameter value.
                value=None if value is None else str(value),
            )
            for name, value in params.items()
        ]
    return params
|
|
54
101
|
|
|
55
102
|
|
|
56
103
|
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
|
|
@@ -65,6 +112,130 @@ def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
|
|
|
65
112
|
return parse_ts_millis(ts.time)
|
|
66
113
|
|
|
67
114
|
|
|
115
|
+
class ChangeAuditStampsMixin(ABC):
    """Mixin class for managing audit stamps on entities.

    Exposes the ``created``, ``lastModified`` and ``deleted`` stamps of a
    ``ChangeAuditStampsClass`` as read-only properties plus explicit
    ``set_*`` methods. Concrete classes only have to implement
    ``_get_audit_stamps`` and ``_set_audit_stamps``.

    A stamp whose time is ``0`` or whose actor is ``builder.UNKNOWN_USER`` is
    treated as "not set" and surfaces as ``None``.
    """

    __slots__ = ()

    @abstractmethod
    def _get_audit_stamps(self) -> models.ChangeAuditStampsClass:
        """Get the audit stamps from the entity properties."""
        pass

    @abstractmethod
    def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None:
        """Set the audit stamps on the entity properties."""
        pass

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _stamp_time(stamp: Optional[models.AuditStampClass]) -> Optional[datetime]:
        """Convert a stamp's epoch-millisecond time to a datetime, or None if unset."""
        if stamp is None or stamp.time == 0:
            return None
        # Float division keeps the millisecond fraction. No tz is passed, so
        # the result is a naive datetime in the local timezone.
        # NOTE(review): parse_time_stamp in this module goes through
        # parse_ts_millis instead -- confirm whether these getters should too.
        return datetime.fromtimestamp(stamp.time / 1000)

    @staticmethod
    def _stamp_actor(stamp: Optional[models.AuditStampClass]) -> Optional[str]:
        """Return a stamp's actor, or None if the stamp or its actor is unset."""
        if stamp is None or stamp.actor == builder.UNKNOWN_USER:
            return None
        return stamp.actor

    @staticmethod
    def _actor_to_str(actor: ActorUrnOrStr) -> str:
        """Normalise an actor to its string form; plain strings go through make_user_urn."""
        if isinstance(actor, str):
            actor = make_user_urn(actor)
        return str(actor)

    @staticmethod
    def _ensure_deleted_stamp(
        audit_stamps: models.ChangeAuditStampsClass,
    ) -> models.AuditStampClass:
        """Return ``audit_stamps.deleted``, creating an unset stamp when it is None."""
        # Default constructor sets deleted to None
        if audit_stamps.deleted is None:
            audit_stamps.deleted = models.AuditStampClass(
                time=0, actor=builder.UNKNOWN_USER
            )
        return audit_stamps.deleted

    # ------------------------------------------------------------------
    # Last modified
    # ------------------------------------------------------------------

    @property
    def last_modified(self) -> Optional[datetime]:
        """Get the last modification timestamp from audit stamps."""
        return self._stamp_time(self._get_audit_stamps().lastModified)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Set the last modification timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.time = make_ts_millis(last_modified)
        self._set_audit_stamps(audit_stamps)

    @property
    def last_modified_by(self) -> Optional[str]:
        """Get the last modification actor from audit stamps."""
        return self._stamp_actor(self._get_audit_stamps().lastModified)

    def set_last_modified_by(self, last_modified_by: ActorUrnOrStr) -> None:
        """Set the last modification actor in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.actor = self._actor_to_str(last_modified_by)
        self._set_audit_stamps(audit_stamps)

    # ------------------------------------------------------------------
    # Created
    # ------------------------------------------------------------------

    @property
    def created_at(self) -> Optional[datetime]:
        """Get the creation timestamp from audit stamps."""
        return self._stamp_time(self._get_audit_stamps().created)

    def set_created_at(self, created_at: datetime) -> None:
        """Set the creation timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.time = make_ts_millis(created_at)
        self._set_audit_stamps(audit_stamps)

    @property
    def created_by(self) -> Optional[str]:
        """Get the creation actor from audit stamps."""
        return self._stamp_actor(self._get_audit_stamps().created)

    def set_created_by(self, created_by: ActorUrnOrStr) -> None:
        """Set the creation actor in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.actor = self._actor_to_str(created_by)
        self._set_audit_stamps(audit_stamps)

    # ------------------------------------------------------------------
    # Deleted (the stamp itself is optional and created on first write)
    # ------------------------------------------------------------------

    @property
    def deleted_on(self) -> Optional[datetime]:
        """Get the deletion timestamp from audit stamps."""
        return self._stamp_time(self._get_audit_stamps().deleted)

    def set_deleted_on(self, deleted_on: datetime) -> None:
        """Set the deletion timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        self._ensure_deleted_stamp(audit_stamps).time = make_ts_millis(deleted_on)
        self._set_audit_stamps(audit_stamps)

    @property
    def deleted_by(self) -> Optional[str]:
        """Get the deletion actor from audit stamps."""
        return self._stamp_actor(self._get_audit_stamps().deleted)

    def set_deleted_by(self, deleted_by: ActorUrnOrStr) -> None:
        """Set the deletion actor in audit stamps."""
        actor = self._actor_to_str(deleted_by)
        audit_stamps = self._get_audit_stamps()
        self._ensure_deleted_stamp(audit_stamps).actor = actor
        self._set_audit_stamps(audit_stamps)
|
|
237
|
+
|
|
238
|
+
|
|
68
239
|
class HasPlatformInstance(Entity):
|
|
69
240
|
__slots__ = ()
|
|
70
241
|
|
|
@@ -134,7 +305,7 @@ OwnerInputType: TypeAlias = Union[
|
|
|
134
305
|
Tuple[ActorUrn, OwnershipTypeType],
|
|
135
306
|
models.OwnerClass,
|
|
136
307
|
]
|
|
137
|
-
OwnersInputType: TypeAlias =
|
|
308
|
+
OwnersInputType: TypeAlias = Sequence[OwnerInputType]
|
|
138
309
|
|
|
139
310
|
|
|
140
311
|
class HasOwnership(Entity):
|
|
@@ -235,7 +406,9 @@ class HasOwnership(Entity):
|
|
|
235
406
|
# If you pass in a ContainerKey, we can use parent_key() to build the browse path.
|
|
236
407
|
# If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
|
|
237
408
|
# will be treated as raw ids.
|
|
238
|
-
ParentContainerInputType: TypeAlias = Union[
|
|
409
|
+
ParentContainerInputType: TypeAlias = Union[
|
|
410
|
+
"Container", ContainerKey, Sequence[UrnOrStr]
|
|
411
|
+
]
|
|
239
412
|
|
|
240
413
|
|
|
241
414
|
class HasContainer(Entity):
|
|
@@ -295,7 +468,7 @@ class HasContainer(Entity):
|
|
|
295
468
|
)
|
|
296
469
|
for entry in parsed_path
|
|
297
470
|
]
|
|
298
|
-
elif container
|
|
471
|
+
elif isinstance(container, ContainerKey):
|
|
299
472
|
container_urn = container.as_urn()
|
|
300
473
|
|
|
301
474
|
browse_path_reversed = [container_urn]
|
|
@@ -354,7 +527,7 @@ class HasContainer(Entity):
|
|
|
354
527
|
|
|
355
528
|
|
|
356
529
|
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
|
|
357
|
-
TagsInputType: TypeAlias =
|
|
530
|
+
TagsInputType: TypeAlias = Sequence[TagInputType]
|
|
358
531
|
|
|
359
532
|
|
|
360
533
|
class HasTags(Entity):
|
|
@@ -409,7 +582,7 @@ class HasTags(Entity):
|
|
|
409
582
|
TermInputType: TypeAlias = Union[
|
|
410
583
|
str, GlossaryTermUrn, models.GlossaryTermAssociationClass
|
|
411
584
|
]
|
|
412
|
-
TermsInputType: TypeAlias =
|
|
585
|
+
TermsInputType: TypeAlias = Sequence[TermInputType]
|
|
413
586
|
|
|
414
587
|
|
|
415
588
|
class HasTerms(Entity):
|
|
@@ -441,7 +614,7 @@ class HasTerms(Entity):
|
|
|
441
614
|
def _terms_audit_stamp(self) -> models.AuditStampClass:
|
|
442
615
|
return models.AuditStampClass(
|
|
443
616
|
time=0,
|
|
444
|
-
actor=
|
|
617
|
+
actor=DEFAULT_ACTOR_URN,
|
|
445
618
|
)
|
|
446
619
|
|
|
447
620
|
def set_terms(self, terms: TermsInputType) -> None:
|
|
@@ -529,7 +702,7 @@ class HasInstitutionalMemory(Entity):
|
|
|
529
702
|
def _institutional_memory_audit_stamp(self) -> models.AuditStampClass:
|
|
530
703
|
return models.AuditStampClass(
|
|
531
704
|
time=0,
|
|
532
|
-
actor=
|
|
705
|
+
actor=DEFAULT_ACTOR_URN,
|
|
533
706
|
)
|
|
534
707
|
|
|
535
708
|
@classmethod
|
|
@@ -578,3 +751,213 @@ class HasInstitutionalMemory(Entity):
|
|
|
578
751
|
self._link_key,
|
|
579
752
|
self._parse_link_association_class(link),
|
|
580
753
|
)
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
class HasVersion(Entity):
    """Mixin for entities that have version properties.

    Version data lives in the ``VersionPropertiesClass`` aspect. Whenever that
    aspect has to be created implicitly (e.g. when adding an alias before any
    version was set) it is seeded with the default version tag and sort id
    below.
    """

    # Defaults used when version properties are created without an explicit version.
    _DEFAULT_VERSION_TAG = "0.1.0"
    _DEFAULT_SORT_ID = "0000000.1.0"

    def _get_version_props(self) -> Optional[models.VersionPropertiesClass]:
        """Return the version properties aspect, or None if the entity has none."""
        return self._get_aspect(models.VersionPropertiesClass)

    def _version_set_urn(self) -> VersionSetUrn:
        """Build the version-set URN for this entity.

        The id is ``builder.datahub_guid`` applied to ``{"urn": <entity urn>}``,
        and the entity type is taken from the entity's own URN class.
        """
        guid_dict = {"urn": str(self.urn)}
        return VersionSetUrn(
            id=builder.datahub_guid(guid_dict), entity_type=self.urn.ENTITY_TYPE
        )

    def _ensure_version_props(self) -> models.VersionPropertiesClass:
        """Return the version properties aspect, creating a default one if missing."""
        version_props = self._get_version_props()
        if version_props is None:
            version_props = models.VersionPropertiesClass(
                versionSet=str(self._version_set_urn()),
                version=models.VersionTagClass(versionTag=self._DEFAULT_VERSION_TAG),
                sortId=self._DEFAULT_SORT_ID,
            )
            self._set_aspect(version_props)
        return version_props

    @property
    def version(self) -> Optional[str]:
        """The entity's version tag, or None when no version is recorded."""
        version_props = self._get_version_props()
        if version_props and version_props.version:
            return version_props.version.versionTag
        return None

    def set_version(self, version: str) -> None:
        """Set the version of the entity.

        Creates the version properties aspect if needed; otherwise overwrites
        the version tag, version set and sort id of the existing one.

        Args:
            version: The version tag to record.
        """
        version_set = str(self._version_set_urn())
        sort_id = version.zfill(10)  # Pad with zeros for sorting

        version_props = self._get_version_props()
        if version_props is None:
            # If no version properties exist, create a new one
            version_props = models.VersionPropertiesClass(
                version=models.VersionTagClass(versionTag=version),
                versionSet=version_set,
                sortId=sort_id,
            )
        else:
            # Update existing version properties
            version_props.version = models.VersionTagClass(versionTag=version)
            version_props.versionSet = version_set
            version_props.sortId = sort_id

        self._set_aspect(version_props)

    @property
    def version_aliases(self) -> List[str]:
        """The entity's version aliases (aliases without a tag are skipped).

        Returns an empty list, never None, when there are no aliases.
        """
        version_props = self._get_version_props()
        if version_props and version_props.aliases:
            return [
                alias.versionTag
                for alias in version_props.aliases
                if alias.versionTag is not None
            ]
        return []  # Return empty list instead of None

    def set_version_aliases(self, aliases: List[str]) -> None:
        """Replace all version aliases.

        If the entity has no version properties yet, they are created with the
        default version first.

        Args:
            aliases: The alias tags that should replace the current ones.
        """
        version_props = self._ensure_version_props()
        version_props.aliases = [
            models.VersionTagClass(versionTag=alias) for alias in aliases
        ]
        self._set_aspect(version_props)

    def add_version_alias(self, alias: str) -> None:
        """Append a version alias, creating default version properties if needed.

        Args:
            alias: The alias tag to add.

        Raises:
            ValueError: If ``alias`` is empty.
        """
        if not alias:
            raise ValueError("Alias cannot be empty")
        version_props = self._ensure_version_props()
        if version_props.aliases is None:
            version_props.aliases = []
        version_props.aliases.append(models.VersionTagClass(versionTag=alias))
        self._set_aspect(version_props)

    def remove_version_alias(self, alias: str) -> None:
        """Remove every alias whose tag equals ``alias``.

        Does nothing when the entity has no version properties or no aliases.

        Args:
            alias: The alias tag to remove.
        """
        version_props = self._get_version_props()
        if version_props and version_props.aliases:
            version_props.aliases = [
                a for a in version_props.aliases if a.versionTag != alias
            ]
            self._set_aspect(version_props)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
class HasStructuredProperties(Entity):
    """Mixin giving an entity read/write access to its structured properties."""

    __slots__ = ()

    @property
    def structured_properties(
        self,
    ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
        """
        The entity's structured property value assignments.

        Returns:
            The assignment list of the structured properties aspect, or None
            when the entity has no such aspect.
        """
        aspect = self._get_aspect(models.StructuredPropertiesClass)
        if not aspect:
            return None
        return aspect.properties

    def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
        """
        Fetch the structured properties aspect, falling back to an empty one.

        Returns:
            Whatever ``_setdefault_aspect`` returns for an empty
            StructuredPropertiesClass default.
        """
        empty_aspect = models.StructuredPropertiesClass(properties=[])
        return self._setdefault_aspect(empty_aspect)

    def set_structured_property(
        self,
        property_urn: StructuredPropertyUrnOrStr,
        values: Sequence[StructuredPropertyValueType],
    ) -> None:
        """
        Assign values to a structured property, replacing any existing assignment.

        Args:
            property_urn: URN of the structured property
            values: Values to store for the property
        """
        # Parsing the URN doubles as validation of the input.
        urn_text = str(StructuredPropertyUrn.from_string(property_urn))

        aspect = self._ensure_structured_properties()

        # Look for an assignment that already targets this property.
        current = None
        for assignment in aspect.properties:
            if assignment.propertyUrn == urn_text:
                current = assignment
                break

        now_millis = make_ts_millis(datetime.now())

        if not current:
            # No assignment yet: build one stamped as created and modified now.
            created_stamp = models.AuditStampClass(
                time=now_millis,
                actor=DEFAULT_ACTOR_URN,
            )
            modified_stamp = models.AuditStampClass(
                time=now_millis,
                actor=DEFAULT_ACTOR_URN,
            )
            add_list_unique(
                aspect.properties,
                key=lambda prop: prop.propertyUrn,
                item=models.StructuredPropertyValueAssignmentClass(
                    propertyUrn=urn_text,
                    values=list(values),
                    created=created_stamp,
                    lastModified=modified_stamp,
                ),
            )
        else:
            # Overwrite the values in place and refresh the modification stamp.
            current.values = list(values)
            current.lastModified = models.AuditStampClass(
                time=now_millis,
                actor=DEFAULT_ACTOR_URN,
            )

        self._set_aspect(aspect)

    def remove_structured_property(
        self, property_urn: StructuredPropertyUrnOrStr
    ) -> None:
        """
        Drop the assignment for a structured property.

        Args:
            property_urn: URN of the structured property to remove
        """
        assignments = self._ensure_structured_properties().properties
        # Only the property URN matters for matching; values are left empty.
        placeholder = models.StructuredPropertyValueAssignmentClass(
            propertyUrn=str(property_urn), values=[]
        )
        remove_list_unique(
            assignments,
            key=lambda prop: prop.propertyUrn,
            item=placeholder,
        )
|
datahub/sdk/_utils.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
from typing import Any, Callable, List, Protocol, TypeVar
|
|
2
2
|
|
|
3
3
|
from datahub.errors import ItemNotFoundError
|
|
4
|
+
from datahub.metadata.urns import CorpUserUrn
|
|
5
|
+
|
|
6
|
+
# TODO: Change __ingestion to _ingestion.
|
|
7
|
+
DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
|
|
4
8
|
|
|
5
9
|
|
|
6
10
|
class _SupportsEq(Protocol):
|