acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py
CHANGED
|
@@ -26,12 +26,14 @@ from datahub.sdk._shared import (
|
|
|
26
26
|
HasInstitutionalMemory,
|
|
27
27
|
HasOwnership,
|
|
28
28
|
HasPlatformInstance,
|
|
29
|
+
HasStructuredProperties,
|
|
29
30
|
HasSubtype,
|
|
30
31
|
HasTags,
|
|
31
32
|
HasTerms,
|
|
32
33
|
LinksInputType,
|
|
33
34
|
OwnersInputType,
|
|
34
35
|
ParentContainerInputType,
|
|
36
|
+
StructuredPropertyInputType,
|
|
35
37
|
TagInputType,
|
|
36
38
|
TagsInputType,
|
|
37
39
|
TermInputType,
|
|
@@ -44,6 +46,10 @@ from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
|
44
46
|
from datahub.utilities.sentinels import Unset, unset
|
|
45
47
|
|
|
46
48
|
SchemaFieldInputType: TypeAlias = Union[
|
|
49
|
+
# There is no Enum variant for schema field types because that would force users to do a mapping
|
|
50
|
+
# to our enum from the raw source type, so additional complexity on their side.
|
|
51
|
+
# To avoid that, the raw source native type can be provided as a string,
|
|
52
|
+
# and we will do the mapping internally (in sql_types.py)
|
|
47
53
|
Tuple[str, str], # (name, type)
|
|
48
54
|
Tuple[str, str, str], # (name, type, description)
|
|
49
55
|
models.SchemaFieldClass,
|
|
@@ -70,6 +76,11 @@ UpstreamLineageInputType: TypeAlias = Union[
|
|
|
70
76
|
Dict[DatasetUrnOrStr, ColumnLineageMapping],
|
|
71
77
|
]
|
|
72
78
|
|
|
79
|
+
ViewDefinitionInputType: TypeAlias = Union[
|
|
80
|
+
str,
|
|
81
|
+
models.ViewPropertiesClass,
|
|
82
|
+
]
|
|
83
|
+
|
|
73
84
|
|
|
74
85
|
def _parse_upstream_input(
|
|
75
86
|
upstream_input: UpstreamInputType,
|
|
@@ -87,7 +98,7 @@ def _parse_upstream_input(
|
|
|
87
98
|
assert_never(upstream_input)
|
|
88
99
|
|
|
89
100
|
|
|
90
|
-
def
|
|
101
|
+
def parse_cll_mapping(
|
|
91
102
|
*,
|
|
92
103
|
upstream: DatasetUrnOrStr,
|
|
93
104
|
downstream: DatasetUrnOrStr,
|
|
@@ -142,7 +153,7 @@ def _parse_upstream_lineage_input(
|
|
|
142
153
|
)
|
|
143
154
|
)
|
|
144
155
|
cll.extend(
|
|
145
|
-
|
|
156
|
+
parse_cll_mapping(
|
|
146
157
|
upstream=dataset_urn,
|
|
147
158
|
downstream=downstream_urn,
|
|
148
159
|
cll_mapping=column_lineage,
|
|
@@ -428,12 +439,25 @@ class Dataset(
|
|
|
428
439
|
HasTags,
|
|
429
440
|
HasTerms,
|
|
430
441
|
HasDomain,
|
|
442
|
+
HasStructuredProperties,
|
|
431
443
|
Entity,
|
|
432
444
|
):
|
|
445
|
+
"""Represents a dataset in DataHub.
|
|
446
|
+
|
|
447
|
+
A dataset represents a collection of data, such as a table, view, or file.
|
|
448
|
+
This class provides methods for managing dataset metadata including schema,
|
|
449
|
+
lineage, and various aspects like ownership, tags, and terms.
|
|
450
|
+
"""
|
|
451
|
+
|
|
433
452
|
__slots__ = ()
|
|
434
453
|
|
|
435
454
|
@classmethod
|
|
436
455
|
def get_urn_type(cls) -> Type[DatasetUrn]:
|
|
456
|
+
"""Get the URN type for datasets.
|
|
457
|
+
|
|
458
|
+
Returns:
|
|
459
|
+
The DatasetUrn class.
|
|
460
|
+
"""
|
|
437
461
|
return DatasetUrn
|
|
438
462
|
|
|
439
463
|
def __init__(
|
|
@@ -452,6 +476,7 @@ class Dataset(
|
|
|
452
476
|
custom_properties: Optional[Dict[str, str]] = None,
|
|
453
477
|
created: Optional[datetime] = None,
|
|
454
478
|
last_modified: Optional[datetime] = None,
|
|
479
|
+
view_definition: Optional[ViewDefinitionInputType] = None,
|
|
455
480
|
# Standard aspects.
|
|
456
481
|
parent_container: ParentContainerInputType | Unset = unset,
|
|
457
482
|
subtype: Optional[str] = None,
|
|
@@ -459,13 +484,39 @@ class Dataset(
|
|
|
459
484
|
links: Optional[LinksInputType] = None,
|
|
460
485
|
tags: Optional[TagsInputType] = None,
|
|
461
486
|
terms: Optional[TermsInputType] = None,
|
|
462
|
-
# TODO structured_properties
|
|
463
487
|
domain: Optional[DomainInputType] = None,
|
|
464
|
-
extra_aspects: ExtraAspectsType = None,
|
|
465
488
|
# Dataset-specific aspects.
|
|
466
489
|
schema: Optional[SchemaFieldsInputType] = None,
|
|
467
490
|
upstreams: Optional[models.UpstreamLineageClass] = None,
|
|
491
|
+
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
492
|
+
extra_aspects: ExtraAspectsType = None,
|
|
468
493
|
):
|
|
494
|
+
"""Initialize a new Dataset instance.
|
|
495
|
+
|
|
496
|
+
Args:
|
|
497
|
+
platform: The platform this dataset belongs to (e.g. "mysql", "snowflake").
|
|
498
|
+
name: The name of the dataset.
|
|
499
|
+
platform_instance: Optional platform instance identifier.
|
|
500
|
+
env: The environment this dataset belongs to (default: DEFAULT_ENV).
|
|
501
|
+
description: Optional description of the dataset.
|
|
502
|
+
display_name: Optional display name for the dataset.
|
|
503
|
+
qualified_name: Optional qualified name for the dataset.
|
|
504
|
+
external_url: Optional URL to external documentation or source.
|
|
505
|
+
custom_properties: Optional dictionary of custom properties.
|
|
506
|
+
created: Optional creation timestamp.
|
|
507
|
+
last_modified: Optional last modification timestamp.
|
|
508
|
+
view_definition: Optional view definition for the dataset.
|
|
509
|
+
parent_container: Optional parent container for this dataset.
|
|
510
|
+
subtype: Optional subtype of the dataset.
|
|
511
|
+
owners: Optional list of owners.
|
|
512
|
+
links: Optional list of links.
|
|
513
|
+
tags: Optional list of tags.
|
|
514
|
+
terms: Optional list of glossary terms.
|
|
515
|
+
domain: Optional domain this dataset belongs to.
|
|
516
|
+
extra_aspects: Optional list of additional aspects.
|
|
517
|
+
schema: Optional schema definition for the dataset.
|
|
518
|
+
upstreams: Optional upstream lineage information.
|
|
519
|
+
"""
|
|
469
520
|
urn = DatasetUrn.create_from_ids(
|
|
470
521
|
platform_id=platform,
|
|
471
522
|
table_name=name,
|
|
@@ -496,6 +547,8 @@ class Dataset(
|
|
|
496
547
|
self.set_created(created)
|
|
497
548
|
if last_modified is not None:
|
|
498
549
|
self.set_last_modified(last_modified)
|
|
550
|
+
if view_definition is not None:
|
|
551
|
+
self.set_view_definition(view_definition)
|
|
499
552
|
|
|
500
553
|
if parent_container is not unset:
|
|
501
554
|
self._set_container(parent_container)
|
|
@@ -511,6 +564,9 @@ class Dataset(
|
|
|
511
564
|
self.set_terms(terms)
|
|
512
565
|
if domain is not None:
|
|
513
566
|
self.set_domain(domain)
|
|
567
|
+
if structured_properties is not None:
|
|
568
|
+
for key, value in structured_properties.items():
|
|
569
|
+
self.set_structured_property(property_urn=key, values=value)
|
|
514
570
|
|
|
515
571
|
@classmethod
|
|
516
572
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
@@ -539,6 +595,11 @@ class Dataset(
|
|
|
539
595
|
|
|
540
596
|
@property
|
|
541
597
|
def description(self) -> Optional[str]:
|
|
598
|
+
"""Get the description of the dataset.
|
|
599
|
+
|
|
600
|
+
Returns:
|
|
601
|
+
The description if set, None otherwise.
|
|
602
|
+
"""
|
|
542
603
|
editable_props = self._get_editable_props()
|
|
543
604
|
return first_non_null(
|
|
544
605
|
[
|
|
@@ -548,6 +609,15 @@ class Dataset(
|
|
|
548
609
|
)
|
|
549
610
|
|
|
550
611
|
def set_description(self, description: str) -> None:
|
|
612
|
+
"""Set the description of the dataset.
|
|
613
|
+
|
|
614
|
+
Args:
|
|
615
|
+
description: The description to set.
|
|
616
|
+
|
|
617
|
+
Note:
|
|
618
|
+
If called during ingestion, this will warn if overwriting
|
|
619
|
+
a non-ingestion description.
|
|
620
|
+
"""
|
|
551
621
|
if is_ingestion_attribution():
|
|
552
622
|
editable_props = self._get_editable_props()
|
|
553
623
|
if editable_props is not None and editable_props.description is not None:
|
|
@@ -565,46 +635,136 @@ class Dataset(
|
|
|
565
635
|
|
|
566
636
|
@property
|
|
567
637
|
def display_name(self) -> Optional[str]:
|
|
638
|
+
"""Get the display name of the dataset.
|
|
639
|
+
|
|
640
|
+
Returns:
|
|
641
|
+
The display name if set, None otherwise.
|
|
642
|
+
"""
|
|
568
643
|
return self._ensure_dataset_props().name
|
|
569
644
|
|
|
570
645
|
def set_display_name(self, display_name: str) -> None:
|
|
646
|
+
"""Set the display name of the dataset.
|
|
647
|
+
|
|
648
|
+
Args:
|
|
649
|
+
display_name: The display name to set.
|
|
650
|
+
"""
|
|
571
651
|
self._ensure_dataset_props().name = display_name
|
|
572
652
|
|
|
573
653
|
@property
|
|
574
654
|
def qualified_name(self) -> Optional[str]:
|
|
655
|
+
"""Get the qualified name of the dataset.
|
|
656
|
+
|
|
657
|
+
Returns:
|
|
658
|
+
The qualified name if set, None otherwise.
|
|
659
|
+
"""
|
|
575
660
|
return self._ensure_dataset_props().qualifiedName
|
|
576
661
|
|
|
577
662
|
def set_qualified_name(self, qualified_name: str) -> None:
|
|
663
|
+
"""Set the qualified name of the dataset.
|
|
664
|
+
|
|
665
|
+
Args:
|
|
666
|
+
qualified_name: The qualified name to set.
|
|
667
|
+
"""
|
|
578
668
|
self._ensure_dataset_props().qualifiedName = qualified_name
|
|
579
669
|
|
|
580
670
|
@property
|
|
581
671
|
def external_url(self) -> Optional[str]:
|
|
672
|
+
"""Get the external URL of the dataset.
|
|
673
|
+
|
|
674
|
+
Returns:
|
|
675
|
+
The external URL if set, None otherwise.
|
|
676
|
+
"""
|
|
582
677
|
return self._ensure_dataset_props().externalUrl
|
|
583
678
|
|
|
584
679
|
def set_external_url(self, external_url: str) -> None:
|
|
680
|
+
"""Set the external URL of the dataset.
|
|
681
|
+
|
|
682
|
+
Args:
|
|
683
|
+
external_url: The external URL to set.
|
|
684
|
+
"""
|
|
585
685
|
self._ensure_dataset_props().externalUrl = external_url
|
|
586
686
|
|
|
587
687
|
@property
|
|
588
688
|
def custom_properties(self) -> Dict[str, str]:
|
|
689
|
+
"""Get the custom properties of the dataset.
|
|
690
|
+
|
|
691
|
+
Returns:
|
|
692
|
+
Dictionary of custom properties.
|
|
693
|
+
"""
|
|
589
694
|
return self._ensure_dataset_props().customProperties
|
|
590
695
|
|
|
591
696
|
def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
|
|
697
|
+
"""Set the custom properties of the dataset.
|
|
698
|
+
|
|
699
|
+
Args:
|
|
700
|
+
custom_properties: Dictionary of custom properties to set.
|
|
701
|
+
"""
|
|
592
702
|
self._ensure_dataset_props().customProperties = custom_properties
|
|
593
703
|
|
|
594
704
|
@property
|
|
595
705
|
def created(self) -> Optional[datetime]:
|
|
706
|
+
"""Get the creation timestamp of the dataset.
|
|
707
|
+
|
|
708
|
+
Returns:
|
|
709
|
+
The creation timestamp if set, None otherwise.
|
|
710
|
+
"""
|
|
596
711
|
return parse_time_stamp(self._ensure_dataset_props().created)
|
|
597
712
|
|
|
598
713
|
def set_created(self, created: datetime) -> None:
|
|
714
|
+
"""Set the creation timestamp of the dataset.
|
|
715
|
+
|
|
716
|
+
Args:
|
|
717
|
+
created: The creation timestamp to set.
|
|
718
|
+
"""
|
|
599
719
|
self._ensure_dataset_props().created = make_time_stamp(created)
|
|
600
720
|
|
|
601
721
|
@property
|
|
602
722
|
def last_modified(self) -> Optional[datetime]:
|
|
723
|
+
"""Get the last modification timestamp of the dataset.
|
|
724
|
+
|
|
725
|
+
Returns:
|
|
726
|
+
The last modification timestamp if set, None otherwise.
|
|
727
|
+
"""
|
|
603
728
|
return parse_time_stamp(self._ensure_dataset_props().lastModified)
|
|
604
729
|
|
|
605
730
|
def set_last_modified(self, last_modified: datetime) -> None:
|
|
606
731
|
self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
|
|
607
732
|
|
|
733
|
+
@property
|
|
734
|
+
def view_definition(self) -> Optional[models.ViewPropertiesClass]:
|
|
735
|
+
"""Get the view definition of the dataset.
|
|
736
|
+
|
|
737
|
+
Under typical usage, this will be present if the subtype is "View".
|
|
738
|
+
|
|
739
|
+
Returns:
|
|
740
|
+
The view definition if set, None otherwise.
|
|
741
|
+
"""
|
|
742
|
+
return self._get_aspect(models.ViewPropertiesClass)
|
|
743
|
+
|
|
744
|
+
def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
|
|
745
|
+
"""Set the view definition of the dataset.
|
|
746
|
+
|
|
747
|
+
If you're setting a view definition, subtype should typically be set to "view".
|
|
748
|
+
|
|
749
|
+
If a string is provided, it will be treated as a SQL view definition. To set
|
|
750
|
+
a custom language or other properties, provide a ViewPropertiesClass object.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
view_definition: The view definition to set.
|
|
754
|
+
"""
|
|
755
|
+
if isinstance(view_definition, models.ViewPropertiesClass):
|
|
756
|
+
self._set_aspect(view_definition)
|
|
757
|
+
elif isinstance(view_definition, str):
|
|
758
|
+
self._set_aspect(
|
|
759
|
+
models.ViewPropertiesClass(
|
|
760
|
+
materialized=False,
|
|
761
|
+
viewLogic=view_definition,
|
|
762
|
+
viewLanguage="SQL",
|
|
763
|
+
)
|
|
764
|
+
)
|
|
765
|
+
else:
|
|
766
|
+
assert_never(view_definition)
|
|
767
|
+
|
|
608
768
|
def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
|
|
609
769
|
schema_metadata = self._get_aspect(models.SchemaMetadataClass)
|
|
610
770
|
if schema_metadata is None:
|
|
@@ -614,6 +774,11 @@ class Dataset(
|
|
|
614
774
|
@property
|
|
615
775
|
def schema(self) -> List[SchemaField]:
|
|
616
776
|
# TODO: Add some caching here to avoid iterating over the schema every time.
|
|
777
|
+
"""Get the schema fields of the dataset.
|
|
778
|
+
|
|
779
|
+
Returns:
|
|
780
|
+
List of SchemaField objects representing the dataset's schema.
|
|
781
|
+
"""
|
|
617
782
|
schema_dict = self._schema_dict()
|
|
618
783
|
return [SchemaField(self, field_path) for field_path in schema_dict]
|
|
619
784
|
|
|
@@ -669,6 +834,17 @@ class Dataset(
|
|
|
669
834
|
|
|
670
835
|
def __getitem__(self, field_path: str) -> SchemaField:
|
|
671
836
|
# TODO: Automatically deal with field path v2?
|
|
837
|
+
"""Get a schema field by its path.
|
|
838
|
+
|
|
839
|
+
Args:
|
|
840
|
+
field_path: The path of the field to retrieve.
|
|
841
|
+
|
|
842
|
+
Returns:
|
|
843
|
+
A SchemaField instance.
|
|
844
|
+
|
|
845
|
+
Raises:
|
|
846
|
+
SchemaFieldKeyError: If the field is not found.
|
|
847
|
+
"""
|
|
672
848
|
schema_dict = self._schema_dict()
|
|
673
849
|
if field_path not in schema_dict:
|
|
674
850
|
raise SchemaFieldKeyError(f"Field {field_path} not found in schema")
|
datahub/sdk/entity.py
CHANGED
|
@@ -20,9 +20,24 @@ ExtraAspectsType = Union[None, List[AspectTypeVar]]
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class Entity:
|
|
23
|
+
"""Base class for all DataHub entities.
|
|
24
|
+
|
|
25
|
+
This class provides the core functionality for working with DataHub entities,
|
|
26
|
+
including aspect management and URN handling. It should not be instantiated directly;
|
|
27
|
+
instead, use one of its subclasses like Dataset or Container.
|
|
28
|
+
"""
|
|
29
|
+
|
|
23
30
|
__slots__ = ("_urn", "_prev_aspects", "_aspects")
|
|
24
31
|
|
|
25
32
|
def __init__(self, /, urn: Urn):
|
|
33
|
+
"""Initialize a new Entity instance.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
urn: The URN that uniquely identifies this entity.
|
|
37
|
+
|
|
38
|
+
Raises:
|
|
39
|
+
SdkUsageError: If this base class is instantiated directly.
|
|
40
|
+
"""
|
|
26
41
|
# This method is not meant for direct usage.
|
|
27
42
|
if type(self) is Entity:
|
|
28
43
|
raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
|
|
@@ -36,6 +51,15 @@ class Entity:
|
|
|
36
51
|
|
|
37
52
|
@classmethod
|
|
38
53
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
54
|
+
"""Create a new entity instance from graph data.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
urn: The URN of the entity.
|
|
58
|
+
current_aspects: The current aspects of the entity from the graph.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
A new entity instance initialized with the graph data.
|
|
62
|
+
"""
|
|
39
63
|
# If an init method from a subclass adds required fields, it also needs to override this method.
|
|
40
64
|
# An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
|
|
41
65
|
# too hacky for my taste.
|
|
@@ -43,6 +67,14 @@ class Entity:
|
|
|
43
67
|
return entity._init_from_graph(current_aspects)
|
|
44
68
|
|
|
45
69
|
def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
|
|
70
|
+
"""Initialize the entity with aspects from the graph.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
current_aspects: The current aspects of the entity from the graph.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
The entity instance with initialized aspects.
|
|
77
|
+
"""
|
|
46
78
|
self._prev_aspects = current_aspects
|
|
47
79
|
|
|
48
80
|
self._aspects = {}
|
|
@@ -54,14 +86,30 @@ class Entity:
|
|
|
54
86
|
|
|
55
87
|
@classmethod
|
|
56
88
|
@abc.abstractmethod
|
|
57
|
-
def get_urn_type(cls) -> Type[_SpecificUrn]:
|
|
89
|
+
def get_urn_type(cls) -> Type[_SpecificUrn]:
|
|
90
|
+
"""Get the URN type for this entity class.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
The URN type class that corresponds to this entity type.
|
|
94
|
+
"""
|
|
95
|
+
...
|
|
58
96
|
|
|
59
97
|
@classmethod
|
|
60
98
|
def entity_type_name(cls) -> str:
|
|
99
|
+
"""Get the entity type name.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
The string name of this entity type.
|
|
103
|
+
"""
|
|
61
104
|
return cls.get_urn_type().ENTITY_TYPE
|
|
62
105
|
|
|
63
106
|
@property
|
|
64
107
|
def urn(self) -> _SpecificUrn:
|
|
108
|
+
"""Get the entity's URN.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
The URN that uniquely identifies this entity.
|
|
112
|
+
"""
|
|
65
113
|
return self._urn
|
|
66
114
|
|
|
67
115
|
def _get_aspect(
|
|
@@ -69,22 +117,51 @@ class Entity:
|
|
|
69
117
|
aspect_type: Type[AspectTypeVar],
|
|
70
118
|
/,
|
|
71
119
|
) -> Optional[AspectTypeVar]:
|
|
120
|
+
"""Get an aspect of the entity by its type.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
aspect_type: The type of aspect to retrieve.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
The aspect if it exists, None otherwise.
|
|
127
|
+
"""
|
|
72
128
|
return self._aspects.get(aspect_type.ASPECT_NAME) # type: ignore
|
|
73
129
|
|
|
74
130
|
def _set_aspect(self, value: AspectTypeVar, /) -> None:
|
|
131
|
+
"""Set an aspect of the entity.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
value: The aspect to set.
|
|
135
|
+
"""
|
|
75
136
|
self._aspects[value.ASPECT_NAME] = value # type: ignore
|
|
76
137
|
|
|
77
138
|
def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
|
|
139
|
+
"""Set a default aspect if it doesn't exist.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
default_aspect: The default aspect to set if none exists.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
The existing aspect if one exists, otherwise the default aspect.
|
|
146
|
+
"""
|
|
78
147
|
# Similar semantics to dict.setdefault.
|
|
79
148
|
if existing_aspect := self._get_aspect(type(default_aspect)):
|
|
80
149
|
return existing_aspect
|
|
81
150
|
self._set_aspect(default_aspect)
|
|
82
151
|
return default_aspect
|
|
83
152
|
|
|
84
|
-
def
|
|
153
|
+
def as_mcps(
|
|
85
154
|
self,
|
|
86
155
|
change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
|
|
87
156
|
) -> List[MetadataChangeProposalWrapper]:
|
|
157
|
+
"""Convert the entity's aspects to MetadataChangeProposals.
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
change_type: The type of change to apply (default: UPSERT).
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
A list of MetadataChangeProposalWrapper objects.
|
|
164
|
+
"""
|
|
88
165
|
urn_str = str(self.urn)
|
|
89
166
|
|
|
90
167
|
mcps = []
|
|
@@ -100,13 +177,32 @@ class Entity:
|
|
|
100
177
|
return mcps
|
|
101
178
|
|
|
102
179
|
def as_workunits(self) -> List[MetadataWorkUnit]:
|
|
103
|
-
|
|
180
|
+
"""Convert the entity's aspects to MetadataWorkUnits.
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
A list of MetadataWorkUnit objects.
|
|
184
|
+
"""
|
|
185
|
+
return [mcp.as_workunit() for mcp in self.as_mcps()]
|
|
104
186
|
|
|
105
187
|
def _set_extra_aspects(self, extra_aspects: ExtraAspectsType) -> None:
|
|
188
|
+
"""Set additional aspects on the entity.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
extra_aspects: List of additional aspects to set.
|
|
192
|
+
|
|
193
|
+
Note:
|
|
194
|
+
This method does not validate for conflicts between extra aspects
|
|
195
|
+
and standard aspects.
|
|
196
|
+
"""
|
|
106
197
|
# TODO: Add validation to ensure that an "extra aspect" does not conflict
|
|
107
198
|
# with / get overridden by a standard aspect.
|
|
108
199
|
for aspect in extra_aspects or []:
|
|
109
200
|
self._set_aspect(aspect)
|
|
110
201
|
|
|
111
202
|
def __repr__(self) -> str:
|
|
203
|
+
"""Get a string representation of the entity.
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
A string in the format "EntityClass('urn')".
|
|
207
|
+
"""
|
|
112
208
|
return f"{self.__class__.__name__}('{self.urn}')"
|