acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/dataset.py
CHANGED
|
@@ -26,12 +26,14 @@ from datahub.sdk._shared import (
|
|
|
26
26
|
HasInstitutionalMemory,
|
|
27
27
|
HasOwnership,
|
|
28
28
|
HasPlatformInstance,
|
|
29
|
+
HasStructuredProperties,
|
|
29
30
|
HasSubtype,
|
|
30
31
|
HasTags,
|
|
31
32
|
HasTerms,
|
|
32
33
|
LinksInputType,
|
|
33
34
|
OwnersInputType,
|
|
34
35
|
ParentContainerInputType,
|
|
36
|
+
StructuredPropertyInputType,
|
|
35
37
|
TagInputType,
|
|
36
38
|
TagsInputType,
|
|
37
39
|
TermInputType,
|
|
@@ -44,6 +46,10 @@ from datahub.sdk.entity import Entity, ExtraAspectsType
|
|
|
44
46
|
from datahub.utilities.sentinels import Unset, unset
|
|
45
47
|
|
|
46
48
|
SchemaFieldInputType: TypeAlias = Union[
|
|
49
|
+
# There is no Enum variant for schema field types because that would force users to do a mapping
|
|
50
|
+
# to our enum from the raw source type, so additional complexity on their side.
|
|
51
|
+
# To avoid that, the raw source native type can be provided as a string,
|
|
52
|
+
# and we will do the mapping internally (in sql_types.py)
|
|
47
53
|
Tuple[str, str], # (name, type)
|
|
48
54
|
Tuple[str, str, str], # (name, type, description)
|
|
49
55
|
models.SchemaFieldClass,
|
|
@@ -70,6 +76,11 @@ UpstreamLineageInputType: TypeAlias = Union[
|
|
|
70
76
|
Dict[DatasetUrnOrStr, ColumnLineageMapping],
|
|
71
77
|
]
|
|
72
78
|
|
|
79
|
+
ViewDefinitionInputType: TypeAlias = Union[
|
|
80
|
+
str,
|
|
81
|
+
models.ViewPropertiesClass,
|
|
82
|
+
]
|
|
83
|
+
|
|
73
84
|
|
|
74
85
|
def _parse_upstream_input(
|
|
75
86
|
upstream_input: UpstreamInputType,
|
|
@@ -428,6 +439,7 @@ class Dataset(
|
|
|
428
439
|
HasTags,
|
|
429
440
|
HasTerms,
|
|
430
441
|
HasDomain,
|
|
442
|
+
HasStructuredProperties,
|
|
431
443
|
Entity,
|
|
432
444
|
):
|
|
433
445
|
"""Represents a dataset in DataHub.
|
|
@@ -464,6 +476,7 @@ class Dataset(
|
|
|
464
476
|
custom_properties: Optional[Dict[str, str]] = None,
|
|
465
477
|
created: Optional[datetime] = None,
|
|
466
478
|
last_modified: Optional[datetime] = None,
|
|
479
|
+
view_definition: Optional[ViewDefinitionInputType] = None,
|
|
467
480
|
# Standard aspects.
|
|
468
481
|
parent_container: ParentContainerInputType | Unset = unset,
|
|
469
482
|
subtype: Optional[str] = None,
|
|
@@ -471,12 +484,12 @@ class Dataset(
|
|
|
471
484
|
links: Optional[LinksInputType] = None,
|
|
472
485
|
tags: Optional[TagsInputType] = None,
|
|
473
486
|
terms: Optional[TermsInputType] = None,
|
|
474
|
-
# TODO structured_properties
|
|
475
487
|
domain: Optional[DomainInputType] = None,
|
|
476
|
-
extra_aspects: ExtraAspectsType = None,
|
|
477
488
|
# Dataset-specific aspects.
|
|
478
489
|
schema: Optional[SchemaFieldsInputType] = None,
|
|
479
490
|
upstreams: Optional[models.UpstreamLineageClass] = None,
|
|
491
|
+
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
492
|
+
extra_aspects: ExtraAspectsType = None,
|
|
480
493
|
):
|
|
481
494
|
"""Initialize a new Dataset instance.
|
|
482
495
|
|
|
@@ -492,6 +505,7 @@ class Dataset(
|
|
|
492
505
|
custom_properties: Optional dictionary of custom properties.
|
|
493
506
|
created: Optional creation timestamp.
|
|
494
507
|
last_modified: Optional last modification timestamp.
|
|
508
|
+
view_definition: Optional view definition for the dataset.
|
|
495
509
|
parent_container: Optional parent container for this dataset.
|
|
496
510
|
subtype: Optional subtype of the dataset.
|
|
497
511
|
owners: Optional list of owners.
|
|
@@ -533,6 +547,8 @@ class Dataset(
|
|
|
533
547
|
self.set_created(created)
|
|
534
548
|
if last_modified is not None:
|
|
535
549
|
self.set_last_modified(last_modified)
|
|
550
|
+
if view_definition is not None:
|
|
551
|
+
self.set_view_definition(view_definition)
|
|
536
552
|
|
|
537
553
|
if parent_container is not unset:
|
|
538
554
|
self._set_container(parent_container)
|
|
@@ -548,6 +564,9 @@ class Dataset(
|
|
|
548
564
|
self.set_terms(terms)
|
|
549
565
|
if domain is not None:
|
|
550
566
|
self.set_domain(domain)
|
|
567
|
+
if structured_properties is not None:
|
|
568
|
+
for key, value in structured_properties.items():
|
|
569
|
+
self.set_structured_property(property_urn=key, values=value)
|
|
551
570
|
|
|
552
571
|
@classmethod
|
|
553
572
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
@@ -711,6 +730,41 @@ class Dataset(
|
|
|
711
730
|
def set_last_modified(self, last_modified: datetime) -> None:
|
|
712
731
|
self._ensure_dataset_props().lastModified = make_time_stamp(last_modified)
|
|
713
732
|
|
|
733
|
+
@property
|
|
734
|
+
def view_definition(self) -> Optional[models.ViewPropertiesClass]:
|
|
735
|
+
"""Get the view definition of the dataset.
|
|
736
|
+
|
|
737
|
+
Under typical usage, this will be present if the subtype is "View".
|
|
738
|
+
|
|
739
|
+
Returns:
|
|
740
|
+
The view definition if set, None otherwise.
|
|
741
|
+
"""
|
|
742
|
+
return self._get_aspect(models.ViewPropertiesClass)
|
|
743
|
+
|
|
744
|
+
def set_view_definition(self, view_definition: ViewDefinitionInputType) -> None:
|
|
745
|
+
"""Set the view definition of the dataset.
|
|
746
|
+
|
|
747
|
+
If you're setting a view definition, subtype should typically be set to "view".
|
|
748
|
+
|
|
749
|
+
If a string is provided, it will be treated as a SQL view definition. To set
|
|
750
|
+
a custom language or other properties, provide a ViewPropertiesClass object.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
view_definition: The view definition to set.
|
|
754
|
+
"""
|
|
755
|
+
if isinstance(view_definition, models.ViewPropertiesClass):
|
|
756
|
+
self._set_aspect(view_definition)
|
|
757
|
+
elif isinstance(view_definition, str):
|
|
758
|
+
self._set_aspect(
|
|
759
|
+
models.ViewPropertiesClass(
|
|
760
|
+
materialized=False,
|
|
761
|
+
viewLogic=view_definition,
|
|
762
|
+
viewLanguage="SQL",
|
|
763
|
+
)
|
|
764
|
+
)
|
|
765
|
+
else:
|
|
766
|
+
assert_never(view_definition)
|
|
767
|
+
|
|
714
768
|
def _schema_dict(self) -> Dict[str, models.SchemaFieldClass]:
|
|
715
769
|
schema_metadata = self._get_aspect(models.SchemaMetadataClass)
|
|
716
770
|
if schema_metadata is None:
|
datahub/sdk/entity_client.py
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
-
from typing import TYPE_CHECKING, Union, overload
|
|
4
|
+
from typing import TYPE_CHECKING, Optional, Union, overload
|
|
5
5
|
|
|
6
6
|
import datahub.metadata.schema_classes as models
|
|
7
7
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
8
8
|
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
|
|
9
|
+
from datahub.emitter.rest_emitter import EmitMode
|
|
9
10
|
from datahub.errors import IngestionAttributionWarning, ItemNotFoundError, SdkUsageError
|
|
10
11
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
11
12
|
from datahub.metadata.urns import (
|
|
13
|
+
ChartUrn,
|
|
12
14
|
ContainerUrn,
|
|
15
|
+
DashboardUrn,
|
|
13
16
|
DataFlowUrn,
|
|
14
17
|
DataJobUrn,
|
|
15
18
|
DatasetUrn,
|
|
@@ -19,7 +22,9 @@ from datahub.metadata.urns import (
|
|
|
19
22
|
)
|
|
20
23
|
from datahub.sdk._all_entities import ENTITY_CLASSES
|
|
21
24
|
from datahub.sdk._shared import UrnOrStr
|
|
25
|
+
from datahub.sdk.chart import Chart
|
|
22
26
|
from datahub.sdk.container import Container
|
|
27
|
+
from datahub.sdk.dashboard import Dashboard
|
|
23
28
|
from datahub.sdk.dataflow import DataFlow
|
|
24
29
|
from datahub.sdk.datajob import DataJob
|
|
25
30
|
from datahub.sdk.dataset import Dataset
|
|
@@ -65,6 +70,10 @@ class EntityClient:
|
|
|
65
70
|
@overload
|
|
66
71
|
def get(self, urn: DataJobUrn) -> DataJob: ...
|
|
67
72
|
@overload
|
|
73
|
+
def get(self, urn: DashboardUrn) -> Dashboard: ...
|
|
74
|
+
@overload
|
|
75
|
+
def get(self, urn: ChartUrn) -> Chart: ...
|
|
76
|
+
@overload
|
|
68
77
|
def get(self, urn: Union[Urn, str]) -> Entity: ...
|
|
69
78
|
def get(self, urn: UrnOrStr) -> Entity:
|
|
70
79
|
"""Retrieve an entity by its urn.
|
|
@@ -84,7 +93,29 @@ class EntityClient:
|
|
|
84
93
|
urn = Urn.from_string(urn)
|
|
85
94
|
|
|
86
95
|
# TODO: add error handling around this with a suggested alternative if not yet supported
|
|
87
|
-
|
|
96
|
+
try:
|
|
97
|
+
EntityClass = ENTITY_CLASSES[urn.entity_type]
|
|
98
|
+
except KeyError as e:
|
|
99
|
+
# Try to import cloud-specific entities if not found
|
|
100
|
+
try:
|
|
101
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
102
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
103
|
+
from acryl_datahub_cloud.sdk.entities.subscription import Subscription
|
|
104
|
+
|
|
105
|
+
if urn.entity_type == "assertion":
|
|
106
|
+
EntityClass = Assertion
|
|
107
|
+
elif urn.entity_type == "subscription":
|
|
108
|
+
EntityClass = Subscription
|
|
109
|
+
elif urn.entity_type == "monitor":
|
|
110
|
+
EntityClass = Monitor
|
|
111
|
+
else:
|
|
112
|
+
raise SdkUsageError(
|
|
113
|
+
f"Entity type {urn.entity_type} is not yet supported"
|
|
114
|
+
) from e
|
|
115
|
+
except ImportError as e:
|
|
116
|
+
raise SdkUsageError(
|
|
117
|
+
f"Entity type {urn.entity_type} is not yet supported"
|
|
118
|
+
) from e
|
|
88
119
|
|
|
89
120
|
if not self._graph.exists(str(urn)):
|
|
90
121
|
raise ItemNotFoundError(f"Entity {urn} not found")
|
|
@@ -92,9 +123,25 @@ class EntityClient:
|
|
|
92
123
|
aspects = self._graph.get_entity_semityped(str(urn))
|
|
93
124
|
|
|
94
125
|
# TODO: save the timestamp so we can use If-Unmodified-Since on the updates
|
|
95
|
-
|
|
126
|
+
entity = EntityClass._new_from_graph(urn, aspects)
|
|
127
|
+
|
|
128
|
+
# Type narrowing for cloud-specific entities
|
|
129
|
+
if urn.entity_type == "assertion":
|
|
130
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
131
|
+
|
|
132
|
+
assert isinstance(entity, Assertion)
|
|
133
|
+
elif urn.entity_type == "monitor":
|
|
134
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
135
|
+
|
|
136
|
+
assert isinstance(entity, Monitor)
|
|
137
|
+
elif urn.entity_type == "subscription":
|
|
138
|
+
from acryl_datahub_cloud.sdk.entities.subscription import Subscription
|
|
139
|
+
|
|
140
|
+
assert isinstance(entity, Subscription)
|
|
96
141
|
|
|
97
|
-
|
|
142
|
+
return entity
|
|
143
|
+
|
|
144
|
+
def create(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
|
|
98
145
|
mcps = []
|
|
99
146
|
|
|
100
147
|
if self._graph.exists(str(entity.urn)):
|
|
@@ -113,9 +160,12 @@ class EntityClient:
|
|
|
113
160
|
)
|
|
114
161
|
mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
|
|
115
162
|
|
|
116
|
-
|
|
163
|
+
if emit_mode:
|
|
164
|
+
self._graph.emit_mcps(mcps, emit_mode=emit_mode)
|
|
165
|
+
else:
|
|
166
|
+
self._graph.emit_mcps(mcps)
|
|
117
167
|
|
|
118
|
-
def upsert(self, entity: Entity) -> None:
|
|
168
|
+
def upsert(self, entity: Entity, *, emit_mode: Optional[EmitMode] = None) -> None:
|
|
119
169
|
if entity._prev_aspects is None and self._graph.exists(str(entity.urn)):
|
|
120
170
|
warnings.warn(
|
|
121
171
|
f"The entity {entity.urn} already exists. This operation will partially overwrite the existing entity.",
|
|
@@ -125,9 +175,17 @@ class EntityClient:
|
|
|
125
175
|
# TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
|
|
126
176
|
|
|
127
177
|
mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
|
|
128
|
-
|
|
178
|
+
if emit_mode:
|
|
179
|
+
self._graph.emit_mcps(mcps, emit_mode=emit_mode)
|
|
180
|
+
else:
|
|
181
|
+
self._graph.emit_mcps(mcps)
|
|
129
182
|
|
|
130
|
-
def update(
|
|
183
|
+
def update(
|
|
184
|
+
self,
|
|
185
|
+
entity: Union[Entity, MetadataPatchProposal],
|
|
186
|
+
*,
|
|
187
|
+
emit_mode: Optional[EmitMode] = None,
|
|
188
|
+
) -> None:
|
|
131
189
|
if isinstance(entity, MetadataPatchProposal):
|
|
132
190
|
return self._update_patch(entity)
|
|
133
191
|
|
|
@@ -140,7 +198,10 @@ class EntityClient:
|
|
|
140
198
|
# -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
|
|
141
199
|
|
|
142
200
|
mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
|
|
143
|
-
|
|
201
|
+
if emit_mode:
|
|
202
|
+
self._graph.emit_mcps(mcps, emit_mode=emit_mode)
|
|
203
|
+
else:
|
|
204
|
+
self._graph.emit_mcps(mcps)
|
|
144
205
|
|
|
145
206
|
def _update_patch(
|
|
146
207
|
self, updater: MetadataPatchProposal, check_exists: bool = True
|
|
@@ -153,3 +214,44 @@ class EntityClient:
|
|
|
153
214
|
|
|
154
215
|
mcps = updater.build()
|
|
155
216
|
self._graph.emit_mcps(mcps)
|
|
217
|
+
|
|
218
|
+
def delete(
    self,
    urn: UrnOrStr,
    check_exists: bool = True,
    cascade: bool = False,
    hard: bool = False,
) -> None:
    """Delete an entity by its urn.

    Args:
        urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
        check_exists: Whether to check if the entity exists before deletion. Defaults to True.
        cascade: Whether to cascade delete related entities. When True, deletes child entities
            like datajobs within dataflows, datasets within containers, etc. Not yet supported.
        hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.

    Raises:
        SdkUsageError: If cascade is True (not supported), or if the entity does not exist
            and check_exists is True. The cascade check runs first, before the graph is queried.

    Note:
        When hard is True, the operation is irreversible and the entity will be permanently removed.

        Impact of cascade deletion (still to be done) depends on the input entity type:
            - Container: Recursively deletes all containers and data assets within the container.
            - Dataflow: Recursively deletes all data jobs within the dataflow.
            - Dashboard: TBD
            - DataPlatformInstance: TBD
            - ...
    """
    # Validate arguments before any graph call: an unsupported option should
    # fail immediately, and should not be masked by a "does not exist" error
    # coming from the existence check below.
    if cascade:
        raise SdkUsageError("The 'cascade' parameter is not yet supported.")

    # The graph client is called with the string form of the urn.
    urn_str = str(urn) if isinstance(urn, Urn) else urn

    if check_exists and not self._graph.exists(entity_urn=urn_str):
        raise SdkUsageError(
            f"Entity {urn_str} does not exist, and hence cannot be deleted. "
            "You can bypass this check by setting check_exists=False."
        )

    self._graph.delete_entity(urn=urn_str, hard=hard)
|