acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/sdk/_shared.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
4
5
|
from datetime import datetime
|
|
5
6
|
from typing import (
|
|
6
7
|
TYPE_CHECKING,
|
|
@@ -26,9 +27,11 @@ from datahub.emitter.mce_builder import (
|
|
|
26
27
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
27
28
|
from datahub.errors import MultipleSubtypesWarning, SdkUsageError
|
|
28
29
|
from datahub.metadata.urns import (
|
|
30
|
+
ChartUrn,
|
|
29
31
|
ContainerUrn,
|
|
30
32
|
CorpGroupUrn,
|
|
31
33
|
CorpUserUrn,
|
|
34
|
+
DashboardUrn,
|
|
32
35
|
DataFlowUrn,
|
|
33
36
|
DataJobUrn,
|
|
34
37
|
DataPlatformInstanceUrn,
|
|
@@ -38,6 +41,7 @@ from datahub.metadata.urns import (
|
|
|
38
41
|
DomainUrn,
|
|
39
42
|
GlossaryTermUrn,
|
|
40
43
|
OwnershipTypeUrn,
|
|
44
|
+
StructuredPropertyUrn,
|
|
41
45
|
TagUrn,
|
|
42
46
|
Urn,
|
|
43
47
|
VersionSetUrn,
|
|
@@ -49,11 +53,21 @@ from datahub.utilities.urns.error import InvalidUrnError
|
|
|
49
53
|
if TYPE_CHECKING:
|
|
50
54
|
from datahub.sdk.container import Container
|
|
51
55
|
UrnOrStr: TypeAlias = Union[Urn, str]
|
|
56
|
+
ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
|
|
52
57
|
DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
|
|
53
58
|
DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
|
|
54
59
|
DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
|
|
60
|
+
DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
|
|
61
|
+
DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
|
|
62
|
+
DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
|
|
55
63
|
|
|
56
64
|
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
|
|
65
|
+
ActorUrnOrStr: TypeAlias = Union[str, ActorUrn]
|
|
66
|
+
StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
|
|
67
|
+
StructuredPropertyValueType: TypeAlias = Union[str, float, int]
|
|
68
|
+
StructuredPropertyInputType: TypeAlias = Dict[
|
|
69
|
+
StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
|
|
70
|
+
]
|
|
57
71
|
|
|
58
72
|
TrainingMetricsInputType: TypeAlias = Union[
|
|
59
73
|
List[models.MLMetricClass], Dict[str, Optional[str]]
|
|
@@ -98,6 +112,130 @@ def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]:
|
|
|
98
112
|
return parse_ts_millis(ts.time)
|
|
99
113
|
|
|
100
114
|
|
|
115
|
+
def _audit_stamp_time(stamp: Optional[models.AuditStampClass]) -> Optional[datetime]:
    """Convert an audit stamp's epoch-millisecond ``time`` into a datetime.

    Returns None when the stamp is missing or its time is 0; the mixin
    below reports both cases as "not set".
    """
    if stamp is None or stamp.time == 0:
        return None
    # Float division keeps the millisecond fraction. No ``tz`` is passed, so
    # the result is a naive datetime expressed in the local timezone.
    return datetime.fromtimestamp(stamp.time / 1000)


def _audit_stamp_actor(stamp: Optional[models.AuditStampClass]) -> Optional[str]:
    """Return the stamp's actor, or None if it is missing or the unknown user."""
    if stamp is None or stamp.actor == builder.UNKNOWN_USER:
        return None
    return stamp.actor


def _actor_to_urn_str(actor: ActorUrnOrStr) -> str:
    """Normalise an actor given as a string or urn object into a urn string."""
    if isinstance(actor, str):
        # Plain strings are passed through make_user_urn before being stored.
        actor = make_user_urn(actor)
    return str(actor)


def _ensure_deleted_stamp(
    audit_stamps: models.ChangeAuditStampsClass,
) -> models.AuditStampClass:
    """Return ``audit_stamps.deleted``, creating a placeholder stamp if unset.

    The placeholder uses time 0 and the unknown user, which the getters of
    the mixin below report as None.
    """
    if audit_stamps.deleted is None:
        audit_stamps.deleted = models.AuditStampClass(
            time=0, actor=builder.UNKNOWN_USER
        )
    return audit_stamps.deleted


class ChangeAuditStampsMixin(ABC):
    """Mixin class for managing audit stamps on entities.

    Subclasses supply ``_get_audit_stamps`` / ``_set_audit_stamps``; this
    mixin layers typed accessors for the ``created``, ``lastModified`` and
    ``deleted`` stamps on top of them.

    Getters return None for a stamp whose time is 0 or whose actor is
    ``builder.UNKNOWN_USER``. Timestamps are returned as naive datetimes in
    the local timezone (``datetime.fromtimestamp`` without ``tz``).
    """

    __slots__ = ()

    @abstractmethod
    def _get_audit_stamps(self) -> models.ChangeAuditStampsClass:
        """Get the audit stamps from the entity properties."""
        pass

    @abstractmethod
    def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None:
        """Set the audit stamps on the entity properties."""
        pass

    @property
    def last_modified(self) -> Optional[datetime]:
        """Get the last modification timestamp, or None if its time is 0."""
        return _audit_stamp_time(self._get_audit_stamps().lastModified)

    def set_last_modified(self, last_modified: datetime) -> None:
        """Set the last modification timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.time = make_ts_millis(last_modified)
        self._set_audit_stamps(audit_stamps)

    @property
    def last_modified_by(self) -> Optional[str]:
        """Get the last modification actor, or None for the unknown user."""
        return _audit_stamp_actor(self._get_audit_stamps().lastModified)

    def set_last_modified_by(self, last_modified_by: ActorUrnOrStr) -> None:
        """Set the last modification actor in audit stamps."""
        actor = _actor_to_urn_str(last_modified_by)
        audit_stamps = self._get_audit_stamps()
        audit_stamps.lastModified.actor = actor
        self._set_audit_stamps(audit_stamps)

    @property
    def created_at(self) -> Optional[datetime]:
        """Get the creation timestamp, or None if its time is 0."""
        return _audit_stamp_time(self._get_audit_stamps().created)

    def set_created_at(self, created_at: datetime) -> None:
        """Set the creation timestamp in audit stamps."""
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.time = make_ts_millis(created_at)
        self._set_audit_stamps(audit_stamps)

    @property
    def created_by(self) -> Optional[str]:
        """Get the creation actor, or None for the unknown user."""
        return _audit_stamp_actor(self._get_audit_stamps().created)

    def set_created_by(self, created_by: ActorUrnOrStr) -> None:
        """Set the creation actor in audit stamps."""
        actor = _actor_to_urn_str(created_by)
        audit_stamps = self._get_audit_stamps()
        audit_stamps.created.actor = actor
        self._set_audit_stamps(audit_stamps)

    @property
    def deleted_on(self) -> Optional[datetime]:
        """Get the deletion timestamp, or None if unset or its time is 0."""
        return _audit_stamp_time(self._get_audit_stamps().deleted)

    def set_deleted_on(self, deleted_on: datetime) -> None:
        """Set the deletion timestamp in audit stamps.

        Creates the ``deleted`` stamp first when the aspect has none.
        """
        audit_stamps = self._get_audit_stamps()
        _ensure_deleted_stamp(audit_stamps).time = make_ts_millis(deleted_on)
        self._set_audit_stamps(audit_stamps)

    @property
    def deleted_by(self) -> Optional[str]:
        """Get the deletion actor, or None if unset or the unknown user."""
        return _audit_stamp_actor(self._get_audit_stamps().deleted)

    def set_deleted_by(self, deleted_by: ActorUrnOrStr) -> None:
        """Set the deletion actor in audit stamps.

        Creates the ``deleted`` stamp first when the aspect has none.
        """
        actor = _actor_to_urn_str(deleted_by)
        audit_stamps = self._get_audit_stamps()
        _ensure_deleted_stamp(audit_stamps).actor = actor
        self._set_audit_stamps(audit_stamps)
|
|
237
|
+
|
|
238
|
+
|
|
101
239
|
class HasPlatformInstance(Entity):
|
|
102
240
|
__slots__ = ()
|
|
103
241
|
|
|
@@ -167,7 +305,7 @@ OwnerInputType: TypeAlias = Union[
|
|
|
167
305
|
Tuple[ActorUrn, OwnershipTypeType],
|
|
168
306
|
models.OwnerClass,
|
|
169
307
|
]
|
|
170
|
-
OwnersInputType: TypeAlias =
|
|
308
|
+
OwnersInputType: TypeAlias = Sequence[OwnerInputType]
|
|
171
309
|
|
|
172
310
|
|
|
173
311
|
class HasOwnership(Entity):
|
|
@@ -268,7 +406,9 @@ class HasOwnership(Entity):
|
|
|
268
406
|
# If you pass in a ContainerKey, we can use parent_key() to build the browse path.
|
|
269
407
|
# If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
|
|
270
408
|
# will be treated as raw ids.
|
|
271
|
-
ParentContainerInputType: TypeAlias = Union[
|
|
409
|
+
ParentContainerInputType: TypeAlias = Union[
|
|
410
|
+
"Container", ContainerKey, Sequence[UrnOrStr]
|
|
411
|
+
]
|
|
272
412
|
|
|
273
413
|
|
|
274
414
|
class HasContainer(Entity):
|
|
@@ -328,7 +468,7 @@ class HasContainer(Entity):
|
|
|
328
468
|
)
|
|
329
469
|
for entry in parsed_path
|
|
330
470
|
]
|
|
331
|
-
elif container
|
|
471
|
+
elif isinstance(container, ContainerKey):
|
|
332
472
|
container_urn = container.as_urn()
|
|
333
473
|
|
|
334
474
|
browse_path_reversed = [container_urn]
|
|
@@ -387,7 +527,7 @@ class HasContainer(Entity):
|
|
|
387
527
|
|
|
388
528
|
|
|
389
529
|
TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
|
|
390
|
-
TagsInputType: TypeAlias =
|
|
530
|
+
TagsInputType: TypeAlias = Sequence[TagInputType]
|
|
391
531
|
|
|
392
532
|
|
|
393
533
|
class HasTags(Entity):
|
|
@@ -442,7 +582,7 @@ class HasTags(Entity):
|
|
|
442
582
|
TermInputType: TypeAlias = Union[
|
|
443
583
|
str, GlossaryTermUrn, models.GlossaryTermAssociationClass
|
|
444
584
|
]
|
|
445
|
-
TermsInputType: TypeAlias =
|
|
585
|
+
TermsInputType: TypeAlias = Sequence[TermInputType]
|
|
446
586
|
|
|
447
587
|
|
|
448
588
|
class HasTerms(Entity):
|
|
@@ -717,3 +857,107 @@ class HasVersion(Entity):
|
|
|
717
857
|
a for a in version_props.aliases if a.versionTag != alias
|
|
718
858
|
]
|
|
719
859
|
self._set_aspect(version_props)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
class HasStructuredProperties(Entity):
    """
    Mixin adding structured-property accessors to an entity
    """

    __slots__ = ()

    @property
    def structured_properties(
        self,
    ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
        """
        Look up the structured property assignments stored on the entity

        Returns:
            The ``properties`` list of the structured properties aspect, or
            None when the aspect lookup yields a falsy value
        """
        aspect = self._get_aspect(models.StructuredPropertiesClass)
        if not aspect:
            return None
        return aspect.properties

    def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
        """
        Fetch the structured properties aspect via ``_setdefault_aspect``

        Returns:
            The aspect returned by ``_setdefault_aspect``; an aspect with an
            empty ``properties`` list is supplied as the default
        """
        empty_aspect = models.StructuredPropertiesClass(properties=[])
        return self._setdefault_aspect(empty_aspect)

    def set_structured_property(
        self,
        property_urn: StructuredPropertyUrnOrStr,
        values: Sequence[StructuredPropertyValueType],
    ) -> None:
        """
        Assign values to a structured property, updating it if already present

        Args:
            property_urn: URN of the structured property; parsed with
                ``StructuredPropertyUrn.from_string``
            values: Values to store for the property (copied into a new list)
        """
        # Parsing doubles as validation of the urn.
        property_urn = StructuredPropertyUrn.from_string(property_urn)
        target_urn = str(property_urn)

        aspect = self._ensure_structured_properties()

        # Locate the first assignment already stored for this property, if any.
        matching_assignment = None
        for assignment in aspect.properties:
            if assignment.propertyUrn == target_urn:
                matching_assignment = assignment
                break

        now_millis = make_ts_millis(datetime.now())

        if not matching_assignment:
            # Nothing stored yet: add a fresh assignment whose created and
            # lastModified stamps are distinct objects with the same content.
            add_list_unique(
                aspect.properties,
                key=lambda assignment: assignment.propertyUrn,
                item=models.StructuredPropertyValueAssignmentClass(
                    propertyUrn=target_urn,
                    values=list(values),
                    created=models.AuditStampClass(
                        time=now_millis,
                        actor=DEFAULT_ACTOR_URN,
                    ),
                    lastModified=models.AuditStampClass(
                        time=now_millis,
                        actor=DEFAULT_ACTOR_URN,
                    ),
                ),
            )
        else:
            # Overwrite the values in place and refresh lastModified; the
            # created stamp is left untouched.
            matching_assignment.values = list(values)
            matching_assignment.lastModified = models.AuditStampClass(
                time=now_millis,
                actor=DEFAULT_ACTOR_URN,
            )

        self._set_aspect(aspect)

    def remove_structured_property(
        self, property_urn: StructuredPropertyUrnOrStr
    ) -> None:
        """
        Drop a structured property assignment from the entity

        Args:
            property_urn: URN of the structured property to remove
        """
        aspect = self._ensure_structured_properties()
        # Placeholder assignment carrying only the urn; presumably
        # remove_list_unique matches entries through the key function
        # (verify against the helper).
        placeholder = models.StructuredPropertyValueAssignmentClass(
            propertyUrn=str(property_urn), values=[]
        )
        remove_list_unique(
            aspect.properties,
            key=lambda assignment: assignment.propertyUrn,
            item=placeholder,
        )
|