acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -36,8 +36,10 @@ csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
|
|
|
36
36
|
datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
|
|
37
37
|
datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
|
|
38
38
|
datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
|
|
39
|
+
datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource
|
|
39
40
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
40
41
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
|
42
|
+
datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource
|
|
41
43
|
dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource
|
|
42
44
|
dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource
|
|
43
45
|
delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource
|
|
@@ -46,6 +48,7 @@ dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource
|
|
|
46
48
|
druid = datahub.ingestion.source.sql.druid:DruidSource
|
|
47
49
|
dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource
|
|
48
50
|
elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource
|
|
51
|
+
excel = datahub.ingestion.source.excel.source:ExcelSource
|
|
49
52
|
feast = datahub.ingestion.source.feast:FeastRepositorySource
|
|
50
53
|
file = datahub.ingestion.source.file:GenericFileSource
|
|
51
54
|
fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource
|
|
@@ -91,6 +94,7 @@ sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource
|
|
|
91
94
|
salesforce = datahub.ingestion.source.salesforce:SalesforceSource
|
|
92
95
|
sigma = datahub.ingestion.source.sigma.sigma:SigmaSource
|
|
93
96
|
slack = datahub.ingestion.source.slack.slack:SlackSource
|
|
97
|
+
snaplogic = datahub.ingestion.source.snaplogic.snaplogic:SnaplogicSource
|
|
94
98
|
snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source
|
|
95
99
|
snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource
|
|
96
100
|
snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource
|
|
@@ -127,6 +131,7 @@ pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_clean
|
|
|
127
131
|
pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership
|
|
128
132
|
replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset
|
|
129
133
|
replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer
|
|
134
|
+
set_browse_path = datahub.ingestion.transformer.set_browse_path:SetBrowsePathTransformer
|
|
130
135
|
set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer
|
|
131
136
|
simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct
|
|
132
137
|
simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain
|
datahub/_version.py
CHANGED
|
@@ -131,7 +131,7 @@ class SerializedResourceValue(BaseModel):
|
|
|
131
131
|
elif isinstance(object, BaseModel):
|
|
132
132
|
return SerializedResourceValue(
|
|
133
133
|
content_type=models.SerializedValueContentTypeClass.JSON,
|
|
134
|
-
blob=json.dumps(object.dict()).encode("utf-8"),
|
|
134
|
+
blob=json.dumps(object.dict(), sort_keys=True).encode("utf-8"),
|
|
135
135
|
schema_type=models.SerializedValueSchemaTypeClass.JSON,
|
|
136
136
|
schema_ref=object.__class__.__name__,
|
|
137
137
|
)
|
|
@@ -71,7 +71,7 @@ class CorpGroup(BaseModel):
|
|
|
71
71
|
_rename_admins_to_owners = pydantic_renamed_field("admins", "owners")
|
|
72
72
|
|
|
73
73
|
@pydantic.validator("owners", "members", each_item=True)
|
|
74
|
-
def make_urn_if_needed(v):
|
|
74
|
+
def make_urn_if_needed(cls, v):
|
|
75
75
|
if isinstance(v, str):
|
|
76
76
|
return builder.make_user_urn(v)
|
|
77
77
|
return v
|
|
@@ -6,9 +6,10 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
|
|
|
6
6
|
|
|
7
7
|
import pydantic
|
|
8
8
|
from ruamel.yaml import YAML
|
|
9
|
+
from typing_extensions import assert_never
|
|
9
10
|
|
|
10
11
|
import datahub.emitter.mce_builder as builder
|
|
11
|
-
from datahub.configuration.common import ConfigModel
|
|
12
|
+
from datahub.configuration.common import ConfigModel, LaxStr
|
|
12
13
|
from datahub.emitter.generic_emitter import Emitter
|
|
13
14
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
15
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
@@ -110,8 +111,9 @@ class DataProduct(ConfigModel):
|
|
|
110
111
|
description: Optional[str] = None
|
|
111
112
|
tags: Optional[List[str]] = None
|
|
112
113
|
terms: Optional[List[str]] = None
|
|
113
|
-
properties: Optional[Dict[str,
|
|
114
|
+
properties: Optional[Dict[str, LaxStr]] = None
|
|
114
115
|
external_url: Optional[str] = None
|
|
116
|
+
output_ports: Optional[List[str]] = None
|
|
115
117
|
_original_yaml_dict: Optional[dict] = None
|
|
116
118
|
|
|
117
119
|
@pydantic.validator("assets", each_item=True)
|
|
@@ -123,6 +125,22 @@ class DataProduct(ConfigModel):
|
|
|
123
125
|
|
|
124
126
|
return v
|
|
125
127
|
|
|
128
|
+
@pydantic.validator("output_ports", each_item=True)
|
|
129
|
+
def output_ports_must_be_urns(cls, v: str) -> str:
|
|
130
|
+
try:
|
|
131
|
+
Urn.create_from_string(v)
|
|
132
|
+
except Exception as e:
|
|
133
|
+
raise ValueError(f"Output port {v} is not an urn: {e}") from e
|
|
134
|
+
|
|
135
|
+
return v
|
|
136
|
+
|
|
137
|
+
@pydantic.validator("output_ports", each_item=True)
|
|
138
|
+
def output_ports_must_be_from_asset_list(cls, v: str, values: dict) -> str:
|
|
139
|
+
assets = values.get("assets", [])
|
|
140
|
+
if v not in assets:
|
|
141
|
+
raise ValueError(f"Output port {v} is not in asset list")
|
|
142
|
+
return v
|
|
143
|
+
|
|
126
144
|
@property
|
|
127
145
|
def urn(self) -> str:
|
|
128
146
|
if self.id.startswith("urn:li:dataProduct:"):
|
|
@@ -180,6 +198,7 @@ class DataProduct(ConfigModel):
|
|
|
180
198
|
DataProductAssociationClass(
|
|
181
199
|
destinationUrn=asset,
|
|
182
200
|
created=self._mint_auditstamp("yaml"),
|
|
201
|
+
outputPort=asset in (self.output_ports or []),
|
|
183
202
|
)
|
|
184
203
|
for asset in self.assets
|
|
185
204
|
]
|
|
@@ -203,6 +222,7 @@ class DataProduct(ConfigModel):
|
|
|
203
222
|
DataProductAssociationClass(
|
|
204
223
|
destinationUrn=asset,
|
|
205
224
|
created=self._mint_auditstamp("yaml"),
|
|
225
|
+
outputPort=asset in (self.output_ports or []),
|
|
206
226
|
)
|
|
207
227
|
for asset in self.assets or []
|
|
208
228
|
],
|
|
@@ -368,6 +388,13 @@ class DataProduct(ConfigModel):
|
|
|
368
388
|
external_url=(
|
|
369
389
|
data_product_properties.externalUrl if data_product_properties else None
|
|
370
390
|
),
|
|
391
|
+
output_ports=[
|
|
392
|
+
e.destinationUrn
|
|
393
|
+
for e in (data_product_properties.assets or [])
|
|
394
|
+
if e.outputPort
|
|
395
|
+
]
|
|
396
|
+
if data_product_properties
|
|
397
|
+
else None,
|
|
371
398
|
)
|
|
372
399
|
|
|
373
400
|
def _patch_ownership(
|
|
@@ -414,7 +441,9 @@ class DataProduct(ConfigModel):
|
|
|
414
441
|
"type": new_owner_type_map[owner_urn],
|
|
415
442
|
}
|
|
416
443
|
else:
|
|
417
|
-
patches_drop[i] = o
|
|
444
|
+
patches_drop[i] = o.model_dump()
|
|
445
|
+
else:
|
|
446
|
+
assert_never(o)
|
|
418
447
|
|
|
419
448
|
# Figure out what if any are new owners to add
|
|
420
449
|
new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched)
|
|
@@ -27,10 +27,11 @@ from typing_extensions import TypeAlias
|
|
|
27
27
|
|
|
28
28
|
import datahub.metadata.schema_classes as models
|
|
29
29
|
from datahub.api.entities.structuredproperties.structuredproperties import AllowedTypes
|
|
30
|
-
from datahub.configuration.common import ConfigModel
|
|
30
|
+
from datahub.configuration.common import ConfigModel, LaxStr
|
|
31
31
|
from datahub.emitter.mce_builder import (
|
|
32
32
|
make_data_platform_urn,
|
|
33
33
|
make_dataset_urn,
|
|
34
|
+
make_domain_urn,
|
|
34
35
|
make_schema_field_urn,
|
|
35
36
|
make_tag_urn,
|
|
36
37
|
make_term_urn,
|
|
@@ -43,6 +44,7 @@ from datahub.ingestion.graph.client import DataHubGraph
|
|
|
43
44
|
from datahub.metadata.schema_classes import (
|
|
44
45
|
AuditStampClass,
|
|
45
46
|
DatasetPropertiesClass,
|
|
47
|
+
DomainsClass,
|
|
46
48
|
GlobalTagsClass,
|
|
47
49
|
GlossaryTermAssociationClass,
|
|
48
50
|
GlossaryTermsClass,
|
|
@@ -134,14 +136,13 @@ class StructuredPropertiesHelper:
|
|
|
134
136
|
|
|
135
137
|
class SchemaFieldSpecification(StrictModel):
|
|
136
138
|
id: Optional[str] = None
|
|
137
|
-
urn: Optional[str] = None
|
|
139
|
+
urn: Optional[str] = Field(None, validate_default=True)
|
|
138
140
|
structured_properties: Optional[StructuredProperties] = None
|
|
139
141
|
type: Optional[str] = None
|
|
140
142
|
nativeDataType: Optional[str] = None
|
|
141
143
|
jsonPath: Union[None, str] = None
|
|
142
144
|
nullable: bool = False
|
|
143
145
|
description: Union[None, str] = None
|
|
144
|
-
doc: Union[None, str] = None # doc is an alias for description
|
|
145
146
|
label: Optional[str] = None
|
|
146
147
|
created: Optional[dict] = None
|
|
147
148
|
lastModified: Optional[dict] = None
|
|
@@ -219,14 +220,14 @@ class SchemaFieldSpecification(StrictModel):
|
|
|
219
220
|
return v
|
|
220
221
|
|
|
221
222
|
@root_validator(pre=True)
|
|
222
|
-
def
|
|
223
|
-
"""Synchronize doc
|
|
223
|
+
def sync_doc_into_description(cls, values: Dict) -> Dict:
|
|
224
|
+
"""Synchronize doc into description field if doc is provided."""
|
|
224
225
|
description = values.get("description")
|
|
225
|
-
doc = values.
|
|
226
|
+
doc = values.pop("doc", None)
|
|
226
227
|
|
|
227
|
-
if
|
|
228
|
-
|
|
229
|
-
|
|
228
|
+
if doc is not None:
|
|
229
|
+
if description is not None:
|
|
230
|
+
raise ValueError("doc and description cannot both be provided")
|
|
230
231
|
values["description"] = doc
|
|
231
232
|
|
|
232
233
|
return values
|
|
@@ -294,10 +295,6 @@ class SchemaFieldSpecification(StrictModel):
|
|
|
294
295
|
"""Custom dict method for Pydantic v1 to handle YAML serialization properly."""
|
|
295
296
|
exclude = kwargs.pop("exclude", None) or set()
|
|
296
297
|
|
|
297
|
-
# If description and doc are identical, exclude doc from the output
|
|
298
|
-
if self.description == self.doc and self.description is not None:
|
|
299
|
-
exclude.add("doc")
|
|
300
|
-
|
|
301
298
|
# if nativeDataType and type are identical, exclude nativeDataType from the output
|
|
302
299
|
if self.nativeDataType == self.type and self.nativeDataType is not None:
|
|
303
300
|
exclude.add("nativeDataType")
|
|
@@ -325,10 +322,6 @@ class SchemaFieldSpecification(StrictModel):
|
|
|
325
322
|
"""Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
|
|
326
323
|
exclude = kwargs.pop("exclude", None) or set()
|
|
327
324
|
|
|
328
|
-
# If description and doc are identical, exclude doc from the output
|
|
329
|
-
if self.description == self.doc and self.description is not None:
|
|
330
|
-
exclude.add("doc")
|
|
331
|
-
|
|
332
325
|
# if nativeDataType and type are identical, exclude nativeDataType from the output
|
|
333
326
|
if self.nativeDataType == self.type and self.nativeDataType is not None:
|
|
334
327
|
exclude.add("nativeDataType")
|
|
@@ -380,12 +373,12 @@ class Dataset(StrictModel):
|
|
|
380
373
|
id: Optional[str] = None
|
|
381
374
|
platform: Optional[str] = None
|
|
382
375
|
env: str = "PROD"
|
|
383
|
-
urn: Optional[str] = None
|
|
376
|
+
urn: Optional[str] = Field(None, validate_default=True)
|
|
384
377
|
description: Optional[str] = None
|
|
385
|
-
name: Optional[str] = None
|
|
386
|
-
schema_metadata: Optional[SchemaSpecification] = Field(alias="schema")
|
|
378
|
+
name: Optional[str] = Field(None, validate_default=True)
|
|
379
|
+
schema_metadata: Optional[SchemaSpecification] = Field(default=None, alias="schema")
|
|
387
380
|
downstreams: Optional[List[str]] = None
|
|
388
|
-
properties: Optional[Dict[str,
|
|
381
|
+
properties: Optional[Dict[str, LaxStr]] = None
|
|
389
382
|
subtype: Optional[str] = None
|
|
390
383
|
subtypes: Optional[List[str]] = None
|
|
391
384
|
tags: Optional[List[str]] = None
|
|
@@ -393,6 +386,7 @@ class Dataset(StrictModel):
|
|
|
393
386
|
owners: Optional[List[Union[str, Ownership]]] = None
|
|
394
387
|
structured_properties: Optional[StructuredProperties] = None
|
|
395
388
|
external_url: Optional[str] = None
|
|
389
|
+
domains: Optional[List[str]] = None
|
|
396
390
|
|
|
397
391
|
@property
|
|
398
392
|
def platform_urn(self) -> str:
|
|
@@ -602,7 +596,7 @@ class Dataset(StrictModel):
|
|
|
602
596
|
],
|
|
603
597
|
platformSchema=OtherSchemaClass(
|
|
604
598
|
rawSchema=yaml.dump(
|
|
605
|
-
self.schema_metadata.
|
|
599
|
+
self.schema_metadata.model_dump(
|
|
606
600
|
exclude_none=True, exclude_unset=True
|
|
607
601
|
)
|
|
608
602
|
)
|
|
@@ -735,7 +729,14 @@ class Dataset(StrictModel):
|
|
|
735
729
|
)
|
|
736
730
|
)
|
|
737
731
|
yield from patch_builder.build()
|
|
738
|
-
|
|
732
|
+
if self.domains:
|
|
733
|
+
mcp = MetadataChangeProposalWrapper(
|
|
734
|
+
entityUrn=self.urn,
|
|
735
|
+
aspect=DomainsClass(
|
|
736
|
+
[make_domain_urn(domain) for domain in self.domains]
|
|
737
|
+
),
|
|
738
|
+
)
|
|
739
|
+
yield mcp
|
|
739
740
|
logger.info(f"Created dataset {self.urn}")
|
|
740
741
|
|
|
741
742
|
@staticmethod
|
|
@@ -897,6 +898,7 @@ class Dataset(StrictModel):
|
|
|
897
898
|
structured_properties_map[sp.propertyUrn].extend(sp.values) # type: ignore[arg-type,union-attr]
|
|
898
899
|
else:
|
|
899
900
|
structured_properties_map[sp.propertyUrn] = sp.values
|
|
901
|
+
domains: Optional[DomainsClass] = graph.get_aspect(urn, DomainsClass)
|
|
900
902
|
|
|
901
903
|
if config.include_downstreams:
|
|
902
904
|
related_downstreams = graph.get_related_entities(
|
|
@@ -937,6 +939,7 @@ class Dataset(StrictModel):
|
|
|
937
939
|
structured_properties=(
|
|
938
940
|
structured_properties_map if structured_properties else None
|
|
939
941
|
),
|
|
942
|
+
domains=[domain for domain in domains.domains] if domains else None,
|
|
940
943
|
downstreams=downstreams if config.include_downstreams else None,
|
|
941
944
|
)
|
|
942
945
|
|
|
File without changes
|