acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
5
|
from collections import OrderedDict, defaultdict
|
|
6
|
+
from copy import deepcopy
|
|
6
7
|
from dataclasses import dataclass, field as dataclass_field
|
|
7
8
|
from datetime import datetime, timedelta, timezone
|
|
8
9
|
from functools import lru_cache
|
|
@@ -80,6 +81,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
80
81
|
from datahub.ingestion.source.common.subtypes import (
|
|
81
82
|
BIContainerSubTypes,
|
|
82
83
|
DatasetSubTypes,
|
|
84
|
+
SourceCapabilityModifier,
|
|
83
85
|
)
|
|
84
86
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
85
87
|
StaleEntityRemovalHandler,
|
|
@@ -118,7 +120,6 @@ from datahub.ingestion.source.tableau.tableau_common import (
|
|
|
118
120
|
)
|
|
119
121
|
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
|
|
120
122
|
from datahub.ingestion.source.tableau.tableau_validation import check_user_role
|
|
121
|
-
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
122
123
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
123
124
|
AuditStamp,
|
|
124
125
|
ChangeAuditStamps,
|
|
@@ -148,7 +149,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
148
149
|
)
|
|
149
150
|
from datahub.metadata.schema_classes import (
|
|
150
151
|
BrowsePathsClass,
|
|
151
|
-
ChangeTypeClass,
|
|
152
152
|
ChartInfoClass,
|
|
153
153
|
ChartUsageStatisticsClass,
|
|
154
154
|
DashboardInfoClass,
|
|
@@ -474,6 +474,13 @@ class TableauPageSizeConfig(ConfigModel):
|
|
|
474
474
|
return self.database_table_page_size or self.page_size
|
|
475
475
|
|
|
476
476
|
|
|
477
|
+
_IngestHiddenAssetsOptionsType = Literal["worksheet", "dashboard"]
|
|
478
|
+
_IngestHiddenAssetsOptions: List[_IngestHiddenAssetsOptionsType] = [
|
|
479
|
+
"worksheet",
|
|
480
|
+
"dashboard",
|
|
481
|
+
]
|
|
482
|
+
|
|
483
|
+
|
|
477
484
|
class TableauConfig(
|
|
478
485
|
DatasetLineageProviderConfigBase,
|
|
479
486
|
StatefulIngestionConfigBase,
|
|
@@ -524,10 +531,22 @@ class TableauConfig(
|
|
|
524
531
|
default=False,
|
|
525
532
|
description="Ingest Owner from source. This will override Owner info entered from UI",
|
|
526
533
|
)
|
|
534
|
+
use_email_as_username: bool = Field(
|
|
535
|
+
default=False,
|
|
536
|
+
description="Use email address instead of username for entity owners. Requires ingest_owner to be True.",
|
|
537
|
+
)
|
|
527
538
|
ingest_tables_external: bool = Field(
|
|
528
539
|
default=False,
|
|
529
540
|
description="Ingest details for tables external to (not embedded in) tableau as entities.",
|
|
530
541
|
)
|
|
542
|
+
emit_all_published_datasources: bool = Field(
|
|
543
|
+
default=False,
|
|
544
|
+
description="Ingest all published data sources. When False (default), only ingest published data sources that belong to an ingested workbook.",
|
|
545
|
+
)
|
|
546
|
+
emit_all_embedded_datasources: bool = Field(
|
|
547
|
+
default=False,
|
|
548
|
+
description="Ingest all embedded data sources. When False (default), only ingest embedded data sources that belong to an ingested workbook.",
|
|
549
|
+
)
|
|
531
550
|
|
|
532
551
|
env: str = Field(
|
|
533
552
|
default=builder.DEFAULT_ENV,
|
|
@@ -574,13 +593,13 @@ class TableauConfig(
|
|
|
574
593
|
)
|
|
575
594
|
|
|
576
595
|
extract_lineage_from_unsupported_custom_sql_queries: bool = Field(
|
|
577
|
-
default=
|
|
578
|
-
description="[Experimental]
|
|
596
|
+
default=True,
|
|
597
|
+
description="[Experimental] Extract lineage from Custom SQL queries using DataHub's SQL parser in cases where the Tableau Catalog API fails to return lineage for the query.",
|
|
579
598
|
)
|
|
580
599
|
|
|
581
600
|
force_extraction_of_lineage_from_custom_sql_queries: bool = Field(
|
|
582
601
|
default=False,
|
|
583
|
-
description="[Experimental] Force extraction of lineage from
|
|
602
|
+
description="[Experimental] Force extraction of lineage from Custom SQL queries using DataHub's SQL parser, even when the Tableau Catalog API returns lineage already.",
|
|
584
603
|
)
|
|
585
604
|
|
|
586
605
|
sql_parsing_disable_schema_awareness: bool = Field(
|
|
@@ -613,8 +632,8 @@ class TableauConfig(
|
|
|
613
632
|
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
|
|
614
633
|
)
|
|
615
634
|
|
|
616
|
-
ingest_hidden_assets: Union[List[
|
|
617
|
-
|
|
635
|
+
ingest_hidden_assets: Union[List[_IngestHiddenAssetsOptionsType], bool] = Field(
|
|
636
|
+
_IngestHiddenAssetsOptions,
|
|
618
637
|
description=(
|
|
619
638
|
"When enabled, hidden worksheets and dashboards are ingested into Datahub."
|
|
620
639
|
" If a dashboard or worksheet is hidden in Tableau the luid is blank."
|
|
@@ -636,6 +655,11 @@ class TableauConfig(
|
|
|
636
655
|
# pre = True because we want to take some decision before pydantic initialize the configuration to default values
|
|
637
656
|
@root_validator(pre=True)
|
|
638
657
|
def projects_backward_compatibility(cls, values: Dict) -> Dict:
|
|
658
|
+
# In-place update of the input dict would cause state contamination. This was discovered through test failures
|
|
659
|
+
# in test_hex.py where the same dict is reused.
|
|
660
|
+
# So a copy is performed first.
|
|
661
|
+
values = deepcopy(values)
|
|
662
|
+
|
|
639
663
|
projects = values.get("projects")
|
|
640
664
|
project_pattern = values.get("project_pattern")
|
|
641
665
|
project_path_pattern = values.get("project_path_pattern")
|
|
@@ -647,6 +671,7 @@ class TableauConfig(
|
|
|
647
671
|
values["project_pattern"] = AllowDenyPattern(
|
|
648
672
|
allow=[f"^{prj}$" for prj in projects]
|
|
649
673
|
)
|
|
674
|
+
values.pop("projects")
|
|
650
675
|
elif (project_pattern or project_path_pattern) and projects:
|
|
651
676
|
raise ValueError(
|
|
652
677
|
"projects is deprecated. Please use project_path_pattern only."
|
|
@@ -658,7 +683,7 @@ class TableauConfig(
|
|
|
658
683
|
|
|
659
684
|
return values
|
|
660
685
|
|
|
661
|
-
@root_validator()
|
|
686
|
+
@root_validator(skip_on_failure=True)
|
|
662
687
|
def validate_config_values(cls, values: Dict) -> Dict:
|
|
663
688
|
tags_for_hidden_assets = values.get("tags_for_hidden_assets")
|
|
664
689
|
ingest_tags = values.get("ingest_tags")
|
|
@@ -670,6 +695,14 @@ class TableauConfig(
|
|
|
670
695
|
raise ValueError(
|
|
671
696
|
"tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
|
|
672
697
|
)
|
|
698
|
+
|
|
699
|
+
use_email_as_username = values.get("use_email_as_username")
|
|
700
|
+
ingest_owner = values.get("ingest_owner")
|
|
701
|
+
if use_email_as_username and not ingest_owner:
|
|
702
|
+
raise ValueError(
|
|
703
|
+
"use_email_as_username requires ingest_owner to be enabled."
|
|
704
|
+
)
|
|
705
|
+
|
|
673
706
|
return values
|
|
674
707
|
|
|
675
708
|
|
|
@@ -761,7 +794,6 @@ class SiteIdContentUrl:
|
|
|
761
794
|
@dataclass
|
|
762
795
|
class TableauSourceReport(
|
|
763
796
|
StaleEntityRemovalSourceReport,
|
|
764
|
-
IngestionStageReport,
|
|
765
797
|
):
|
|
766
798
|
get_all_datasources_query_failed: bool = False
|
|
767
799
|
num_get_datasource_query_failures: int = 0
|
|
@@ -831,6 +863,9 @@ class TableauSourceReport(
|
|
|
831
863
|
default_factory=(lambda: defaultdict(int))
|
|
832
864
|
)
|
|
833
865
|
|
|
866
|
+
# Owner extraction statistics
|
|
867
|
+
num_email_fallback_to_username: int = 0
|
|
868
|
+
|
|
834
869
|
|
|
835
870
|
def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
836
871
|
title: str = "Insufficient Permissions"
|
|
@@ -861,16 +896,29 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
|
861
896
|
@platform_name("Tableau")
|
|
862
897
|
@config_class(TableauConfig)
|
|
863
898
|
@support_status(SupportStatus.CERTIFIED)
|
|
899
|
+
@capability(
|
|
900
|
+
SourceCapability.CONTAINERS,
|
|
901
|
+
"Enabled by default",
|
|
902
|
+
subtype_modifier=[
|
|
903
|
+
SourceCapabilityModifier.TABLEAU_PROJECT,
|
|
904
|
+
SourceCapabilityModifier.TABLEAU_SITE,
|
|
905
|
+
SourceCapabilityModifier.TABLEAU_WORKBOOK,
|
|
906
|
+
],
|
|
907
|
+
)
|
|
864
908
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
865
909
|
@capability(SourceCapability.DOMAINS, "Requires transformer", supported=False)
|
|
866
910
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
867
911
|
@capability(
|
|
868
912
|
SourceCapability.USAGE_STATS,
|
|
869
913
|
"Dashboard/Chart view counts, enabled using extract_usage_stats config",
|
|
914
|
+
subtype_modifier=[
|
|
915
|
+
SourceCapabilityModifier.DASHBOARD,
|
|
916
|
+
SourceCapabilityModifier.CHART,
|
|
917
|
+
],
|
|
870
918
|
)
|
|
871
919
|
@capability(
|
|
872
920
|
SourceCapability.DELETION_DETECTION,
|
|
873
|
-
"Enabled by default
|
|
921
|
+
"Enabled by default via stateful ingestion.",
|
|
874
922
|
)
|
|
875
923
|
@capability(SourceCapability.OWNERSHIP, "Requires recipe configuration")
|
|
876
924
|
@capability(SourceCapability.TAGS, "Requires recipe configuration")
|
|
@@ -879,6 +927,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
|
879
927
|
SourceCapability.LINEAGE_FINE,
|
|
880
928
|
"Enabled by default, configure using `extract_column_level_lineage`",
|
|
881
929
|
)
|
|
930
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
882
931
|
class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
883
932
|
platform = "tableau"
|
|
884
933
|
|
|
@@ -1162,7 +1211,7 @@ class TableauSiteSource:
|
|
|
1162
1211
|
self.report.warning(
|
|
1163
1212
|
title="Incomplete project hierarchy",
|
|
1164
1213
|
message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
|
|
1165
|
-
context=f"Missing {project.parent_id}, referenced by {project.id} {project.
|
|
1214
|
+
context=f"Missing {project.parent_id}, referenced by {project.id} {project.name}",
|
|
1166
1215
|
)
|
|
1167
1216
|
project.parent_id = None
|
|
1168
1217
|
|
|
@@ -1539,12 +1588,15 @@ class TableauSiteSource:
|
|
|
1539
1588
|
}}""",
|
|
1540
1589
|
)
|
|
1541
1590
|
else:
|
|
1542
|
-
# As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
|
|
1543
|
-
# timeout error.
|
|
1544
|
-
# It doesn't reliably happen, so retrying a couple of times makes sense.
|
|
1545
1591
|
if all(
|
|
1592
|
+
# As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
|
|
1593
|
+
# timeout error.
|
|
1594
|
+
# It doesn't reliably happen, so retrying a couple of times makes sense.
|
|
1546
1595
|
error.get("message")
|
|
1547
1596
|
== "Execution canceled because timeout of 30000 millis was reached"
|
|
1597
|
+
# The Metadata API sometimes returns an 'unexpected error' message when querying
|
|
1598
|
+
# embeddedDatasourcesConnection. Try retrying a couple of times.
|
|
1599
|
+
or error.get("message") == "Unexpected error occurred"
|
|
1548
1600
|
for error in errors
|
|
1549
1601
|
):
|
|
1550
1602
|
# If it was only a timeout error, we can retry.
|
|
@@ -1556,8 +1608,8 @@ class TableauSiteSource:
|
|
|
1556
1608
|
(self.config.max_retries - retries_remaining + 1) ** 2, 60
|
|
1557
1609
|
)
|
|
1558
1610
|
logger.info(
|
|
1559
|
-
f"Query {connection_type} received a
|
|
1560
|
-
f"
|
|
1611
|
+
f"Query {connection_type} received a retryable error with {retries_remaining} retries remaining, "
|
|
1612
|
+
f"will retry in {backoff_time} seconds: {errors}"
|
|
1561
1613
|
)
|
|
1562
1614
|
time.sleep(backoff_time)
|
|
1563
1615
|
return self.get_connection_object_page(
|
|
@@ -2174,32 +2226,32 @@ class TableauSiteSource:
|
|
|
2174
2226
|
else []
|
|
2175
2227
|
)
|
|
2176
2228
|
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2229
|
+
tableau_table_list = csql.get(c.TABLES, [])
|
|
2230
|
+
if self.config.force_extraction_of_lineage_from_custom_sql_queries or (
|
|
2231
|
+
not tableau_table_list
|
|
2232
|
+
and self.config.extract_lineage_from_unsupported_custom_sql_queries
|
|
2233
|
+
):
|
|
2234
|
+
if not tableau_table_list:
|
|
2235
|
+
# custom sql tables may contain unsupported sql, causing incomplete lineage
|
|
2236
|
+
# we extract the lineage from the raw queries
|
|
2237
|
+
logger.debug(
|
|
2238
|
+
"Parsing TLL & CLL from custom sql (tableau metadata incomplete)"
|
|
2239
|
+
)
|
|
2240
|
+
else:
|
|
2241
|
+
# The Tableau SQL parser is much worse than our sqlglot based parser,
|
|
2242
|
+
# so relying on metadata parsed by Tableau from SQL queries can be
|
|
2243
|
+
# less accurate. This option allows us to ignore Tableau's parser and
|
|
2244
|
+
# only use our own.
|
|
2245
|
+
logger.debug("Parsing TLL & CLL from custom sql (forced)")
|
|
2246
|
+
|
|
2183
2247
|
yield from self._create_lineage_from_unsupported_csql(
|
|
2184
2248
|
csql_urn, csql, columns
|
|
2185
2249
|
)
|
|
2186
2250
|
else:
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
yield from self._create_lineage_to_upstream_tables(
|
|
2192
|
-
csql_urn, tables, datasource
|
|
2193
|
-
)
|
|
2194
|
-
elif (
|
|
2195
|
-
self.config.extract_lineage_from_unsupported_custom_sql_queries
|
|
2196
|
-
):
|
|
2197
|
-
logger.debug("Extracting TLL & CLL from custom sql")
|
|
2198
|
-
# custom sql tables may contain unsupported sql, causing incomplete lineage
|
|
2199
|
-
# we extract the lineage from the raw queries
|
|
2200
|
-
yield from self._create_lineage_from_unsupported_csql(
|
|
2201
|
-
csql_urn, csql, columns
|
|
2202
|
-
)
|
|
2251
|
+
# lineage from custom sql -> datasets/tables #
|
|
2252
|
+
yield from self._create_lineage_to_upstream_tables(
|
|
2253
|
+
csql_urn, tableau_table_list, datasource
|
|
2254
|
+
)
|
|
2203
2255
|
|
|
2204
2256
|
# Schema Metadata
|
|
2205
2257
|
schema_metadata = self.get_schema_metadata_for_custom_sql(columns)
|
|
@@ -2237,7 +2289,6 @@ class TableauSiteSource:
|
|
|
2237
2289
|
yield self.get_metadata_change_event(dataset_snapshot)
|
|
2238
2290
|
yield self.get_metadata_change_proposal(
|
|
2239
2291
|
dataset_snapshot.urn,
|
|
2240
|
-
aspect_name=c.SUB_TYPES,
|
|
2241
2292
|
aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW, c.CUSTOM_SQL]),
|
|
2242
2293
|
)
|
|
2243
2294
|
|
|
@@ -2402,7 +2453,6 @@ class TableauSiteSource:
|
|
|
2402
2453
|
upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
|
|
2403
2454
|
yield self.get_metadata_change_proposal(
|
|
2404
2455
|
csql_urn,
|
|
2405
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2406
2456
|
aspect=upstream_lineage,
|
|
2407
2457
|
)
|
|
2408
2458
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2588,7 +2638,6 @@ class TableauSiteSource:
|
|
|
2588
2638
|
)
|
|
2589
2639
|
yield self.get_metadata_change_proposal(
|
|
2590
2640
|
csql_urn,
|
|
2591
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2592
2641
|
aspect=upstream_lineage,
|
|
2593
2642
|
)
|
|
2594
2643
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2634,14 +2683,10 @@ class TableauSiteSource:
|
|
|
2634
2683
|
def get_metadata_change_proposal(
|
|
2635
2684
|
self,
|
|
2636
2685
|
urn: str,
|
|
2637
|
-
aspect_name: str,
|
|
2638
2686
|
aspect: Union["UpstreamLineage", "SubTypesClass"],
|
|
2639
2687
|
) -> MetadataWorkUnit:
|
|
2640
2688
|
return MetadataChangeProposalWrapper(
|
|
2641
|
-
entityType=c.DATASET,
|
|
2642
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
2643
2689
|
entityUrn=urn,
|
|
2644
|
-
aspectName=aspect_name,
|
|
2645
2690
|
aspect=aspect,
|
|
2646
2691
|
).as_workunit()
|
|
2647
2692
|
|
|
@@ -2698,13 +2743,12 @@ class TableauSiteSource:
|
|
|
2698
2743
|
dataset_snapshot.aspects.append(browse_paths)
|
|
2699
2744
|
|
|
2700
2745
|
# Ownership
|
|
2701
|
-
|
|
2702
|
-
self.
|
|
2703
|
-
if datasource_info
|
|
2704
|
-
and datasource_info.get(c.OWNER)
|
|
2705
|
-
and datasource_info[c.OWNER].get(c.USERNAME)
|
|
2746
|
+
owner_identifier = (
|
|
2747
|
+
self._get_owner_identifier(datasource_info[c.OWNER])
|
|
2748
|
+
if datasource_info and datasource_info.get(c.OWNER)
|
|
2706
2749
|
else None
|
|
2707
2750
|
)
|
|
2751
|
+
owner = self._get_ownership(owner_identifier) if owner_identifier else None
|
|
2708
2752
|
if owner is not None:
|
|
2709
2753
|
dataset_snapshot.aspects.append(owner)
|
|
2710
2754
|
|
|
@@ -2749,7 +2793,6 @@ class TableauSiteSource:
|
|
|
2749
2793
|
)
|
|
2750
2794
|
yield self.get_metadata_change_proposal(
|
|
2751
2795
|
datasource_urn,
|
|
2752
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2753
2796
|
aspect=upstream_lineage,
|
|
2754
2797
|
)
|
|
2755
2798
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2768,7 +2811,6 @@ class TableauSiteSource:
|
|
|
2768
2811
|
yield self.get_metadata_change_event(dataset_snapshot)
|
|
2769
2812
|
yield self.get_metadata_change_proposal(
|
|
2770
2813
|
dataset_snapshot.urn,
|
|
2771
|
-
aspect_name=c.SUB_TYPES,
|
|
2772
2814
|
aspect=SubTypesClass(
|
|
2773
2815
|
typeNames=(
|
|
2774
2816
|
["Embedded Data Source"]
|
|
@@ -2854,7 +2896,11 @@ class TableauSiteSource:
|
|
|
2854
2896
|
return datasource
|
|
2855
2897
|
|
|
2856
2898
|
def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]:
|
|
2857
|
-
datasource_filter =
|
|
2899
|
+
datasource_filter = (
|
|
2900
|
+
{}
|
|
2901
|
+
if self.config.emit_all_published_datasources
|
|
2902
|
+
else {c.ID_WITH_IN: self.datasource_ids_being_used}
|
|
2903
|
+
)
|
|
2858
2904
|
|
|
2859
2905
|
for datasource in self.get_connection_objects(
|
|
2860
2906
|
query=published_datasource_graphql_query,
|
|
@@ -3107,7 +3153,7 @@ class TableauSiteSource:
|
|
|
3107
3153
|
|
|
3108
3154
|
creator: Optional[str] = None
|
|
3109
3155
|
if workbook is not None and workbook.get(c.OWNER) is not None:
|
|
3110
|
-
creator = workbook[c.OWNER]
|
|
3156
|
+
creator = self._get_owner_identifier(workbook[c.OWNER])
|
|
3111
3157
|
created_at = sheet.get(c.CREATED_AT, datetime.now())
|
|
3112
3158
|
updated_at = sheet.get(c.UPDATED_AT, datetime.now())
|
|
3113
3159
|
last_modified = self.get_last_modified(creator, created_at, updated_at)
|
|
@@ -3256,7 +3302,7 @@ class TableauSiteSource:
|
|
|
3256
3302
|
|
|
3257
3303
|
def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
|
|
3258
3304
|
workbook_container_key = self.gen_workbook_key(workbook[c.ID])
|
|
3259
|
-
creator = workbook.get(c.OWNER, {})
|
|
3305
|
+
creator = self._get_owner_identifier(workbook.get(c.OWNER, {}))
|
|
3260
3306
|
|
|
3261
3307
|
owner_urn = (
|
|
3262
3308
|
builder.make_user_urn(creator)
|
|
@@ -3438,7 +3484,7 @@ class TableauSiteSource:
|
|
|
3438
3484
|
|
|
3439
3485
|
creator: Optional[str] = None
|
|
3440
3486
|
if workbook is not None and workbook.get(c.OWNER) is not None:
|
|
3441
|
-
creator = workbook[c.OWNER]
|
|
3487
|
+
creator = self._get_owner_identifier(workbook[c.OWNER])
|
|
3442
3488
|
created_at = dashboard.get(c.CREATED_AT, datetime.now())
|
|
3443
3489
|
updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
|
|
3444
3490
|
last_modified = self.get_last_modified(creator, created_at, updated_at)
|
|
@@ -3547,7 +3593,11 @@ class TableauSiteSource:
|
|
|
3547
3593
|
return browse_paths
|
|
3548
3594
|
|
|
3549
3595
|
def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]:
|
|
3550
|
-
datasource_filter =
|
|
3596
|
+
datasource_filter = (
|
|
3597
|
+
{}
|
|
3598
|
+
if self.config.emit_all_embedded_datasources
|
|
3599
|
+
else {c.ID_WITH_IN: self.embedded_datasource_ids_being_used}
|
|
3600
|
+
)
|
|
3551
3601
|
|
|
3552
3602
|
for datasource in self.get_connection_objects(
|
|
3553
3603
|
query=embedded_datasource_graphql_query,
|
|
@@ -3581,6 +3631,20 @@ class TableauSiteSource:
|
|
|
3581
3631
|
)
|
|
3582
3632
|
return last_modified
|
|
3583
3633
|
|
|
3634
|
+
def _get_owner_identifier(self, owner_dict: dict) -> Optional[str]:
|
|
3635
|
+
"""Extract owner identifier (email or username) based on configuration."""
|
|
3636
|
+
if not owner_dict:
|
|
3637
|
+
return None
|
|
3638
|
+
|
|
3639
|
+
if self.config.use_email_as_username:
|
|
3640
|
+
email = owner_dict.get(c.EMAIL)
|
|
3641
|
+
if email:
|
|
3642
|
+
return email
|
|
3643
|
+
# Fall back to username if email is not available
|
|
3644
|
+
self.report.num_email_fallback_to_username += 1
|
|
3645
|
+
|
|
3646
|
+
return owner_dict.get(c.USERNAME)
|
|
3647
|
+
|
|
3584
3648
|
@lru_cache(maxsize=None)
|
|
3585
3649
|
def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
|
|
3586
3650
|
if self.config.ingest_owner and user:
|
|
@@ -3659,7 +3723,7 @@ class TableauSiteSource:
|
|
|
3659
3723
|
container_key=project_key,
|
|
3660
3724
|
name=project_.name,
|
|
3661
3725
|
description=project_.description,
|
|
3662
|
-
sub_types=[
|
|
3726
|
+
sub_types=[BIContainerSubTypes.TABLEAU_PROJECT],
|
|
3663
3727
|
parent_container_key=parent_project_key,
|
|
3664
3728
|
)
|
|
3665
3729
|
|
|
@@ -3677,7 +3741,7 @@ class TableauSiteSource:
|
|
|
3677
3741
|
yield from gen_containers(
|
|
3678
3742
|
container_key=self.gen_site_key(self.site_id),
|
|
3679
3743
|
name=self.site.name or "Default",
|
|
3680
|
-
sub_types=[
|
|
3744
|
+
sub_types=[BIContainerSubTypes.TABLEAU_SITE],
|
|
3681
3745
|
)
|
|
3682
3746
|
|
|
3683
3747
|
def _fetch_groups(self):
|
|
@@ -3804,3 +3868,15 @@ class TableauSiteSource:
|
|
|
3804
3868
|
self.report.emit_upstream_tables_timer[self.site_content_url] = (
|
|
3805
3869
|
timer.elapsed_seconds(digits=2)
|
|
3806
3870
|
)
|
|
3871
|
+
|
|
3872
|
+
# Log owner extraction statistics if there were fallbacks
|
|
3873
|
+
if (
|
|
3874
|
+
self.config.use_email_as_username
|
|
3875
|
+
and self.config.ingest_owner
|
|
3876
|
+
and self.report.num_email_fallback_to_username > 0
|
|
3877
|
+
):
|
|
3878
|
+
logger.info(
|
|
3879
|
+
f"Owner extraction summary for site '{self.site_content_url}': "
|
|
3880
|
+
f"{self.report.num_email_fallback_to_username} entities fell back from email to username "
|
|
3881
|
+
f"(email was not available)"
|
|
3882
|
+
)
|
|
@@ -65,6 +65,7 @@ workbook_graphql_query = """
|
|
|
65
65
|
projectName
|
|
66
66
|
owner {
|
|
67
67
|
username
|
|
68
|
+
email
|
|
68
69
|
}
|
|
69
70
|
description
|
|
70
71
|
uri
|
|
@@ -107,6 +108,7 @@ sheet_graphql_query = """
|
|
|
107
108
|
luid
|
|
108
109
|
owner {
|
|
109
110
|
username
|
|
111
|
+
email
|
|
110
112
|
}
|
|
111
113
|
}
|
|
112
114
|
datasourceFields {
|
|
@@ -185,6 +187,7 @@ dashboard_graphql_query = """
|
|
|
185
187
|
luid
|
|
186
188
|
owner {
|
|
187
189
|
username
|
|
190
|
+
email
|
|
188
191
|
}
|
|
189
192
|
}
|
|
190
193
|
}
|
|
@@ -268,6 +271,7 @@ embedded_datasource_graphql_query = """
|
|
|
268
271
|
luid
|
|
269
272
|
owner {
|
|
270
273
|
username
|
|
274
|
+
email
|
|
271
275
|
}
|
|
272
276
|
}
|
|
273
277
|
}
|
|
@@ -424,6 +428,7 @@ published_datasource_graphql_query = """
|
|
|
424
428
|
}
|
|
425
429
|
owner {
|
|
426
430
|
username
|
|
431
|
+
email
|
|
427
432
|
}
|
|
428
433
|
description
|
|
429
434
|
uri
|
|
@@ -579,10 +584,12 @@ def get_platform(connection_type: str) -> str:
|
|
|
579
584
|
platform = "oracle"
|
|
580
585
|
elif connection_type in ("tbio", "teradata"):
|
|
581
586
|
platform = "teradata"
|
|
582
|
-
elif connection_type in ("sqlserver"):
|
|
587
|
+
elif connection_type in ("sqlserver",):
|
|
583
588
|
platform = "mssql"
|
|
584
|
-
elif connection_type in ("athena"):
|
|
589
|
+
elif connection_type in ("athena",):
|
|
585
590
|
platform = "athena"
|
|
591
|
+
elif connection_type in ("googlebigquery",):
|
|
592
|
+
platform = "bigquery"
|
|
586
593
|
elif connection_type.endswith("_jdbc"):
|
|
587
594
|
# e.g. convert trino_jdbc -> trino
|
|
588
595
|
platform = connection_type[: -len("_jdbc")]
|
|
@@ -50,7 +50,6 @@ TABLES = "tables"
|
|
|
50
50
|
DESCRIPTION = "description"
|
|
51
51
|
SQL = "SQL"
|
|
52
52
|
QUERY = "query"
|
|
53
|
-
SUB_TYPES = "subTypes"
|
|
54
53
|
VIEW = "view"
|
|
55
54
|
CUSTOM_SQL = "Custom SQL"
|
|
56
55
|
REMOTE_TYPE = "remoteType"
|
|
@@ -58,9 +57,9 @@ UNKNOWN = "UNKNOWN"
|
|
|
58
57
|
PUBLISHED_DATA_SOURCE = "PublishedDatasource"
|
|
59
58
|
LUID = "luid"
|
|
60
59
|
EMBEDDED_DATA_SOURCE = "EmbeddedDatasource"
|
|
61
|
-
UPSTREAM_LINEAGE = "upstreamLineage"
|
|
62
60
|
OWNER = "owner"
|
|
63
61
|
USERNAME = "username"
|
|
62
|
+
EMAIL = "email"
|
|
64
63
|
HAS_EXTRACTS = "hasExtracts"
|
|
65
64
|
EXTRACT_LAST_REFRESH_TIME = "extractLastRefreshTime"
|
|
66
65
|
EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
|
|
@@ -78,8 +77,6 @@ CHART = "chart"
|
|
|
78
77
|
DASHBOARD = "dashboard"
|
|
79
78
|
DASHBOARDS_CONNECTION = "dashboardsConnection"
|
|
80
79
|
EMBEDDED_DATA_SOURCES_CONNECTION = "embeddedDatasourcesConnection"
|
|
81
|
-
PROJECT = "Project"
|
|
82
|
-
SITE = "Site"
|
|
83
80
|
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
|
|
84
81
|
SITE_PERMISSION = "sitePermission"
|
|
85
82
|
ROLE_SITE_ADMIN_EXPLORER = "SiteAdministratorExplorer"
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
from tableauserverclient import Server, UserItem
|
|
4
5
|
|
|
@@ -10,6 +11,7 @@ class UserInfo:
|
|
|
10
11
|
user_name: str
|
|
11
12
|
site_role: str
|
|
12
13
|
site_id: str
|
|
14
|
+
email: Optional[str] = None
|
|
13
15
|
|
|
14
16
|
def has_site_administrator_explorer_privileges(self):
|
|
15
17
|
return self.site_role in [
|
|
@@ -34,4 +36,5 @@ class UserInfo:
|
|
|
34
36
|
user_name=user.name,
|
|
35
37
|
site_role=user.site_role,
|
|
36
38
|
site_id=server.site_id,
|
|
39
|
+
email=user.email,
|
|
37
40
|
)
|