acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
5
|
from collections import OrderedDict, defaultdict
|
|
6
|
+
from copy import deepcopy
|
|
6
7
|
from dataclasses import dataclass, field as dataclass_field
|
|
7
8
|
from datetime import datetime, timedelta, timezone
|
|
8
9
|
from functools import lru_cache
|
|
@@ -12,6 +13,7 @@ from typing import (
|
|
|
12
13
|
Dict,
|
|
13
14
|
Iterable,
|
|
14
15
|
List,
|
|
16
|
+
Literal,
|
|
15
17
|
Optional,
|
|
16
18
|
Set,
|
|
17
19
|
Tuple,
|
|
@@ -79,6 +81,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
79
81
|
from datahub.ingestion.source.common.subtypes import (
|
|
80
82
|
BIContainerSubTypes,
|
|
81
83
|
DatasetSubTypes,
|
|
84
|
+
SourceCapabilityModifier,
|
|
82
85
|
)
|
|
83
86
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
84
87
|
StaleEntityRemovalHandler,
|
|
@@ -117,7 +120,6 @@ from datahub.ingestion.source.tableau.tableau_common import (
|
|
|
117
120
|
)
|
|
118
121
|
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
|
|
119
122
|
from datahub.ingestion.source.tableau.tableau_validation import check_user_role
|
|
120
|
-
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
121
123
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
122
124
|
AuditStamp,
|
|
123
125
|
ChangeAuditStamps,
|
|
@@ -147,7 +149,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
147
149
|
)
|
|
148
150
|
from datahub.metadata.schema_classes import (
|
|
149
151
|
BrowsePathsClass,
|
|
150
|
-
ChangeTypeClass,
|
|
151
152
|
ChartInfoClass,
|
|
152
153
|
ChartUsageStatisticsClass,
|
|
153
154
|
DashboardInfoClass,
|
|
@@ -473,6 +474,13 @@ class TableauPageSizeConfig(ConfigModel):
|
|
|
473
474
|
return self.database_table_page_size or self.page_size
|
|
474
475
|
|
|
475
476
|
|
|
477
|
+
_IngestHiddenAssetsOptionsType = Literal["worksheet", "dashboard"]
|
|
478
|
+
_IngestHiddenAssetsOptions: List[_IngestHiddenAssetsOptionsType] = [
|
|
479
|
+
"worksheet",
|
|
480
|
+
"dashboard",
|
|
481
|
+
]
|
|
482
|
+
|
|
483
|
+
|
|
476
484
|
class TableauConfig(
|
|
477
485
|
DatasetLineageProviderConfigBase,
|
|
478
486
|
StatefulIngestionConfigBase,
|
|
@@ -523,10 +531,22 @@ class TableauConfig(
|
|
|
523
531
|
default=False,
|
|
524
532
|
description="Ingest Owner from source. This will override Owner info entered from UI",
|
|
525
533
|
)
|
|
534
|
+
use_email_as_username: bool = Field(
|
|
535
|
+
default=False,
|
|
536
|
+
description="Use email address instead of username for entity owners. Requires ingest_owner to be True.",
|
|
537
|
+
)
|
|
526
538
|
ingest_tables_external: bool = Field(
|
|
527
539
|
default=False,
|
|
528
540
|
description="Ingest details for tables external to (not embedded in) tableau as entities.",
|
|
529
541
|
)
|
|
542
|
+
emit_all_published_datasources: bool = Field(
|
|
543
|
+
default=False,
|
|
544
|
+
description="Ingest all published data sources. When False (default), only ingest published data sources that belong to an ingested workbook.",
|
|
545
|
+
)
|
|
546
|
+
emit_all_embedded_datasources: bool = Field(
|
|
547
|
+
default=False,
|
|
548
|
+
description="Ingest all embedded data sources. When False (default), only ingest embedded data sources that belong to an ingested workbook.",
|
|
549
|
+
)
|
|
530
550
|
|
|
531
551
|
env: str = Field(
|
|
532
552
|
default=builder.DEFAULT_ENV,
|
|
@@ -573,13 +593,13 @@ class TableauConfig(
|
|
|
573
593
|
)
|
|
574
594
|
|
|
575
595
|
extract_lineage_from_unsupported_custom_sql_queries: bool = Field(
|
|
576
|
-
default=
|
|
577
|
-
description="[Experimental]
|
|
596
|
+
default=True,
|
|
597
|
+
description="[Experimental] Extract lineage from Custom SQL queries using DataHub's SQL parser in cases where the Tableau Catalog API fails to return lineage for the query.",
|
|
578
598
|
)
|
|
579
599
|
|
|
580
600
|
force_extraction_of_lineage_from_custom_sql_queries: bool = Field(
|
|
581
601
|
default=False,
|
|
582
|
-
description="[Experimental] Force extraction of lineage from
|
|
602
|
+
description="[Experimental] Force extraction of lineage from Custom SQL queries using DataHub's SQL parser, even when the Tableau Catalog API returns lineage already.",
|
|
583
603
|
)
|
|
584
604
|
|
|
585
605
|
sql_parsing_disable_schema_awareness: bool = Field(
|
|
@@ -612,10 +632,14 @@ class TableauConfig(
|
|
|
612
632
|
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
|
|
613
633
|
)
|
|
614
634
|
|
|
615
|
-
ingest_hidden_assets: bool = Field(
|
|
616
|
-
|
|
617
|
-
description=
|
|
618
|
-
|
|
635
|
+
ingest_hidden_assets: Union[List[_IngestHiddenAssetsOptionsType], bool] = Field(
|
|
636
|
+
_IngestHiddenAssetsOptions,
|
|
637
|
+
description=(
|
|
638
|
+
"When enabled, hidden worksheets and dashboards are ingested into Datahub."
|
|
639
|
+
" If a dashboard or worksheet is hidden in Tableau the luid is blank."
|
|
640
|
+
" A list of asset types can also be specified, to only ingest those hidden assets."
|
|
641
|
+
" Current options supported are 'worksheet' and 'dashboard'."
|
|
642
|
+
),
|
|
619
643
|
)
|
|
620
644
|
|
|
621
645
|
tags_for_hidden_assets: List[str] = Field(
|
|
@@ -631,6 +655,11 @@ class TableauConfig(
|
|
|
631
655
|
# pre = True because we want to take some decision before pydantic initialize the configuration to default values
|
|
632
656
|
@root_validator(pre=True)
|
|
633
657
|
def projects_backward_compatibility(cls, values: Dict) -> Dict:
|
|
658
|
+
# In-place update of the input dict would cause state contamination. This was discovered through test failures
|
|
659
|
+
# in test_hex.py where the same dict is reused.
|
|
660
|
+
# So a copy is performed first.
|
|
661
|
+
values = deepcopy(values)
|
|
662
|
+
|
|
634
663
|
projects = values.get("projects")
|
|
635
664
|
project_pattern = values.get("project_pattern")
|
|
636
665
|
project_path_pattern = values.get("project_path_pattern")
|
|
@@ -642,6 +671,7 @@ class TableauConfig(
|
|
|
642
671
|
values["project_pattern"] = AllowDenyPattern(
|
|
643
672
|
allow=[f"^{prj}$" for prj in projects]
|
|
644
673
|
)
|
|
674
|
+
values.pop("projects")
|
|
645
675
|
elif (project_pattern or project_path_pattern) and projects:
|
|
646
676
|
raise ValueError(
|
|
647
677
|
"projects is deprecated. Please use project_path_pattern only."
|
|
@@ -653,7 +683,7 @@ class TableauConfig(
|
|
|
653
683
|
|
|
654
684
|
return values
|
|
655
685
|
|
|
656
|
-
@root_validator()
|
|
686
|
+
@root_validator(skip_on_failure=True)
|
|
657
687
|
def validate_config_values(cls, values: Dict) -> Dict:
|
|
658
688
|
tags_for_hidden_assets = values.get("tags_for_hidden_assets")
|
|
659
689
|
ingest_tags = values.get("ingest_tags")
|
|
@@ -665,6 +695,14 @@ class TableauConfig(
|
|
|
665
695
|
raise ValueError(
|
|
666
696
|
"tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
|
|
667
697
|
)
|
|
698
|
+
|
|
699
|
+
use_email_as_username = values.get("use_email_as_username")
|
|
700
|
+
ingest_owner = values.get("ingest_owner")
|
|
701
|
+
if use_email_as_username and not ingest_owner:
|
|
702
|
+
raise ValueError(
|
|
703
|
+
"use_email_as_username requires ingest_owner to be enabled."
|
|
704
|
+
)
|
|
705
|
+
|
|
668
706
|
return values
|
|
669
707
|
|
|
670
708
|
|
|
@@ -756,7 +794,6 @@ class SiteIdContentUrl:
|
|
|
756
794
|
@dataclass
|
|
757
795
|
class TableauSourceReport(
|
|
758
796
|
StaleEntityRemovalSourceReport,
|
|
759
|
-
IngestionStageReport,
|
|
760
797
|
):
|
|
761
798
|
get_all_datasources_query_failed: bool = False
|
|
762
799
|
num_get_datasource_query_failures: int = 0
|
|
@@ -826,6 +863,9 @@ class TableauSourceReport(
|
|
|
826
863
|
default_factory=(lambda: defaultdict(int))
|
|
827
864
|
)
|
|
828
865
|
|
|
866
|
+
# Owner extraction statistics
|
|
867
|
+
num_email_fallback_to_username: int = 0
|
|
868
|
+
|
|
829
869
|
|
|
830
870
|
def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
831
871
|
title: str = "Insufficient Permissions"
|
|
@@ -856,16 +896,29 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
|
856
896
|
@platform_name("Tableau")
|
|
857
897
|
@config_class(TableauConfig)
|
|
858
898
|
@support_status(SupportStatus.CERTIFIED)
|
|
899
|
+
@capability(
|
|
900
|
+
SourceCapability.CONTAINERS,
|
|
901
|
+
"Enabled by default",
|
|
902
|
+
subtype_modifier=[
|
|
903
|
+
SourceCapabilityModifier.TABLEAU_PROJECT,
|
|
904
|
+
SourceCapabilityModifier.TABLEAU_SITE,
|
|
905
|
+
SourceCapabilityModifier.TABLEAU_WORKBOOK,
|
|
906
|
+
],
|
|
907
|
+
)
|
|
859
908
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
860
909
|
@capability(SourceCapability.DOMAINS, "Requires transformer", supported=False)
|
|
861
910
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
862
911
|
@capability(
|
|
863
912
|
SourceCapability.USAGE_STATS,
|
|
864
913
|
"Dashboard/Chart view counts, enabled using extract_usage_stats config",
|
|
914
|
+
subtype_modifier=[
|
|
915
|
+
SourceCapabilityModifier.DASHBOARD,
|
|
916
|
+
SourceCapabilityModifier.CHART,
|
|
917
|
+
],
|
|
865
918
|
)
|
|
866
919
|
@capability(
|
|
867
920
|
SourceCapability.DELETION_DETECTION,
|
|
868
|
-
"Enabled by default
|
|
921
|
+
"Enabled by default via stateful ingestion.",
|
|
869
922
|
)
|
|
870
923
|
@capability(SourceCapability.OWNERSHIP, "Requires recipe configuration")
|
|
871
924
|
@capability(SourceCapability.TAGS, "Requires recipe configuration")
|
|
@@ -874,6 +927,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
|
874
927
|
SourceCapability.LINEAGE_FINE,
|
|
875
928
|
"Enabled by default, configure using `extract_column_level_lineage`",
|
|
876
929
|
)
|
|
930
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
877
931
|
class TableauSource(StatefulIngestionSourceBase, TestableSource):
|
|
878
932
|
platform = "tableau"
|
|
879
933
|
|
|
@@ -1157,7 +1211,7 @@ class TableauSiteSource:
|
|
|
1157
1211
|
self.report.warning(
|
|
1158
1212
|
title="Incomplete project hierarchy",
|
|
1159
1213
|
message="Project details missing. Child projects will be ingested without reference to their parent project. We generally need Site Administrator Explorer permissions to extract the complete project hierarchy.",
|
|
1160
|
-
context=f"Missing {project.parent_id}, referenced by {project.id} {project.
|
|
1214
|
+
context=f"Missing {project.parent_id}, referenced by {project.id} {project.name}",
|
|
1161
1215
|
)
|
|
1162
1216
|
project.parent_id = None
|
|
1163
1217
|
|
|
@@ -1348,6 +1402,26 @@ class TableauSiteSource:
|
|
|
1348
1402
|
# More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
|
|
1349
1403
|
return not dashboard_or_view.get(c.LUID)
|
|
1350
1404
|
|
|
1405
|
+
def _should_ingest_worksheet(self, worksheet: Dict) -> bool:
|
|
1406
|
+
return (
|
|
1407
|
+
self.config.ingest_hidden_assets is True
|
|
1408
|
+
or (
|
|
1409
|
+
isinstance(self.config.ingest_hidden_assets, list)
|
|
1410
|
+
and "worksheet" in self.config.ingest_hidden_assets
|
|
1411
|
+
)
|
|
1412
|
+
or not self._is_hidden_view(worksheet)
|
|
1413
|
+
)
|
|
1414
|
+
|
|
1415
|
+
def _should_ingest_dashboard(self, dashboard: Dict) -> bool:
|
|
1416
|
+
return (
|
|
1417
|
+
self.config.ingest_hidden_assets is True
|
|
1418
|
+
or (
|
|
1419
|
+
isinstance(self.config.ingest_hidden_assets, list)
|
|
1420
|
+
and "dashboard" in self.config.ingest_hidden_assets
|
|
1421
|
+
)
|
|
1422
|
+
or not self._is_hidden_view(dashboard)
|
|
1423
|
+
)
|
|
1424
|
+
|
|
1351
1425
|
def get_connection_object_page(
|
|
1352
1426
|
self,
|
|
1353
1427
|
query: str,
|
|
@@ -1369,7 +1443,9 @@ class TableauSiteSource:
|
|
|
1369
1443
|
`fetch_size:` The number of records to retrieve from Tableau
|
|
1370
1444
|
Server in a single API call, starting from the current cursor position on Tableau Server.
|
|
1371
1445
|
"""
|
|
1372
|
-
retries_remaining =
|
|
1446
|
+
retries_remaining = (
|
|
1447
|
+
self.config.max_retries if retries_remaining is None else retries_remaining
|
|
1448
|
+
)
|
|
1373
1449
|
|
|
1374
1450
|
logger.debug(
|
|
1375
1451
|
f"Query {connection_type} to get {fetch_size} objects with cursor {current_cursor}"
|
|
@@ -1512,12 +1588,15 @@ class TableauSiteSource:
|
|
|
1512
1588
|
}}""",
|
|
1513
1589
|
)
|
|
1514
1590
|
else:
|
|
1515
|
-
# As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
|
|
1516
|
-
# timeout error.
|
|
1517
|
-
# It doesn't reliably happen, so retrying a couple of times makes sense.
|
|
1518
1591
|
if all(
|
|
1592
|
+
# As of Tableau Server 2024.2, the metadata API sporadically returns a 30-second
|
|
1593
|
+
# timeout error.
|
|
1594
|
+
# It doesn't reliably happen, so retrying a couple of times makes sense.
|
|
1519
1595
|
error.get("message")
|
|
1520
1596
|
== "Execution canceled because timeout of 30000 millis was reached"
|
|
1597
|
+
# The Metadata API sometimes returns an 'unexpected error' message when querying
|
|
1598
|
+
# embeddedDatasourcesConnection. Try retrying a couple of times.
|
|
1599
|
+
or error.get("message") == "Unexpected error occurred"
|
|
1521
1600
|
for error in errors
|
|
1522
1601
|
):
|
|
1523
1602
|
# If it was only a timeout error, we can retry.
|
|
@@ -1529,8 +1608,8 @@ class TableauSiteSource:
|
|
|
1529
1608
|
(self.config.max_retries - retries_remaining + 1) ** 2, 60
|
|
1530
1609
|
)
|
|
1531
1610
|
logger.info(
|
|
1532
|
-
f"Query {connection_type} received a
|
|
1533
|
-
f"
|
|
1611
|
+
f"Query {connection_type} received a retryable error with {retries_remaining} retries remaining, "
|
|
1612
|
+
f"will retry in {backoff_time} seconds: {errors}"
|
|
1534
1613
|
)
|
|
1535
1614
|
time.sleep(backoff_time)
|
|
1536
1615
|
return self.get_connection_object_page(
|
|
@@ -1540,7 +1619,7 @@ class TableauSiteSource:
|
|
|
1540
1619
|
fetch_size=fetch_size,
|
|
1541
1620
|
current_cursor=current_cursor,
|
|
1542
1621
|
retry_on_auth_error=True,
|
|
1543
|
-
retries_remaining=retries_remaining,
|
|
1622
|
+
retries_remaining=retries_remaining - 1,
|
|
1544
1623
|
)
|
|
1545
1624
|
raise RuntimeError(f"Query {connection_type} error: {errors}")
|
|
1546
1625
|
|
|
@@ -1623,7 +1702,7 @@ class TableauSiteSource:
|
|
|
1623
1702
|
# if multiple project has name C. Ideal solution is to use projectLuidWithin to avoid duplicate project,
|
|
1624
1703
|
# however Tableau supports projectLuidWithin in Tableau Cloud June 2022 / Server 2022.3 and later.
|
|
1625
1704
|
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
|
|
1626
|
-
if project_luid not in self.tableau_project_registry
|
|
1705
|
+
if project_luid not in self.tableau_project_registry:
|
|
1627
1706
|
wrk_name: Optional[str] = workbook.get(c.NAME)
|
|
1628
1707
|
wrk_id: Optional[str] = workbook.get(c.ID)
|
|
1629
1708
|
prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)
|
|
@@ -2147,32 +2226,32 @@ class TableauSiteSource:
|
|
|
2147
2226
|
else []
|
|
2148
2227
|
)
|
|
2149
2228
|
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2229
|
+
tableau_table_list = csql.get(c.TABLES, [])
|
|
2230
|
+
if self.config.force_extraction_of_lineage_from_custom_sql_queries or (
|
|
2231
|
+
not tableau_table_list
|
|
2232
|
+
and self.config.extract_lineage_from_unsupported_custom_sql_queries
|
|
2233
|
+
):
|
|
2234
|
+
if not tableau_table_list:
|
|
2235
|
+
# custom sql tables may contain unsupported sql, causing incomplete lineage
|
|
2236
|
+
# we extract the lineage from the raw queries
|
|
2237
|
+
logger.debug(
|
|
2238
|
+
"Parsing TLL & CLL from custom sql (tableau metadata incomplete)"
|
|
2239
|
+
)
|
|
2240
|
+
else:
|
|
2241
|
+
# The Tableau SQL parser is much worse than our sqlglot based parser,
|
|
2242
|
+
# so relying on metadata parsed by Tableau from SQL queries can be
|
|
2243
|
+
# less accurate. This option allows us to ignore Tableau's parser and
|
|
2244
|
+
# only use our own.
|
|
2245
|
+
logger.debug("Parsing TLL & CLL from custom sql (forced)")
|
|
2246
|
+
|
|
2156
2247
|
yield from self._create_lineage_from_unsupported_csql(
|
|
2157
2248
|
csql_urn, csql, columns
|
|
2158
2249
|
)
|
|
2159
2250
|
else:
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
yield from self._create_lineage_to_upstream_tables(
|
|
2165
|
-
csql_urn, tables, datasource
|
|
2166
|
-
)
|
|
2167
|
-
elif (
|
|
2168
|
-
self.config.extract_lineage_from_unsupported_custom_sql_queries
|
|
2169
|
-
):
|
|
2170
|
-
logger.debug("Extracting TLL & CLL from custom sql")
|
|
2171
|
-
# custom sql tables may contain unsupported sql, causing incomplete lineage
|
|
2172
|
-
# we extract the lineage from the raw queries
|
|
2173
|
-
yield from self._create_lineage_from_unsupported_csql(
|
|
2174
|
-
csql_urn, csql, columns
|
|
2175
|
-
)
|
|
2251
|
+
# lineage from custom sql -> datasets/tables #
|
|
2252
|
+
yield from self._create_lineage_to_upstream_tables(
|
|
2253
|
+
csql_urn, tableau_table_list, datasource
|
|
2254
|
+
)
|
|
2176
2255
|
|
|
2177
2256
|
# Schema Metadata
|
|
2178
2257
|
schema_metadata = self.get_schema_metadata_for_custom_sql(columns)
|
|
@@ -2210,7 +2289,6 @@ class TableauSiteSource:
|
|
|
2210
2289
|
yield self.get_metadata_change_event(dataset_snapshot)
|
|
2211
2290
|
yield self.get_metadata_change_proposal(
|
|
2212
2291
|
dataset_snapshot.urn,
|
|
2213
|
-
aspect_name=c.SUB_TYPES,
|
|
2214
2292
|
aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW, c.CUSTOM_SQL]),
|
|
2215
2293
|
)
|
|
2216
2294
|
|
|
@@ -2253,7 +2331,7 @@ class TableauSiteSource:
|
|
|
2253
2331
|
# It is possible due to https://github.com/tableau/server-client-python/issues/1210
|
|
2254
2332
|
if (
|
|
2255
2333
|
ds.get(c.LUID)
|
|
2256
|
-
and ds[c.LUID] not in self.datasource_project_map
|
|
2334
|
+
and ds[c.LUID] not in self.datasource_project_map
|
|
2257
2335
|
and self.report.get_all_datasources_query_failed
|
|
2258
2336
|
):
|
|
2259
2337
|
logger.debug(
|
|
@@ -2265,7 +2343,7 @@ class TableauSiteSource:
|
|
|
2265
2343
|
|
|
2266
2344
|
if (
|
|
2267
2345
|
ds.get(c.LUID)
|
|
2268
|
-
and ds[c.LUID] in self.datasource_project_map
|
|
2346
|
+
and ds[c.LUID] in self.datasource_project_map
|
|
2269
2347
|
and self.datasource_project_map[ds[c.LUID]] in self.tableau_project_registry
|
|
2270
2348
|
):
|
|
2271
2349
|
return self.datasource_project_map[ds[c.LUID]]
|
|
@@ -2375,7 +2453,6 @@ class TableauSiteSource:
|
|
|
2375
2453
|
upstream_lineage = UpstreamLineage(upstreams=upstream_tables)
|
|
2376
2454
|
yield self.get_metadata_change_proposal(
|
|
2377
2455
|
csql_urn,
|
|
2378
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2379
2456
|
aspect=upstream_lineage,
|
|
2380
2457
|
)
|
|
2381
2458
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2561,7 +2638,6 @@ class TableauSiteSource:
|
|
|
2561
2638
|
)
|
|
2562
2639
|
yield self.get_metadata_change_proposal(
|
|
2563
2640
|
csql_urn,
|
|
2564
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2565
2641
|
aspect=upstream_lineage,
|
|
2566
2642
|
)
|
|
2567
2643
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2607,14 +2683,10 @@ class TableauSiteSource:
|
|
|
2607
2683
|
def get_metadata_change_proposal(
|
|
2608
2684
|
self,
|
|
2609
2685
|
urn: str,
|
|
2610
|
-
aspect_name: str,
|
|
2611
2686
|
aspect: Union["UpstreamLineage", "SubTypesClass"],
|
|
2612
2687
|
) -> MetadataWorkUnit:
|
|
2613
2688
|
return MetadataChangeProposalWrapper(
|
|
2614
|
-
entityType=c.DATASET,
|
|
2615
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
2616
2689
|
entityUrn=urn,
|
|
2617
|
-
aspectName=aspect_name,
|
|
2618
2690
|
aspect=aspect,
|
|
2619
2691
|
).as_workunit()
|
|
2620
2692
|
|
|
@@ -2671,13 +2743,12 @@ class TableauSiteSource:
|
|
|
2671
2743
|
dataset_snapshot.aspects.append(browse_paths)
|
|
2672
2744
|
|
|
2673
2745
|
# Ownership
|
|
2674
|
-
|
|
2675
|
-
self.
|
|
2676
|
-
if datasource_info
|
|
2677
|
-
and datasource_info.get(c.OWNER)
|
|
2678
|
-
and datasource_info[c.OWNER].get(c.USERNAME)
|
|
2746
|
+
owner_identifier = (
|
|
2747
|
+
self._get_owner_identifier(datasource_info[c.OWNER])
|
|
2748
|
+
if datasource_info and datasource_info.get(c.OWNER)
|
|
2679
2749
|
else None
|
|
2680
2750
|
)
|
|
2751
|
+
owner = self._get_ownership(owner_identifier) if owner_identifier else None
|
|
2681
2752
|
if owner is not None:
|
|
2682
2753
|
dataset_snapshot.aspects.append(owner)
|
|
2683
2754
|
|
|
@@ -2722,7 +2793,6 @@ class TableauSiteSource:
|
|
|
2722
2793
|
)
|
|
2723
2794
|
yield self.get_metadata_change_proposal(
|
|
2724
2795
|
datasource_urn,
|
|
2725
|
-
aspect_name=c.UPSTREAM_LINEAGE,
|
|
2726
2796
|
aspect=upstream_lineage,
|
|
2727
2797
|
)
|
|
2728
2798
|
self.report.num_tables_with_upstream_lineage += 1
|
|
@@ -2741,7 +2811,6 @@ class TableauSiteSource:
|
|
|
2741
2811
|
yield self.get_metadata_change_event(dataset_snapshot)
|
|
2742
2812
|
yield self.get_metadata_change_proposal(
|
|
2743
2813
|
dataset_snapshot.urn,
|
|
2744
|
-
aspect_name=c.SUB_TYPES,
|
|
2745
2814
|
aspect=SubTypesClass(
|
|
2746
2815
|
typeNames=(
|
|
2747
2816
|
["Embedded Data Source"]
|
|
@@ -2827,7 +2896,11 @@ class TableauSiteSource:
|
|
|
2827
2896
|
return datasource
|
|
2828
2897
|
|
|
2829
2898
|
def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]:
|
|
2830
|
-
datasource_filter =
|
|
2899
|
+
datasource_filter = (
|
|
2900
|
+
{}
|
|
2901
|
+
if self.config.emit_all_published_datasources
|
|
2902
|
+
else {c.ID_WITH_IN: self.datasource_ids_being_used}
|
|
2903
|
+
)
|
|
2831
2904
|
|
|
2832
2905
|
for datasource in self.get_connection_objects(
|
|
2833
2906
|
query=published_datasource_graphql_query,
|
|
@@ -3059,7 +3132,7 @@ class TableauSiteSource:
|
|
|
3059
3132
|
query_filter=sheets_filter,
|
|
3060
3133
|
page_size=self.config.effective_sheet_page_size,
|
|
3061
3134
|
):
|
|
3062
|
-
if self.
|
|
3135
|
+
if self._should_ingest_worksheet(sheet):
|
|
3063
3136
|
yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
|
|
3064
3137
|
else:
|
|
3065
3138
|
self.report.num_hidden_assets_skipped += 1
|
|
@@ -3080,7 +3153,7 @@ class TableauSiteSource:
|
|
|
3080
3153
|
|
|
3081
3154
|
creator: Optional[str] = None
|
|
3082
3155
|
if workbook is not None and workbook.get(c.OWNER) is not None:
|
|
3083
|
-
creator = workbook[c.OWNER]
|
|
3156
|
+
creator = self._get_owner_identifier(workbook[c.OWNER])
|
|
3084
3157
|
created_at = sheet.get(c.CREATED_AT, datetime.now())
|
|
3085
3158
|
updated_at = sheet.get(c.UPDATED_AT, datetime.now())
|
|
3086
3159
|
last_modified = self.get_last_modified(creator, created_at, updated_at)
|
|
@@ -3229,7 +3302,7 @@ class TableauSiteSource:
|
|
|
3229
3302
|
|
|
3230
3303
|
def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
|
|
3231
3304
|
workbook_container_key = self.gen_workbook_key(workbook[c.ID])
|
|
3232
|
-
creator = workbook.get(c.OWNER, {})
|
|
3305
|
+
creator = self._get_owner_identifier(workbook.get(c.OWNER, {}))
|
|
3233
3306
|
|
|
3234
3307
|
owner_urn = (
|
|
3235
3308
|
builder.make_user_urn(creator)
|
|
@@ -3252,7 +3325,7 @@ class TableauSiteSource:
|
|
|
3252
3325
|
|
|
3253
3326
|
parent_key = None
|
|
3254
3327
|
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
|
|
3255
|
-
if project_luid and project_luid in self.tableau_project_registry
|
|
3328
|
+
if project_luid and project_luid in self.tableau_project_registry:
|
|
3256
3329
|
parent_key = self.gen_project_key(project_luid)
|
|
3257
3330
|
else:
|
|
3258
3331
|
workbook_id: Optional[str] = workbook.get(c.ID)
|
|
@@ -3380,7 +3453,7 @@ class TableauSiteSource:
|
|
|
3380
3453
|
query_filter=dashboards_filter,
|
|
3381
3454
|
page_size=self.config.effective_dashboard_page_size,
|
|
3382
3455
|
):
|
|
3383
|
-
if self.
|
|
3456
|
+
if self._should_ingest_dashboard(dashboard):
|
|
3384
3457
|
yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
|
|
3385
3458
|
else:
|
|
3386
3459
|
self.report.num_hidden_assets_skipped += 1
|
|
@@ -3411,7 +3484,7 @@ class TableauSiteSource:
|
|
|
3411
3484
|
|
|
3412
3485
|
creator: Optional[str] = None
|
|
3413
3486
|
if workbook is not None and workbook.get(c.OWNER) is not None:
|
|
3414
|
-
creator = workbook[c.OWNER]
|
|
3487
|
+
creator = self._get_owner_identifier(workbook[c.OWNER])
|
|
3415
3488
|
created_at = dashboard.get(c.CREATED_AT, datetime.now())
|
|
3416
3489
|
updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
|
|
3417
3490
|
last_modified = self.get_last_modified(creator, created_at, updated_at)
|
|
@@ -3520,7 +3593,11 @@ class TableauSiteSource:
|
|
|
3520
3593
|
return browse_paths
|
|
3521
3594
|
|
|
3522
3595
|
def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]:
|
|
3523
|
-
datasource_filter =
|
|
3596
|
+
datasource_filter = (
|
|
3597
|
+
{}
|
|
3598
|
+
if self.config.emit_all_embedded_datasources
|
|
3599
|
+
else {c.ID_WITH_IN: self.embedded_datasource_ids_being_used}
|
|
3600
|
+
)
|
|
3524
3601
|
|
|
3525
3602
|
for datasource in self.get_connection_objects(
|
|
3526
3603
|
query=embedded_datasource_graphql_query,
|
|
@@ -3554,6 +3631,20 @@ class TableauSiteSource:
|
|
|
3554
3631
|
)
|
|
3555
3632
|
return last_modified
|
|
3556
3633
|
|
|
3634
|
+
def _get_owner_identifier(self, owner_dict: dict) -> Optional[str]:
|
|
3635
|
+
"""Extract owner identifier (email or username) based on configuration."""
|
|
3636
|
+
if not owner_dict:
|
|
3637
|
+
return None
|
|
3638
|
+
|
|
3639
|
+
if self.config.use_email_as_username:
|
|
3640
|
+
email = owner_dict.get(c.EMAIL)
|
|
3641
|
+
if email:
|
|
3642
|
+
return email
|
|
3643
|
+
# Fall back to username if email is not available
|
|
3644
|
+
self.report.num_email_fallback_to_username += 1
|
|
3645
|
+
|
|
3646
|
+
return owner_dict.get(c.USERNAME)
|
|
3647
|
+
|
|
3557
3648
|
@lru_cache(maxsize=None)
|
|
3558
3649
|
def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
|
|
3559
3650
|
if self.config.ingest_owner and user:
|
|
@@ -3632,7 +3723,7 @@ class TableauSiteSource:
|
|
|
3632
3723
|
container_key=project_key,
|
|
3633
3724
|
name=project_.name,
|
|
3634
3725
|
description=project_.description,
|
|
3635
|
-
sub_types=[
|
|
3726
|
+
sub_types=[BIContainerSubTypes.TABLEAU_PROJECT],
|
|
3636
3727
|
parent_container_key=parent_project_key,
|
|
3637
3728
|
)
|
|
3638
3729
|
|
|
@@ -3650,7 +3741,7 @@ class TableauSiteSource:
|
|
|
3650
3741
|
yield from gen_containers(
|
|
3651
3742
|
container_key=self.gen_site_key(self.site_id),
|
|
3652
3743
|
name=self.site.name or "Default",
|
|
3653
|
-
sub_types=[
|
|
3744
|
+
sub_types=[BIContainerSubTypes.TABLEAU_SITE],
|
|
3654
3745
|
)
|
|
3655
3746
|
|
|
3656
3747
|
def _fetch_groups(self):
|
|
@@ -3777,3 +3868,15 @@ class TableauSiteSource:
|
|
|
3777
3868
|
self.report.emit_upstream_tables_timer[self.site_content_url] = (
|
|
3778
3869
|
timer.elapsed_seconds(digits=2)
|
|
3779
3870
|
)
|
|
3871
|
+
|
|
3872
|
+
# Log owner extraction statistics if there were fallbacks
|
|
3873
|
+
if (
|
|
3874
|
+
self.config.use_email_as_username
|
|
3875
|
+
and self.config.ingest_owner
|
|
3876
|
+
and self.report.num_email_fallback_to_username > 0
|
|
3877
|
+
):
|
|
3878
|
+
logger.info(
|
|
3879
|
+
f"Owner extraction summary for site '{self.site_content_url}': "
|
|
3880
|
+
f"{self.report.num_email_fallback_to_username} entities fell back from email to username "
|
|
3881
|
+
f"(email was not available)"
|
|
3882
|
+
)
|
|
@@ -65,6 +65,7 @@ workbook_graphql_query = """
|
|
|
65
65
|
projectName
|
|
66
66
|
owner {
|
|
67
67
|
username
|
|
68
|
+
email
|
|
68
69
|
}
|
|
69
70
|
description
|
|
70
71
|
uri
|
|
@@ -107,6 +108,7 @@ sheet_graphql_query = """
|
|
|
107
108
|
luid
|
|
108
109
|
owner {
|
|
109
110
|
username
|
|
111
|
+
email
|
|
110
112
|
}
|
|
111
113
|
}
|
|
112
114
|
datasourceFields {
|
|
@@ -185,6 +187,7 @@ dashboard_graphql_query = """
|
|
|
185
187
|
luid
|
|
186
188
|
owner {
|
|
187
189
|
username
|
|
190
|
+
email
|
|
188
191
|
}
|
|
189
192
|
}
|
|
190
193
|
}
|
|
@@ -268,6 +271,7 @@ embedded_datasource_graphql_query = """
|
|
|
268
271
|
luid
|
|
269
272
|
owner {
|
|
270
273
|
username
|
|
274
|
+
email
|
|
271
275
|
}
|
|
272
276
|
}
|
|
273
277
|
}
|
|
@@ -424,6 +428,7 @@ published_datasource_graphql_query = """
|
|
|
424
428
|
}
|
|
425
429
|
owner {
|
|
426
430
|
username
|
|
431
|
+
email
|
|
427
432
|
}
|
|
428
433
|
description
|
|
429
434
|
uri
|
|
@@ -579,10 +584,12 @@ def get_platform(connection_type: str) -> str:
|
|
|
579
584
|
platform = "oracle"
|
|
580
585
|
elif connection_type in ("tbio", "teradata"):
|
|
581
586
|
platform = "teradata"
|
|
582
|
-
elif connection_type in ("sqlserver"):
|
|
587
|
+
elif connection_type in ("sqlserver",):
|
|
583
588
|
platform = "mssql"
|
|
584
|
-
elif connection_type in ("athena"):
|
|
589
|
+
elif connection_type in ("athena",):
|
|
585
590
|
platform = "athena"
|
|
591
|
+
elif connection_type in ("googlebigquery",):
|
|
592
|
+
platform = "bigquery"
|
|
586
593
|
elif connection_type.endswith("_jdbc"):
|
|
587
594
|
# e.g. convert trino_jdbc -> trino
|
|
588
595
|
platform = connection_type[: -len("_jdbc")]
|
|
@@ -774,7 +781,7 @@ def get_overridden_info(
|
|
|
774
781
|
if (
|
|
775
782
|
lineage_overrides is not None
|
|
776
783
|
and lineage_overrides.platform_override_map is not None
|
|
777
|
-
and original_platform in lineage_overrides.platform_override_map
|
|
784
|
+
and original_platform in lineage_overrides.platform_override_map
|
|
778
785
|
):
|
|
779
786
|
platform = lineage_overrides.platform_override_map[original_platform]
|
|
780
787
|
|
|
@@ -782,7 +789,7 @@ def get_overridden_info(
|
|
|
782
789
|
lineage_overrides is not None
|
|
783
790
|
and lineage_overrides.database_override_map is not None
|
|
784
791
|
and upstream_db is not None
|
|
785
|
-
and upstream_db in lineage_overrides.database_override_map
|
|
792
|
+
and upstream_db in lineage_overrides.database_override_map
|
|
786
793
|
):
|
|
787
794
|
upstream_db = lineage_overrides.database_override_map[upstream_db]
|
|
788
795
|
|
|
@@ -50,7 +50,6 @@ TABLES = "tables"
|
|
|
50
50
|
DESCRIPTION = "description"
|
|
51
51
|
SQL = "SQL"
|
|
52
52
|
QUERY = "query"
|
|
53
|
-
SUB_TYPES = "subTypes"
|
|
54
53
|
VIEW = "view"
|
|
55
54
|
CUSTOM_SQL = "Custom SQL"
|
|
56
55
|
REMOTE_TYPE = "remoteType"
|
|
@@ -58,9 +57,9 @@ UNKNOWN = "UNKNOWN"
|
|
|
58
57
|
PUBLISHED_DATA_SOURCE = "PublishedDatasource"
|
|
59
58
|
LUID = "luid"
|
|
60
59
|
EMBEDDED_DATA_SOURCE = "EmbeddedDatasource"
|
|
61
|
-
UPSTREAM_LINEAGE = "upstreamLineage"
|
|
62
60
|
OWNER = "owner"
|
|
63
61
|
USERNAME = "username"
|
|
62
|
+
EMAIL = "email"
|
|
64
63
|
HAS_EXTRACTS = "hasExtracts"
|
|
65
64
|
EXTRACT_LAST_REFRESH_TIME = "extractLastRefreshTime"
|
|
66
65
|
EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
|
|
@@ -78,8 +77,6 @@ CHART = "chart"
|
|
|
78
77
|
DASHBOARD = "dashboard"
|
|
79
78
|
DASHBOARDS_CONNECTION = "dashboardsConnection"
|
|
80
79
|
EMBEDDED_DATA_SOURCES_CONNECTION = "embeddedDatasourcesConnection"
|
|
81
|
-
PROJECT = "Project"
|
|
82
|
-
SITE = "Site"
|
|
83
80
|
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
|
|
84
81
|
SITE_PERMISSION = "sitePermission"
|
|
85
82
|
ROLE_SITE_ADMIN_EXPLORER = "SiteAdministratorExplorer"
|