acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from json import JSONDecodeError
|
|
5
6
|
from typing import (
|
|
6
7
|
Any,
|
|
@@ -18,7 +19,7 @@ from typing import (
|
|
|
18
19
|
from looker_sdk.error import SDKError
|
|
19
20
|
from looker_sdk.rtl.serialize import DeserializeError
|
|
20
21
|
from looker_sdk.sdk.api40.models import (
|
|
21
|
-
Dashboard,
|
|
22
|
+
Dashboard as LookerAPIDashboard,
|
|
22
23
|
DashboardElement,
|
|
23
24
|
Folder,
|
|
24
25
|
FolderBase,
|
|
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
|
|
|
29
30
|
|
|
30
31
|
import datahub.emitter.mce_builder as builder
|
|
31
32
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
32
|
-
from datahub.emitter.mcp_builder import
|
|
33
|
+
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
33
34
|
from datahub.ingestion.api.common import PipelineContext
|
|
34
35
|
from datahub.ingestion.api.decorators import (
|
|
35
36
|
SupportStatus,
|
|
@@ -51,6 +52,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
51
52
|
from datahub.ingestion.source.common.subtypes import (
|
|
52
53
|
BIAssetSubTypes,
|
|
53
54
|
BIContainerSubTypes,
|
|
55
|
+
SourceCapabilityModifier,
|
|
54
56
|
)
|
|
55
57
|
from datahub.ingestion.source.looker import looker_usage
|
|
56
58
|
from datahub.ingestion.source.looker.looker_common import (
|
|
@@ -79,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
79
81
|
StatefulIngestionSourceBase,
|
|
80
82
|
)
|
|
81
83
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
82
|
-
AuditStamp,
|
|
83
|
-
ChangeAuditStamps,
|
|
84
|
-
DataPlatformInstance,
|
|
85
84
|
Status,
|
|
86
85
|
)
|
|
87
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
|
88
|
-
ChartSnapshot,
|
|
89
|
-
DashboardSnapshot,
|
|
90
|
-
)
|
|
91
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
92
86
|
from datahub.metadata.schema_classes import (
|
|
93
|
-
BrowsePathEntryClass,
|
|
94
|
-
BrowsePathsClass,
|
|
95
|
-
BrowsePathsV2Class,
|
|
96
|
-
ChartInfoClass,
|
|
97
87
|
ChartTypeClass,
|
|
98
|
-
|
|
99
|
-
DashboardInfoClass,
|
|
88
|
+
EmbedClass,
|
|
100
89
|
InputFieldClass,
|
|
101
90
|
InputFieldsClass,
|
|
102
91
|
OwnerClass,
|
|
103
|
-
OwnershipClass,
|
|
104
92
|
OwnershipTypeClass,
|
|
105
|
-
SubTypesClass,
|
|
106
93
|
)
|
|
94
|
+
from datahub.sdk.chart import Chart
|
|
95
|
+
from datahub.sdk.container import Container
|
|
96
|
+
from datahub.sdk.dashboard import Dashboard
|
|
97
|
+
from datahub.sdk.dataset import Dataset
|
|
98
|
+
from datahub.sdk.entity import Entity
|
|
107
99
|
from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
|
|
100
|
+
from datahub.utilities.sentinels import Unset, unset
|
|
108
101
|
|
|
109
102
|
logger = logging.getLogger(__name__)
|
|
110
103
|
|
|
111
104
|
|
|
105
|
+
@dataclass
|
|
106
|
+
class DashboardProcessingResult:
|
|
107
|
+
"""Result of processing a single dashboard."""
|
|
108
|
+
|
|
109
|
+
entities: List[Entity]
|
|
110
|
+
dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
|
|
111
|
+
dashboard_id: str
|
|
112
|
+
start_time: datetime.datetime
|
|
113
|
+
end_time: datetime.datetime
|
|
114
|
+
|
|
115
|
+
|
|
112
116
|
@platform_name("Looker")
|
|
113
117
|
@support_status(SupportStatus.CERTIFIED)
|
|
114
118
|
@config_class(LookerDashboardSourceConfig)
|
|
@@ -126,6 +130,15 @@ logger = logging.getLogger(__name__)
|
|
|
126
130
|
SourceCapability.USAGE_STATS,
|
|
127
131
|
"Enabled by default, configured using `extract_usage_history`",
|
|
128
132
|
)
|
|
133
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
134
|
+
@capability(
|
|
135
|
+
SourceCapability.CONTAINERS,
|
|
136
|
+
"Enabled by default",
|
|
137
|
+
subtype_modifier=[
|
|
138
|
+
SourceCapabilityModifier.LOOKML_MODEL,
|
|
139
|
+
SourceCapabilityModifier.LOOKER_FOLDER,
|
|
140
|
+
],
|
|
141
|
+
)
|
|
129
142
|
class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
130
143
|
"""
|
|
131
144
|
This plugin extracts the following:
|
|
@@ -623,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
623
636
|
|
|
624
637
|
return chart_type
|
|
625
638
|
|
|
626
|
-
def
|
|
639
|
+
def _get_folder_ancestors_urn_entries(
|
|
627
640
|
self, folder: LookerFolder, include_current_folder: bool = True
|
|
628
|
-
) -> Iterable[
|
|
641
|
+
) -> Iterable[str]:
|
|
629
642
|
for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
|
|
630
|
-
assert ancestor.id
|
|
643
|
+
assert ancestor.id # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
|
|
631
644
|
urn = self._gen_folder_key(ancestor.id).as_urn()
|
|
632
|
-
yield
|
|
645
|
+
yield urn
|
|
633
646
|
|
|
634
647
|
urn = self._gen_folder_key(folder.id).as_urn()
|
|
635
648
|
if include_current_folder:
|
|
636
|
-
yield
|
|
637
|
-
|
|
638
|
-
def _create_platform_instance_aspect(
|
|
639
|
-
self,
|
|
640
|
-
) -> DataPlatformInstance:
|
|
641
|
-
assert self.source_config.platform_name, (
|
|
642
|
-
"Platform name is not set in the configuration."
|
|
643
|
-
)
|
|
644
|
-
assert self.source_config.platform_instance, (
|
|
645
|
-
"Platform instance is not set in the configuration."
|
|
646
|
-
)
|
|
647
|
-
|
|
648
|
-
return DataPlatformInstance(
|
|
649
|
-
platform=builder.make_data_platform_urn(self.source_config.platform_name),
|
|
650
|
-
instance=builder.make_dataplatform_instance_urn(
|
|
651
|
-
platform=self.source_config.platform_name,
|
|
652
|
-
instance=self.source_config.platform_instance,
|
|
653
|
-
),
|
|
654
|
-
)
|
|
649
|
+
yield urn
|
|
655
650
|
|
|
656
651
|
def _make_chart_urn(self, element_id: str) -> str:
|
|
657
652
|
platform_instance: Optional[str] = None
|
|
@@ -664,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
664
659
|
platform_instance=platform_instance,
|
|
665
660
|
)
|
|
666
661
|
|
|
667
|
-
def
|
|
662
|
+
def _make_chart_entities(
|
|
668
663
|
self,
|
|
669
664
|
dashboard_element: LookerDashboardElement,
|
|
670
665
|
dashboard: Optional[
|
|
671
666
|
LookerDashboard
|
|
672
667
|
], # dashboard will be None if this is a standalone look
|
|
673
|
-
) -> List[
|
|
674
|
-
|
|
675
|
-
element_id=dashboard_element.get_urn_element_id()
|
|
676
|
-
)
|
|
677
|
-
self.chart_urns.add(chart_urn)
|
|
678
|
-
chart_snapshot = ChartSnapshot(
|
|
679
|
-
urn=chart_urn,
|
|
680
|
-
aspects=[Status(removed=False)],
|
|
681
|
-
)
|
|
682
|
-
browse_path_v2: Optional[BrowsePathsV2Class] = None
|
|
683
|
-
|
|
684
|
-
chart_type = self._get_chart_type(dashboard_element)
|
|
685
|
-
chart_info = ChartInfoClass(
|
|
686
|
-
type=chart_type,
|
|
687
|
-
description=dashboard_element.description or "",
|
|
688
|
-
title=dashboard_element.title or "",
|
|
689
|
-
lastModified=ChangeAuditStamps(),
|
|
690
|
-
chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
|
|
691
|
-
inputs=dashboard_element.get_view_urns(self.source_config),
|
|
692
|
-
customProperties={
|
|
693
|
-
"upstream_fields": (
|
|
694
|
-
",".join(
|
|
695
|
-
sorted({field.name for field in dashboard_element.input_fields})
|
|
696
|
-
)
|
|
697
|
-
if dashboard_element.input_fields
|
|
698
|
-
else ""
|
|
699
|
-
)
|
|
700
|
-
},
|
|
701
|
-
)
|
|
702
|
-
chart_snapshot.aspects.append(chart_info)
|
|
703
|
-
|
|
668
|
+
) -> List[Chart]:
|
|
669
|
+
chart_parent_container: Union[List[str], Unset] = unset
|
|
704
670
|
if (
|
|
705
671
|
dashboard
|
|
706
672
|
and dashboard.folder_path is not None
|
|
707
673
|
and dashboard.folder is not None
|
|
708
674
|
):
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
dashboard_urn = self.make_dashboard_urn(dashboard)
|
|
715
|
-
browse_path_v2 = BrowsePathsV2Class(
|
|
716
|
-
path=[
|
|
717
|
-
BrowsePathEntryClass("Folders"),
|
|
718
|
-
*self._get_folder_browse_path_v2_entries(dashboard.folder),
|
|
719
|
-
BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
|
|
720
|
-
],
|
|
721
|
-
)
|
|
675
|
+
chart_parent_container = [
|
|
676
|
+
"Folders",
|
|
677
|
+
*self._get_folder_ancestors_urn_entries(dashboard.folder),
|
|
678
|
+
self.make_dashboard_urn(dashboard),
|
|
679
|
+
]
|
|
722
680
|
elif (
|
|
723
681
|
dashboard is None
|
|
724
682
|
and dashboard_element.folder_path is not None
|
|
725
683
|
and dashboard_element.folder is not None
|
|
726
|
-
): #
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
browse_path_v2 = BrowsePathsV2Class(
|
|
732
|
-
path=[
|
|
733
|
-
BrowsePathEntryClass("Folders"),
|
|
734
|
-
*self._get_folder_browse_path_v2_entries(dashboard_element.folder),
|
|
735
|
-
],
|
|
736
|
-
)
|
|
684
|
+
): # Independent look
|
|
685
|
+
chart_parent_container = [
|
|
686
|
+
"Folders",
|
|
687
|
+
*self._get_folder_ancestors_urn_entries(dashboard_element.folder),
|
|
688
|
+
]
|
|
737
689
|
|
|
690
|
+
# Determine chart ownership
|
|
691
|
+
chart_ownership: Optional[List[OwnerClass]] = None
|
|
738
692
|
if dashboard is not None:
|
|
739
693
|
ownership = self.get_ownership(dashboard)
|
|
740
694
|
if ownership is not None:
|
|
741
|
-
|
|
695
|
+
chart_ownership = [ownership]
|
|
742
696
|
elif dashboard is None and dashboard_element is not None:
|
|
743
697
|
ownership = self.get_ownership(dashboard_element)
|
|
744
698
|
if ownership is not None:
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
|
|
748
|
-
|
|
749
|
-
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
|
|
750
|
-
chart_mce,
|
|
751
|
-
MetadataChangeProposalWrapper(
|
|
752
|
-
entityUrn=chart_urn,
|
|
753
|
-
aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
|
|
754
|
-
),
|
|
755
|
-
]
|
|
756
|
-
|
|
757
|
-
if self.source_config.include_platform_instance_in_urns:
|
|
758
|
-
proposals.append(
|
|
759
|
-
MetadataChangeProposalWrapper(
|
|
760
|
-
entityUrn=chart_urn,
|
|
761
|
-
aspect=self._create_platform_instance_aspect(),
|
|
762
|
-
),
|
|
763
|
-
)
|
|
699
|
+
chart_ownership = [ownership]
|
|
764
700
|
|
|
701
|
+
chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
|
|
765
702
|
# If extracting embeds is enabled, produce an MCP for embed URL.
|
|
766
703
|
if (
|
|
767
704
|
self.source_config.extract_embed_urls
|
|
@@ -771,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
771
708
|
self.source_config.external_base_url
|
|
772
709
|
)
|
|
773
710
|
if maybe_embed_url:
|
|
774
|
-
|
|
775
|
-
create_embed_mcp(
|
|
776
|
-
chart_snapshot.urn,
|
|
777
|
-
maybe_embed_url,
|
|
778
|
-
)
|
|
779
|
-
)
|
|
711
|
+
chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
|
|
780
712
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
713
|
+
chart_extra_aspects.append(
|
|
714
|
+
InputFieldsClass(
|
|
715
|
+
fields=self._input_fields_from_dashboard_element(dashboard_element)
|
|
784
716
|
)
|
|
785
|
-
proposals.append(
|
|
786
|
-
MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
|
|
787
|
-
)
|
|
788
|
-
|
|
789
|
-
if browse_path_v2:
|
|
790
|
-
proposals.append(
|
|
791
|
-
MetadataChangeProposalWrapper(
|
|
792
|
-
entityUrn=chart_urn, aspect=browse_path_v2
|
|
793
|
-
)
|
|
794
|
-
)
|
|
795
|
-
|
|
796
|
-
return proposals
|
|
797
|
-
|
|
798
|
-
def _make_dashboard_metadata_events(
|
|
799
|
-
self, looker_dashboard: LookerDashboard, chart_urns: List[str]
|
|
800
|
-
) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
|
|
801
|
-
dashboard_urn = self.make_dashboard_urn(looker_dashboard)
|
|
802
|
-
dashboard_snapshot = DashboardSnapshot(
|
|
803
|
-
urn=dashboard_urn,
|
|
804
|
-
aspects=[],
|
|
805
|
-
)
|
|
806
|
-
browse_path_v2: Optional[BrowsePathsV2Class] = None
|
|
807
|
-
dashboard_info = DashboardInfoClass(
|
|
808
|
-
description=looker_dashboard.description or "",
|
|
809
|
-
title=looker_dashboard.title,
|
|
810
|
-
charts=chart_urns,
|
|
811
|
-
lastModified=self._get_change_audit_stamps(looker_dashboard),
|
|
812
|
-
dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
|
|
813
717
|
)
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
718
|
+
return [
|
|
719
|
+
Chart(
|
|
720
|
+
chart_type=self._get_chart_type(dashboard_element),
|
|
721
|
+
chart_url=dashboard_element.url(
|
|
722
|
+
self.source_config.external_base_url or ""
|
|
723
|
+
),
|
|
724
|
+
custom_properties={
|
|
725
|
+
"upstream_fields": (
|
|
726
|
+
",".join(
|
|
727
|
+
sorted(
|
|
728
|
+
{field.name for field in dashboard_element.input_fields}
|
|
729
|
+
)
|
|
730
|
+
)
|
|
731
|
+
if dashboard_element.input_fields
|
|
732
|
+
else ""
|
|
733
|
+
)
|
|
734
|
+
},
|
|
735
|
+
description=dashboard_element.description or "",
|
|
736
|
+
display_name=dashboard_element.title, # title is (deprecated) using display_name
|
|
737
|
+
extra_aspects=chart_extra_aspects,
|
|
738
|
+
input_datasets=dashboard_element.get_view_urns(self.source_config),
|
|
739
|
+
last_modified=self._get_last_modified_time(
|
|
740
|
+
dashboard
|
|
741
|
+
), # Inherited from Dashboard
|
|
742
|
+
last_modified_by=self._get_last_modified_by(
|
|
743
|
+
dashboard
|
|
744
|
+
), # Inherited from Dashboard
|
|
745
|
+
created_at=self._get_created_at(dashboard), # Inherited from Dashboard
|
|
746
|
+
created_by=self._get_created_by(dashboard), # Inherited from Dashboard
|
|
747
|
+
deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard
|
|
748
|
+
deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard
|
|
749
|
+
name=dashboard_element.get_urn_element_id(),
|
|
750
|
+
owners=chart_ownership,
|
|
751
|
+
parent_container=chart_parent_container,
|
|
752
|
+
platform=self.source_config.platform_name,
|
|
753
|
+
platform_instance=self.source_config.platform_instance
|
|
754
|
+
if self.source_config.include_platform_instance_in_urns
|
|
755
|
+
else None,
|
|
756
|
+
subtype=BIAssetSubTypes.LOOKER_LOOK,
|
|
828
757
|
)
|
|
829
|
-
dashboard_snapshot.aspects.append(browse_path)
|
|
830
|
-
|
|
831
|
-
ownership = self.get_ownership(looker_dashboard)
|
|
832
|
-
if ownership is not None:
|
|
833
|
-
dashboard_snapshot.aspects.append(ownership)
|
|
834
|
-
|
|
835
|
-
dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
|
|
836
|
-
|
|
837
|
-
dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
|
|
838
|
-
|
|
839
|
-
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
|
|
840
|
-
dashboard_mce
|
|
841
758
|
]
|
|
842
759
|
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
760
|
+
def _make_dashboard_entities(
|
|
761
|
+
self, looker_dashboard: LookerDashboard, charts: List[Chart]
|
|
762
|
+
) -> List[Dashboard]:
|
|
763
|
+
dashboard_ownership: Optional[List[OwnerClass]] = None
|
|
764
|
+
ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
|
|
765
|
+
if ownership is not None:
|
|
766
|
+
dashboard_ownership = [ownership]
|
|
850
767
|
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
MetadataChangeProposalWrapper(
|
|
854
|
-
entityUrn=dashboard_urn, aspect=browse_path_v2
|
|
855
|
-
)
|
|
856
|
-
)
|
|
768
|
+
# Extra Aspects not yet supported in the Dashboard entity class SDKv2
|
|
769
|
+
dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
|
|
857
770
|
|
|
858
|
-
#
|
|
771
|
+
# Embed URL aspect
|
|
859
772
|
if (
|
|
860
773
|
self.source_config.extract_embed_urls
|
|
861
774
|
and self.source_config.external_base_url
|
|
862
775
|
):
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
776
|
+
dashboard_extra_aspects.append(
|
|
777
|
+
EmbedClass(
|
|
778
|
+
renderUrl=looker_dashboard.embed_url(
|
|
779
|
+
self.source_config.external_base_url
|
|
780
|
+
)
|
|
867
781
|
)
|
|
868
782
|
)
|
|
869
783
|
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
)
|
|
784
|
+
# Input fields aspect
|
|
785
|
+
# Populate input fields from all the dashboard elements
|
|
786
|
+
all_fields: List[InputFieldClass] = []
|
|
787
|
+
for dashboard_element in looker_dashboard.dashboard_elements:
|
|
788
|
+
all_fields.extend(
|
|
789
|
+
self._input_fields_from_dashboard_element(dashboard_element)
|
|
876
790
|
)
|
|
791
|
+
dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
|
|
792
|
+
# Status aspect
|
|
793
|
+
dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
|
|
877
794
|
|
|
878
|
-
|
|
795
|
+
dashboard_parent_container: Union[List[str], Unset] = unset
|
|
796
|
+
if (
|
|
797
|
+
looker_dashboard.folder_path is not None
|
|
798
|
+
and looker_dashboard.folder is not None
|
|
799
|
+
):
|
|
800
|
+
dashboard_parent_container = [
|
|
801
|
+
"Folders",
|
|
802
|
+
*self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
|
|
803
|
+
]
|
|
804
|
+
|
|
805
|
+
return [
|
|
806
|
+
Dashboard(
|
|
807
|
+
charts=charts,
|
|
808
|
+
dashboard_url=looker_dashboard.url(
|
|
809
|
+
self.source_config.external_base_url
|
|
810
|
+
),
|
|
811
|
+
description=looker_dashboard.description or "",
|
|
812
|
+
display_name=looker_dashboard.title, # title is (deprecated) using display_name
|
|
813
|
+
extra_aspects=dashboard_extra_aspects,
|
|
814
|
+
last_modified=self._get_last_modified_time(looker_dashboard),
|
|
815
|
+
last_modified_by=self._get_last_modified_by(looker_dashboard),
|
|
816
|
+
created_at=self._get_created_at(looker_dashboard),
|
|
817
|
+
created_by=self._get_created_by(looker_dashboard),
|
|
818
|
+
deleted_on=self._get_deleted_on(looker_dashboard),
|
|
819
|
+
deleted_by=self._get_deleted_by(looker_dashboard),
|
|
820
|
+
name=looker_dashboard.get_urn_dashboard_id(),
|
|
821
|
+
owners=dashboard_ownership,
|
|
822
|
+
parent_container=dashboard_parent_container,
|
|
823
|
+
platform=self.source_config.platform_name,
|
|
824
|
+
platform_instance=self.source_config.platform_instance
|
|
825
|
+
if self.source_config.include_platform_instance_in_urns
|
|
826
|
+
else None,
|
|
827
|
+
)
|
|
828
|
+
]
|
|
879
829
|
|
|
880
830
|
def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
|
|
881
831
|
# Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
|
|
@@ -892,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
892
842
|
def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
|
|
893
843
|
return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
|
|
894
844
|
|
|
895
|
-
def
|
|
845
|
+
def _make_explore_containers(
|
|
896
846
|
self,
|
|
897
|
-
) -> Iterable[
|
|
898
|
-
Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
|
|
899
|
-
]:
|
|
847
|
+
) -> Iterable[Union[Container, Dataset]]:
|
|
900
848
|
if not self.source_config.emit_used_explores_only:
|
|
901
849
|
explores_to_fetch = list(self.list_all_explores())
|
|
902
850
|
else:
|
|
@@ -914,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
914
862
|
for project_name, model, _ in explores_to_fetch:
|
|
915
863
|
if model not in processed_models:
|
|
916
864
|
model_key = gen_model_key(self.source_config, model)
|
|
917
|
-
yield
|
|
865
|
+
yield Container(
|
|
918
866
|
container_key=model_key,
|
|
919
|
-
|
|
920
|
-
|
|
867
|
+
display_name=model,
|
|
868
|
+
subtype=BIContainerSubTypes.LOOKML_MODEL,
|
|
921
869
|
extra_properties=(
|
|
922
870
|
{"project": project_name} if project_name is not None else None
|
|
923
871
|
),
|
|
924
|
-
|
|
925
|
-
yield MetadataChangeProposalWrapper(
|
|
926
|
-
entityUrn=model_key.as_urn(),
|
|
927
|
-
aspect=BrowsePathsV2Class(
|
|
928
|
-
path=[BrowsePathEntryClass("Explore")],
|
|
929
|
-
),
|
|
872
|
+
parent_container=["Explore"],
|
|
930
873
|
)
|
|
931
874
|
|
|
932
875
|
processed_models.append(model)
|
|
@@ -937,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
937
880
|
((model, explore) for (_project, model, explore) in explores_to_fetch),
|
|
938
881
|
max_workers=self.source_config.max_threads,
|
|
939
882
|
):
|
|
940
|
-
|
|
883
|
+
explore_dataset_entity, explore_id, start_time, end_time = future.result()
|
|
941
884
|
self.reporter.explores_scanned += 1
|
|
942
|
-
|
|
885
|
+
if explore_dataset_entity:
|
|
886
|
+
yield explore_dataset_entity
|
|
943
887
|
self.reporter.report_upstream_latency(start_time, end_time)
|
|
944
888
|
logger.debug(
|
|
945
889
|
f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
|
|
@@ -959,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
959
903
|
def fetch_one_explore(
|
|
960
904
|
self, model: str, explore: str
|
|
961
905
|
) -> Tuple[
|
|
962
|
-
|
|
906
|
+
Optional[Dataset],
|
|
963
907
|
str,
|
|
964
908
|
datetime.datetime,
|
|
965
909
|
datetime.datetime,
|
|
966
910
|
]:
|
|
967
911
|
start_time = datetime.datetime.now()
|
|
968
|
-
events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
|
|
969
912
|
looker_explore = self.explore_registry.get_explore(model, explore)
|
|
913
|
+
explore_dataset_entity: Optional[Dataset] = None
|
|
970
914
|
if looker_explore is not None:
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
self.source_config.extract_embed_urls,
|
|
977
|
-
)
|
|
978
|
-
or events
|
|
915
|
+
explore_dataset_entity = looker_explore._to_metadata_events(
|
|
916
|
+
self.source_config,
|
|
917
|
+
self.reporter,
|
|
918
|
+
self.source_config.external_base_url or self.source_config.base_url,
|
|
919
|
+
self.source_config.extract_embed_urls,
|
|
979
920
|
)
|
|
980
921
|
|
|
981
|
-
return
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
return event.proposedSnapshot.urn
|
|
988
|
-
else:
|
|
989
|
-
return event.entityUrn
|
|
922
|
+
return (
|
|
923
|
+
explore_dataset_entity,
|
|
924
|
+
f"{model}:{explore}",
|
|
925
|
+
start_time,
|
|
926
|
+
datetime.datetime.now(),
|
|
927
|
+
)
|
|
990
928
|
|
|
991
|
-
def _emit_folder_as_container(
|
|
992
|
-
self, folder: LookerFolder
|
|
993
|
-
) -> Iterable[MetadataWorkUnit]:
|
|
929
|
+
def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
|
|
994
930
|
if folder.id not in self.processed_folders:
|
|
995
|
-
yield from gen_containers(
|
|
996
|
-
container_key=self._gen_folder_key(folder.id),
|
|
997
|
-
name=folder.name,
|
|
998
|
-
sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
|
|
999
|
-
parent_container_key=(
|
|
1000
|
-
self._gen_folder_key(folder.parent_id) if folder.parent_id else None
|
|
1001
|
-
),
|
|
1002
|
-
)
|
|
1003
931
|
if folder.parent_id is None:
|
|
1004
|
-
yield
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
)
|
|
932
|
+
yield Container(
|
|
933
|
+
container_key=self._gen_folder_key(folder.id),
|
|
934
|
+
display_name=folder.name,
|
|
935
|
+
subtype=BIContainerSubTypes.LOOKER_FOLDER,
|
|
936
|
+
parent_container=["Folders"],
|
|
937
|
+
)
|
|
1010
938
|
else:
|
|
1011
|
-
yield
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
)
|
|
939
|
+
yield Container(
|
|
940
|
+
container_key=self._gen_folder_key(folder.id),
|
|
941
|
+
display_name=folder.name,
|
|
942
|
+
subtype=BIContainerSubTypes.LOOKER_FOLDER,
|
|
943
|
+
parent_container=[
|
|
944
|
+
"Folders",
|
|
945
|
+
*self._get_folder_ancestors_urn_entries(
|
|
946
|
+
folder, include_current_folder=False
|
|
947
|
+
),
|
|
948
|
+
],
|
|
949
|
+
)
|
|
1022
950
|
self.processed_folders.append(folder.id)
|
|
1023
951
|
|
|
1024
952
|
def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
|
|
@@ -1029,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1029
957
|
instance=self.source_config.platform_instance,
|
|
1030
958
|
)
|
|
1031
959
|
|
|
1032
|
-
def
|
|
960
|
+
def _make_dashboard_and_chart_entities(
|
|
1033
961
|
self, looker_dashboard: LookerDashboard
|
|
1034
|
-
) -> Iterable[Union[
|
|
962
|
+
) -> Iterable[Union[Chart, Dashboard]]:
|
|
1035
963
|
# Step 1: Emit metadata for each Chart inside the Dashboard.
|
|
1036
|
-
chart_events = []
|
|
964
|
+
chart_events: List[Chart] = []
|
|
1037
965
|
for element in looker_dashboard.dashboard_elements:
|
|
1038
966
|
if element.type == "vis":
|
|
1039
967
|
chart_events.extend(
|
|
1040
|
-
self.
|
|
968
|
+
self._make_chart_entities(element, looker_dashboard)
|
|
1041
969
|
)
|
|
1042
970
|
|
|
1043
971
|
yield from chart_events
|
|
1044
972
|
|
|
1045
|
-
# Step 2: Emit metadata events for the Dashboard itself.
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
) # Collect the unique child chart urns for dashboard input lineage.
|
|
973
|
+
# # Step 2: Emit metadata events for the Dashboard itself.
|
|
974
|
+
# Create a set of unique chart entities for dashboard input lineage based in chart.urn
|
|
975
|
+
unique_chart_entities: List[Chart] = []
|
|
1049
976
|
for chart_event in chart_events:
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
977
|
+
# Use chart.urn to ensure uniqueness based on the chart's URN property
|
|
978
|
+
# Also, update the set of processed chart urns
|
|
979
|
+
if str(chart_event.urn) not in self.chart_urns:
|
|
980
|
+
self.chart_urns.add(str(chart_event.urn))
|
|
981
|
+
unique_chart_entities.append(chart_event)
|
|
982
|
+
|
|
983
|
+
dashboard_events = self._make_dashboard_entities(
|
|
984
|
+
looker_dashboard, unique_chart_entities
|
|
1056
985
|
)
|
|
1057
986
|
yield from dashboard_events
|
|
1058
987
|
|
|
1059
988
|
def get_ownership(
|
|
1060
989
|
self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
|
|
1061
|
-
) -> Optional[
|
|
990
|
+
) -> Optional[OwnerClass]:
|
|
1062
991
|
if looker_dashboard_look.owner is not None:
|
|
1063
992
|
owner_urn = looker_dashboard_look.owner.get_urn(
|
|
1064
993
|
self.source_config.strip_user_ids_from_email
|
|
1065
994
|
)
|
|
1066
995
|
if owner_urn is not None:
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
owner=owner_urn,
|
|
1071
|
-
type=OwnershipTypeClass.DATAOWNER,
|
|
1072
|
-
)
|
|
1073
|
-
]
|
|
996
|
+
return OwnerClass(
|
|
997
|
+
owner=owner_urn,
|
|
998
|
+
type=OwnershipTypeClass.DATAOWNER,
|
|
1074
999
|
)
|
|
1075
|
-
return ownership
|
|
1076
1000
|
return None
|
|
1077
1001
|
|
|
1078
|
-
def
|
|
1079
|
-
self, looker_dashboard: LookerDashboard
|
|
1080
|
-
) ->
|
|
1081
|
-
|
|
1082
|
-
if looker_dashboard.created_at is not None:
|
|
1083
|
-
change_audit_stamp.created.time = round(
|
|
1084
|
-
looker_dashboard.created_at.timestamp() * 1000
|
|
1085
|
-
)
|
|
1086
|
-
if looker_dashboard.owner is not None:
|
|
1087
|
-
owner_urn = looker_dashboard.owner.get_urn(
|
|
1088
|
-
self.source_config.strip_user_ids_from_email
|
|
1089
|
-
)
|
|
1090
|
-
if owner_urn:
|
|
1091
|
-
change_audit_stamp.created.actor = owner_urn
|
|
1092
|
-
if looker_dashboard.last_updated_at is not None:
|
|
1093
|
-
change_audit_stamp.lastModified.time = round(
|
|
1094
|
-
looker_dashboard.last_updated_at.timestamp() * 1000
|
|
1095
|
-
)
|
|
1096
|
-
if looker_dashboard.last_updated_by is not None:
|
|
1097
|
-
updated_by_urn = looker_dashboard.last_updated_by.get_urn(
|
|
1098
|
-
self.source_config.strip_user_ids_from_email
|
|
1099
|
-
)
|
|
1100
|
-
if updated_by_urn:
|
|
1101
|
-
change_audit_stamp.lastModified.actor = updated_by_urn
|
|
1102
|
-
if (
|
|
1103
|
-
looker_dashboard.is_deleted
|
|
1104
|
-
and looker_dashboard.deleted_by is not None
|
|
1105
|
-
and looker_dashboard.deleted_at is not None
|
|
1106
|
-
):
|
|
1107
|
-
deleter_urn = looker_dashboard.deleted_by.get_urn(
|
|
1108
|
-
self.source_config.strip_user_ids_from_email
|
|
1109
|
-
)
|
|
1110
|
-
if deleter_urn:
|
|
1111
|
-
change_audit_stamp.deleted = AuditStamp(
|
|
1112
|
-
actor=deleter_urn,
|
|
1113
|
-
time=round(looker_dashboard.deleted_at.timestamp() * 1000),
|
|
1114
|
-
)
|
|
1002
|
+
def _get_last_modified_time(
|
|
1003
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1004
|
+
) -> Optional[datetime.datetime]:
|
|
1005
|
+
return looker_dashboard.last_updated_at if looker_dashboard else None
|
|
1115
1006
|
|
|
1116
|
-
|
|
1007
|
+
def _get_last_modified_by(
|
|
1008
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1009
|
+
) -> Optional[str]:
|
|
1010
|
+
if not looker_dashboard or not looker_dashboard.last_updated_by:
|
|
1011
|
+
return None
|
|
1012
|
+
return looker_dashboard.last_updated_by.get_urn(
|
|
1013
|
+
self.source_config.strip_user_ids_from_email
|
|
1014
|
+
)
|
|
1015
|
+
|
|
1016
|
+
def _get_created_at(
|
|
1017
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1018
|
+
) -> Optional[datetime.datetime]:
|
|
1019
|
+
return looker_dashboard.created_at if looker_dashboard else None
|
|
1020
|
+
|
|
1021
|
+
def _get_created_by(
|
|
1022
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1023
|
+
) -> Optional[str]:
|
|
1024
|
+
if not looker_dashboard or not looker_dashboard.owner:
|
|
1025
|
+
return None
|
|
1026
|
+
return looker_dashboard.owner.get_urn(
|
|
1027
|
+
self.source_config.strip_user_ids_from_email
|
|
1028
|
+
)
|
|
1029
|
+
|
|
1030
|
+
def _get_deleted_on(
|
|
1031
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1032
|
+
) -> Optional[datetime.datetime]:
|
|
1033
|
+
return looker_dashboard.deleted_at if looker_dashboard else None
|
|
1034
|
+
|
|
1035
|
+
def _get_deleted_by(
|
|
1036
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1037
|
+
) -> Optional[str]:
|
|
1038
|
+
if not looker_dashboard or not looker_dashboard.deleted_by:
|
|
1039
|
+
return None
|
|
1040
|
+
return looker_dashboard.deleted_by.get_urn(
|
|
1041
|
+
self.source_config.strip_user_ids_from_email
|
|
1042
|
+
)
|
|
1117
1043
|
|
|
1118
1044
|
def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
|
|
1119
1045
|
assert folder.id
|
|
@@ -1126,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1126
1052
|
]
|
|
1127
1053
|
return "/".join(ancestors + [folder.name])
|
|
1128
1054
|
|
|
1129
|
-
def _get_looker_dashboard(self, dashboard:
|
|
1055
|
+
def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
|
|
1130
1056
|
self.reporter.accessed_dashboards += 1
|
|
1131
1057
|
if dashboard.folder is None:
|
|
1132
1058
|
logger.debug(f"{dashboard.id} has no folder")
|
|
@@ -1200,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1200
1126
|
|
|
1201
1127
|
return user
|
|
1202
1128
|
|
|
1203
|
-
def process_metrics_dimensions_and_fields_for_dashboard(
|
|
1204
|
-
self, dashboard: LookerDashboard
|
|
1205
|
-
) -> List[MetadataWorkUnit]:
|
|
1206
|
-
chart_mcps = [
|
|
1207
|
-
self._make_metrics_dimensions_chart_mcp(element)
|
|
1208
|
-
for element in dashboard.dashboard_elements
|
|
1209
|
-
]
|
|
1210
|
-
dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
|
|
1211
|
-
|
|
1212
|
-
mcps = chart_mcps
|
|
1213
|
-
mcps.append(dashboard_mcp)
|
|
1214
|
-
|
|
1215
|
-
workunits = [mcp.as_workunit() for mcp in mcps]
|
|
1216
|
-
|
|
1217
|
-
return workunits
|
|
1218
|
-
|
|
1219
1129
|
def _input_fields_from_dashboard_element(
|
|
1220
1130
|
self, dashboard_element: LookerDashboardElement
|
|
1221
1131
|
) -> List[InputFieldClass]:
|
|
@@ -1308,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1308
1218
|
aspect=input_fields_aspect,
|
|
1309
1219
|
)
|
|
1310
1220
|
|
|
1311
|
-
def
|
|
1221
|
+
def _should_skip_personal_folder_dashboard(
|
|
1222
|
+
self, dashboard_object: LookerAPIDashboard
|
|
1223
|
+
) -> bool:
|
|
1224
|
+
"""Check if dashboard should be skipped due to being in personal folder."""
|
|
1225
|
+
if not self.source_config.skip_personal_folders:
|
|
1226
|
+
return False
|
|
1227
|
+
|
|
1228
|
+
if dashboard_object.folder is not None and (
|
|
1229
|
+
dashboard_object.folder.is_personal
|
|
1230
|
+
or dashboard_object.folder.is_personal_descendant
|
|
1231
|
+
):
|
|
1232
|
+
self.reporter.info(
|
|
1233
|
+
title="Dropped Dashboard",
|
|
1234
|
+
message="Dropped due to being a personal folder",
|
|
1235
|
+
context=f"Dashboard ID: {dashboard_object.id}",
|
|
1236
|
+
)
|
|
1237
|
+
assert dashboard_object.id is not None
|
|
1238
|
+
self.reporter.report_dashboards_dropped(dashboard_object.id)
|
|
1239
|
+
return True
|
|
1240
|
+
return False
|
|
1241
|
+
|
|
1242
|
+
def _should_skip_dashboard_by_folder_path(
|
|
1243
|
+
self, looker_dashboard: LookerDashboard
|
|
1244
|
+
) -> bool:
|
|
1245
|
+
"""Check if dashboard should be skipped based on folder path pattern."""
|
|
1246
|
+
if (
|
|
1247
|
+
looker_dashboard.folder_path is not None
|
|
1248
|
+
and not self.source_config.folder_path_pattern.allowed(
|
|
1249
|
+
looker_dashboard.folder_path
|
|
1250
|
+
)
|
|
1251
|
+
):
|
|
1252
|
+
logger.debug(
|
|
1253
|
+
f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
|
|
1254
|
+
)
|
|
1255
|
+
self.reporter.report_dashboards_dropped(looker_dashboard.id)
|
|
1256
|
+
return True
|
|
1257
|
+
return False
|
|
1258
|
+
|
|
1259
|
+
def _fetch_dashboard_from_api(
|
|
1312
1260
|
self, dashboard_id: str, fields: List[str]
|
|
1313
|
-
) ->
|
|
1314
|
-
|
|
1315
|
-
Optional[looker_usage.LookerDashboardForUsage],
|
|
1316
|
-
str,
|
|
1317
|
-
datetime.datetime,
|
|
1318
|
-
datetime.datetime,
|
|
1319
|
-
]:
|
|
1320
|
-
start_time = datetime.datetime.now()
|
|
1321
|
-
assert dashboard_id is not None
|
|
1322
|
-
if not self.source_config.dashboard_pattern.allowed(dashboard_id):
|
|
1323
|
-
self.reporter.report_dashboards_dropped(dashboard_id)
|
|
1324
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1261
|
+
) -> Optional[LookerAPIDashboard]:
|
|
1262
|
+
"""Fetch dashboard object from Looker API with error handling."""
|
|
1325
1263
|
try:
|
|
1326
|
-
|
|
1264
|
+
return self.looker_api.dashboard(
|
|
1327
1265
|
dashboard_id=dashboard_id,
|
|
1328
1266
|
fields=fields,
|
|
1329
1267
|
)
|
|
1330
1268
|
except (SDKError, DeserializeError) as e:
|
|
1331
|
-
# A looker dashboard could be deleted in between the list and the get
|
|
1332
1269
|
self.reporter.report_warning(
|
|
1333
1270
|
title="Failed to fetch dashboard from the Looker API",
|
|
1334
1271
|
message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
|
|
1335
1272
|
context=f"Dashboard ID: {dashboard_id}",
|
|
1336
1273
|
exc=e,
|
|
1337
1274
|
)
|
|
1338
|
-
return
|
|
1275
|
+
return None
|
|
1339
1276
|
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1277
|
+
def _create_empty_result(
|
|
1278
|
+
self, dashboard_id: str, start_time: datetime.datetime
|
|
1279
|
+
) -> DashboardProcessingResult:
|
|
1280
|
+
"""Create an empty result for skipped or failed dashboard processing."""
|
|
1281
|
+
return DashboardProcessingResult(
|
|
1282
|
+
entities=[],
|
|
1283
|
+
dashboard_usage=None,
|
|
1284
|
+
dashboard_id=dashboard_id,
|
|
1285
|
+
start_time=start_time,
|
|
1286
|
+
end_time=datetime.datetime.now(),
|
|
1287
|
+
)
|
|
1352
1288
|
|
|
1353
|
-
|
|
1289
|
+
def process_dashboard(
|
|
1290
|
+
self, dashboard_id: str, fields: List[str]
|
|
1291
|
+
) -> DashboardProcessingResult:
|
|
1292
|
+
"""
|
|
1293
|
+
Process a single dashboard and return the metadata workunits.
|
|
1354
1294
|
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
and not self.source_config.folder_path_pattern.allowed(
|
|
1359
|
-
looker_dashboard.folder_path
|
|
1360
|
-
)
|
|
1361
|
-
):
|
|
1362
|
-
logger.debug(
|
|
1363
|
-
f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
|
|
1364
|
-
)
|
|
1365
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1295
|
+
Args:
|
|
1296
|
+
dashboard_id: The ID of the dashboard to process
|
|
1297
|
+
fields: List of fields to fetch from the Looker API
|
|
1366
1298
|
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1299
|
+
Returns:
|
|
1300
|
+
DashboardProcessingResult containing entities, usage data, and timing information
|
|
1301
|
+
"""
|
|
1302
|
+
start_time = datetime.datetime.now()
|
|
1371
1303
|
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
(
|
|
1375
|
-
MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
|
|
1376
|
-
if isinstance(mce, MetadataChangeEvent)
|
|
1377
|
-
else MetadataWorkUnit(
|
|
1378
|
-
id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
|
|
1379
|
-
)
|
|
1380
|
-
)
|
|
1381
|
-
for mce in mces
|
|
1382
|
-
]
|
|
1304
|
+
if dashboard_id is None:
|
|
1305
|
+
raise ValueError("Dashboard ID cannot be None")
|
|
1383
1306
|
|
|
1384
|
-
#
|
|
1385
|
-
|
|
1386
|
-
|
|
1307
|
+
# Fetch dashboard from API
|
|
1308
|
+
dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
|
|
1309
|
+
dashboard_id, fields
|
|
1387
1310
|
)
|
|
1311
|
+
if dashboard_object is None:
|
|
1312
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1313
|
+
|
|
1314
|
+
# Check if dashboard should be skipped due to personal folder
|
|
1315
|
+
if self._should_skip_personal_folder_dashboard(dashboard_object):
|
|
1316
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1317
|
+
|
|
1318
|
+
# Convert to internal representation
|
|
1319
|
+
looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
|
|
1320
|
+
|
|
1321
|
+
# Check folder path pattern
|
|
1322
|
+
if self._should_skip_dashboard_by_folder_path(looker_dashboard):
|
|
1323
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1324
|
+
|
|
1325
|
+
# Build entities list
|
|
1326
|
+
entities: List[Entity] = []
|
|
1388
1327
|
|
|
1389
|
-
|
|
1328
|
+
# Add folder containers if dashboard has a folder
|
|
1329
|
+
if looker_dashboard.folder:
|
|
1330
|
+
entities.extend(
|
|
1331
|
+
list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
|
|
1332
|
+
)
|
|
1390
1333
|
|
|
1334
|
+
# Add dashboard and chart entities
|
|
1335
|
+
entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
|
|
1336
|
+
|
|
1337
|
+
# Report successful processing
|
|
1391
1338
|
self.reporter.report_dashboards_scanned()
|
|
1392
1339
|
|
|
1393
|
-
#
|
|
1340
|
+
# Generate usage tracking object
|
|
1394
1341
|
dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
|
|
1395
1342
|
dashboard_object
|
|
1396
1343
|
)
|
|
1397
1344
|
|
|
1398
|
-
return (
|
|
1399
|
-
|
|
1400
|
-
dashboard_usage,
|
|
1401
|
-
dashboard_id,
|
|
1402
|
-
start_time,
|
|
1403
|
-
datetime.datetime.now(),
|
|
1345
|
+
return DashboardProcessingResult(
|
|
1346
|
+
entities=entities,
|
|
1347
|
+
dashboard_usage=dashboard_usage,
|
|
1348
|
+
dashboard_id=dashboard_id,
|
|
1349
|
+
start_time=start_time,
|
|
1350
|
+
end_time=datetime.datetime.now(),
|
|
1404
1351
|
)
|
|
1405
1352
|
|
|
1406
|
-
def
|
|
1353
|
+
def _get_folder_and_ancestors_containers(
|
|
1407
1354
|
self, folder: LookerFolder
|
|
1408
|
-
) -> Iterable[
|
|
1355
|
+
) -> Iterable[Container]:
|
|
1409
1356
|
for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
|
|
1410
1357
|
yield from self._emit_folder_as_container(
|
|
1411
1358
|
self._get_looker_folder(ancestor_folder)
|
|
@@ -1476,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1476
1423
|
).workunit_processor,
|
|
1477
1424
|
]
|
|
1478
1425
|
|
|
1479
|
-
def
|
|
1426
|
+
def emit_independent_looks_entities(
|
|
1480
1427
|
self, dashboard_element: LookerDashboardElement
|
|
1481
|
-
) -> Iterable[
|
|
1428
|
+
) -> Iterable[Union[Container, Chart]]:
|
|
1482
1429
|
if dashboard_element.folder: # independent look
|
|
1483
|
-
yield from self.
|
|
1430
|
+
yield from self._get_folder_and_ancestors_containers(
|
|
1484
1431
|
dashboard_element.folder
|
|
1485
1432
|
)
|
|
1486
1433
|
|
|
1487
|
-
yield from
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
dashboard=None,
|
|
1491
|
-
)
|
|
1434
|
+
yield from self._make_chart_entities(
|
|
1435
|
+
dashboard_element=dashboard_element,
|
|
1436
|
+
dashboard=None,
|
|
1492
1437
|
)
|
|
1493
1438
|
|
|
1494
|
-
|
|
1495
|
-
[
|
|
1496
|
-
self._make_metrics_dimensions_chart_mcp(
|
|
1497
|
-
dashboard_element,
|
|
1498
|
-
)
|
|
1499
|
-
]
|
|
1500
|
-
)
|
|
1501
|
-
|
|
1502
|
-
def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
|
|
1503
|
-
"""
|
|
1504
|
-
Emit MetadataWorkUnit for looks which are not part of any Dashboard
|
|
1439
|
+
def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
|
|
1505
1440
|
"""
|
|
1506
|
-
|
|
1507
|
-
return
|
|
1441
|
+
Emit entities for Looks which are not part of any Dashboard.
|
|
1508
1442
|
|
|
1509
|
-
|
|
1443
|
+
Returns: Containers for the folders and ancestors folders and Charts for the looks
|
|
1444
|
+
"""
|
|
1445
|
+
logger.debug("Extracting Looks not part of any Dashboard")
|
|
1510
1446
|
|
|
1511
|
-
logger.debug("Extracting looks not part of Dashboard")
|
|
1512
1447
|
look_fields: List[str] = [
|
|
1513
1448
|
"id",
|
|
1514
1449
|
"title",
|
|
@@ -1530,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1530
1465
|
all_looks: List[Look] = self.looker_api.all_looks(
|
|
1531
1466
|
fields=look_fields, soft_deleted=self.source_config.include_deleted
|
|
1532
1467
|
)
|
|
1468
|
+
|
|
1533
1469
|
for look in all_looks:
|
|
1470
|
+
# Skip looks that are already referenced from a dashboard
|
|
1471
|
+
if look.id is None:
|
|
1472
|
+
logger.warning("Encountered Look with no ID, skipping.")
|
|
1473
|
+
continue
|
|
1474
|
+
|
|
1534
1475
|
if look.id in self.reachable_look_registry:
|
|
1535
|
-
# This look is reachable from the Dashboard
|
|
1536
1476
|
continue
|
|
1537
1477
|
|
|
1538
1478
|
if look.query_id is None:
|
|
1539
1479
|
logger.info(f"query_id is None for look {look.title}({look.id})")
|
|
1540
1480
|
continue
|
|
1541
1481
|
|
|
1482
|
+
# Skip looks in personal folders if configured
|
|
1542
1483
|
if self.source_config.skip_personal_folders:
|
|
1543
1484
|
if look.folder is not None and (
|
|
1544
1485
|
look.folder.is_personal or look.folder.is_personal_descendant
|
|
@@ -1549,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1549
1490
|
context=f"Look ID: {look.id}",
|
|
1550
1491
|
)
|
|
1551
1492
|
|
|
1552
|
-
assert look.id, "Looker id is null"
|
|
1553
1493
|
self.reporter.report_charts_dropped(look.id)
|
|
1554
1494
|
continue
|
|
1555
1495
|
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1496
|
+
# Fetch the Look's query and filter to allowed fields
|
|
1497
|
+
query: Optional[Query] = None
|
|
1498
|
+
try:
|
|
1499
|
+
look_with_query = self.looker_api.get_look(look.id, fields=["query"])
|
|
1500
|
+
query_obj = look_with_query.query
|
|
1501
|
+
if query_obj:
|
|
1502
|
+
query = Query(
|
|
1503
|
+
**{
|
|
1504
|
+
key: getattr(query_obj, key)
|
|
1505
|
+
for key in query_fields
|
|
1506
|
+
if hasattr(query_obj, key)
|
|
1507
|
+
}
|
|
1508
|
+
)
|
|
1509
|
+
except Exception as exc:
|
|
1510
|
+
logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
|
|
1511
|
+
continue
|
|
1568
1512
|
|
|
1569
|
-
dashboard_element
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
),
|
|
1513
|
+
dashboard_element = self._get_looker_dashboard_element(
|
|
1514
|
+
DashboardElement(
|
|
1515
|
+
id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
|
|
1516
|
+
# we add the "looks_" prefix to look.id.
|
|
1517
|
+
title=look.title,
|
|
1518
|
+
subtitle_text=look.description,
|
|
1519
|
+
look_id=look.id,
|
|
1520
|
+
dashboard_id=None, # As this is an independent look
|
|
1521
|
+
look=LookWithQuery(
|
|
1522
|
+
query=query,
|
|
1523
|
+
folder=getattr(look, "folder", None),
|
|
1524
|
+
user_id=getattr(look, "user_id", None),
|
|
1581
1525
|
),
|
|
1582
1526
|
)
|
|
1583
1527
|
)
|
|
1584
1528
|
|
|
1585
1529
|
if dashboard_element is not None:
|
|
1586
|
-
logger.debug(f"Emitting
|
|
1587
|
-
yield from self.
|
|
1530
|
+
logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
|
|
1531
|
+
yield from self.emit_independent_looks_entities(
|
|
1588
1532
|
dashboard_element=dashboard_element
|
|
1589
1533
|
)
|
|
1590
1534
|
|
|
1591
|
-
|
|
1535
|
+
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
1536
|
+
"""
|
|
1537
|
+
Note: Returns Entities from SDKv2 where possible else MCPs only.
|
|
1592
1538
|
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
dashboards = self.looker_api.all_dashboards(fields="id")
|
|
1596
|
-
deleted_dashboards = (
|
|
1597
|
-
self.looker_api.search_dashboards(fields="id", deleted="true")
|
|
1598
|
-
if self.source_config.include_deleted
|
|
1599
|
-
else []
|
|
1600
|
-
)
|
|
1601
|
-
if deleted_dashboards != []:
|
|
1602
|
-
logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
|
|
1539
|
+
Using SDKv2: Containers, Datasets, Dashboards and Charts
|
|
1540
|
+
Using MCPW: Tags, DashboardUsageStats and UserResourceMapping
|
|
1603
1541
|
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1542
|
+
TODO: Convert MCPWs to use SDKv2 entities
|
|
1543
|
+
"""
|
|
1544
|
+
with self.reporter.report_stage("list_dashboards"):
|
|
1545
|
+
# Fetch all dashboards (not deleted)
|
|
1546
|
+
dashboards = self.looker_api.all_dashboards(fields="id")
|
|
1547
|
+
|
|
1548
|
+
# Optionally fetch deleted dashboards if configured
|
|
1549
|
+
if self.source_config.include_deleted:
|
|
1550
|
+
deleted_dashboards = self.looker_api.search_dashboards(
|
|
1551
|
+
fields="id", deleted="true"
|
|
1552
|
+
)
|
|
1614
1553
|
else:
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1554
|
+
deleted_dashboards = []
|
|
1555
|
+
|
|
1556
|
+
if deleted_dashboards:
|
|
1557
|
+
logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
|
|
1558
|
+
|
|
1559
|
+
# Collect all dashboard IDs (including deleted if applicable)
|
|
1560
|
+
all_dashboard_ids: List[Optional[str]] = [
|
|
1561
|
+
dashboard.id for dashboard in dashboards
|
|
1562
|
+
]
|
|
1563
|
+
all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
|
|
1619
1564
|
|
|
1620
|
-
|
|
1621
|
-
|
|
1565
|
+
# Filter dashboard IDs based on the allowed pattern
|
|
1566
|
+
filtered_dashboard_ids: List[str] = []
|
|
1567
|
+
for dashboard_id in all_dashboard_ids:
|
|
1568
|
+
if dashboard_id is None:
|
|
1569
|
+
continue
|
|
1570
|
+
if not self.source_config.dashboard_pattern.allowed(dashboard_id):
|
|
1571
|
+
self.reporter.report_dashboards_dropped(dashboard_id)
|
|
1572
|
+
else:
|
|
1573
|
+
filtered_dashboard_ids.append(dashboard_id)
|
|
1574
|
+
|
|
1575
|
+
# Use the filtered list for further processing
|
|
1576
|
+
dashboard_ids: List[str] = filtered_dashboard_ids
|
|
1577
|
+
|
|
1578
|
+
# Report the total number of dashboards to be processed
|
|
1579
|
+
self.reporter.report_total_dashboards(len(dashboard_ids))
|
|
1580
|
+
|
|
1581
|
+
# Define the fields to extract for each dashboard
|
|
1582
|
+
dashboard_fields = [
|
|
1622
1583
|
"id",
|
|
1623
1584
|
"title",
|
|
1624
1585
|
"dashboard_elements",
|
|
@@ -1634,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1634
1595
|
"deleted_at",
|
|
1635
1596
|
"deleter_id",
|
|
1636
1597
|
]
|
|
1598
|
+
|
|
1599
|
+
# Add usage-related fields if usage history extraction is enabled
|
|
1637
1600
|
if self.source_config.extract_usage_history:
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1601
|
+
dashboard_fields.extend(
|
|
1602
|
+
[
|
|
1603
|
+
"favorite_count",
|
|
1604
|
+
"view_count",
|
|
1605
|
+
"last_viewed_at",
|
|
1606
|
+
]
|
|
1607
|
+
)
|
|
1643
1608
|
|
|
1609
|
+
# Store dashboards for which usage stats will be extracted
|
|
1644
1610
|
looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
|
|
1645
1611
|
|
|
1612
|
+
# Process dashboard and chart metadata
|
|
1646
1613
|
with self.reporter.report_stage("dashboard_chart_metadata"):
|
|
1614
|
+
dashboard_jobs = (
|
|
1615
|
+
(dashboard_id, dashboard_fields)
|
|
1616
|
+
for dashboard_id in dashboard_ids
|
|
1617
|
+
if dashboard_id is not None
|
|
1618
|
+
)
|
|
1647
1619
|
for job in BackpressureAwareExecutor.map(
|
|
1648
1620
|
self.process_dashboard,
|
|
1649
|
-
|
|
1650
|
-
(dashboard_id, fields)
|
|
1651
|
-
for dashboard_id in dashboard_ids
|
|
1652
|
-
if dashboard_id is not None
|
|
1653
|
-
),
|
|
1621
|
+
dashboard_jobs,
|
|
1654
1622
|
max_workers=self.source_config.max_threads,
|
|
1655
1623
|
):
|
|
1656
|
-
(
|
|
1657
|
-
|
|
1658
|
-
dashboard_usage,
|
|
1659
|
-
dashboard_id,
|
|
1660
|
-
start_time,
|
|
1661
|
-
end_time,
|
|
1662
|
-
) = job.result()
|
|
1624
|
+
result: DashboardProcessingResult = job.result()
|
|
1625
|
+
|
|
1663
1626
|
logger.debug(
|
|
1664
|
-
f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
|
|
1627
|
+
f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
|
|
1665
1628
|
)
|
|
1666
|
-
self.reporter.report_upstream_latency(
|
|
1629
|
+
self.reporter.report_upstream_latency(
|
|
1630
|
+
result.start_time, result.end_time
|
|
1631
|
+
)
|
|
1632
|
+
|
|
1633
|
+
yield from result.entities
|
|
1667
1634
|
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
looker_dashboards_for_usage.append(dashboard_usage)
|
|
1635
|
+
if result.dashboard_usage is not None:
|
|
1636
|
+
looker_dashboards_for_usage.append(result.dashboard_usage)
|
|
1671
1637
|
|
|
1638
|
+
# Warn if owner extraction was enabled but no emails could be found
|
|
1672
1639
|
if (
|
|
1673
1640
|
self.source_config.extract_owners
|
|
1674
1641
|
and self.reporter.resolved_user_ids > 0
|
|
@@ -1680,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1680
1647
|
"Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
|
|
1681
1648
|
)
|
|
1682
1649
|
|
|
1683
|
-
# Extract independent
|
|
1684
|
-
|
|
1650
|
+
# Extract independent looks first, so their explores are considered in _make_explore_containers.
|
|
1651
|
+
if self.source_config.extract_independent_looks:
|
|
1652
|
+
with self.reporter.report_stage("extract_independent_looks"):
|
|
1653
|
+
yield from self.extract_independent_looks()
|
|
1685
1654
|
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
if isinstance(event, MetadataChangeEvent):
|
|
1690
|
-
yield MetadataWorkUnit(
|
|
1691
|
-
id=f"looker-{event.proposedSnapshot.urn}", mce=event
|
|
1692
|
-
)
|
|
1693
|
-
elif isinstance(event, MetadataChangeProposalWrapper):
|
|
1694
|
-
yield event.as_workunit()
|
|
1695
|
-
elif isinstance(event, MetadataWorkUnit):
|
|
1696
|
-
yield event
|
|
1697
|
-
else:
|
|
1698
|
-
raise Exception(f"Unexpected type of event {event}")
|
|
1699
|
-
self.reporter.report_stage_end("explore_metadata")
|
|
1655
|
+
# Process explore containers and yield them.
|
|
1656
|
+
with self.reporter.report_stage("explore_metadata"):
|
|
1657
|
+
yield from self._make_explore_containers()
|
|
1700
1658
|
|
|
1701
1659
|
if (
|
|
1702
1660
|
self.source_config.tag_measures_and_dimensions
|
|
1703
1661
|
and self.reporter.explores_scanned > 0
|
|
1704
1662
|
):
|
|
1705
|
-
# Emit tag
|
|
1663
|
+
# Emit tag MCPs for measures and dimensions if we produced any explores:
|
|
1664
|
+
# Tags MCEs are converted to MCPs
|
|
1706
1665
|
for tag_mce in LookerUtil.get_tag_mces():
|
|
1707
|
-
yield
|
|
1708
|
-
id=f"tag-{tag_mce.proposedSnapshot.urn}",
|
|
1709
|
-
mce=tag_mce,
|
|
1710
|
-
)
|
|
1666
|
+
yield from auto_workunit(mcps_from_mce(tag_mce))
|
|
1711
1667
|
|
|
1712
1668
|
# Extract usage history is enabled
|
|
1713
1669
|
if self.source_config.extract_usage_history:
|
|
1714
|
-
self.reporter.
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1670
|
+
with self.reporter.report_stage("usage_extraction"):
|
|
1671
|
+
usage_mcps: List[MetadataChangeProposalWrapper] = (
|
|
1672
|
+
self.extract_usage_stat(
|
|
1673
|
+
looker_dashboards_for_usage, self.chart_urns
|
|
1674
|
+
)
|
|
1675
|
+
)
|
|
1676
|
+
yield from auto_workunit(usage_mcps)
|
|
1721
1677
|
|
|
1722
|
-
#
|
|
1678
|
+
# Ingest looker user resource mapping workunits.
|
|
1723
1679
|
logger.info("Ingesting looker user resource mapping workunits")
|
|
1724
|
-
self.reporter.
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1680
|
+
with self.reporter.report_stage("user_resource_extraction"):
|
|
1681
|
+
yield from auto_workunit(
|
|
1682
|
+
self.user_registry.to_platform_resource(
|
|
1683
|
+
self.source_config.platform_instance
|
|
1684
|
+
)
|
|
1728
1685
|
)
|
|
1729
|
-
)
|
|
1730
1686
|
|
|
1731
1687
|
def get_report(self) -> SourceReport:
|
|
1732
1688
|
return self.reporter
|