acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from json import JSONDecodeError
|
|
5
6
|
from typing import (
|
|
6
7
|
Any,
|
|
@@ -18,7 +19,7 @@ from typing import (
|
|
|
18
19
|
from looker_sdk.error import SDKError
|
|
19
20
|
from looker_sdk.rtl.serialize import DeserializeError
|
|
20
21
|
from looker_sdk.sdk.api40.models import (
|
|
21
|
-
Dashboard,
|
|
22
|
+
Dashboard as LookerAPIDashboard,
|
|
22
23
|
DashboardElement,
|
|
23
24
|
Folder,
|
|
24
25
|
FolderBase,
|
|
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
|
|
|
29
30
|
|
|
30
31
|
import datahub.emitter.mce_builder as builder
|
|
31
32
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
32
|
-
from datahub.emitter.mcp_builder import
|
|
33
|
+
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
33
34
|
from datahub.ingestion.api.common import PipelineContext
|
|
34
35
|
from datahub.ingestion.api.decorators import (
|
|
35
36
|
SupportStatus,
|
|
@@ -51,6 +52,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
51
52
|
from datahub.ingestion.source.common.subtypes import (
|
|
52
53
|
BIAssetSubTypes,
|
|
53
54
|
BIContainerSubTypes,
|
|
55
|
+
SourceCapabilityModifier,
|
|
54
56
|
)
|
|
55
57
|
from datahub.ingestion.source.looker import looker_usage
|
|
56
58
|
from datahub.ingestion.source.looker.looker_common import (
|
|
@@ -79,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
79
81
|
StatefulIngestionSourceBase,
|
|
80
82
|
)
|
|
81
83
|
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
82
|
-
AuditStamp,
|
|
83
|
-
ChangeAuditStamps,
|
|
84
|
-
DataPlatformInstance,
|
|
85
84
|
Status,
|
|
86
85
|
)
|
|
87
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
|
|
88
|
-
ChartSnapshot,
|
|
89
|
-
DashboardSnapshot,
|
|
90
|
-
)
|
|
91
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
92
86
|
from datahub.metadata.schema_classes import (
|
|
93
|
-
BrowsePathEntryClass,
|
|
94
|
-
BrowsePathsClass,
|
|
95
|
-
BrowsePathsV2Class,
|
|
96
|
-
ChartInfoClass,
|
|
97
87
|
ChartTypeClass,
|
|
98
|
-
|
|
99
|
-
DashboardInfoClass,
|
|
88
|
+
EmbedClass,
|
|
100
89
|
InputFieldClass,
|
|
101
90
|
InputFieldsClass,
|
|
102
91
|
OwnerClass,
|
|
103
|
-
OwnershipClass,
|
|
104
92
|
OwnershipTypeClass,
|
|
105
|
-
SubTypesClass,
|
|
106
93
|
)
|
|
94
|
+
from datahub.sdk.chart import Chart
|
|
95
|
+
from datahub.sdk.container import Container
|
|
96
|
+
from datahub.sdk.dashboard import Dashboard
|
|
97
|
+
from datahub.sdk.dataset import Dataset
|
|
98
|
+
from datahub.sdk.entity import Entity
|
|
107
99
|
from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
|
|
100
|
+
from datahub.utilities.sentinels import Unset, unset
|
|
108
101
|
|
|
109
102
|
logger = logging.getLogger(__name__)
|
|
110
103
|
|
|
111
104
|
|
|
105
|
+
@dataclass
|
|
106
|
+
class DashboardProcessingResult:
|
|
107
|
+
"""Result of processing a single dashboard."""
|
|
108
|
+
|
|
109
|
+
entities: List[Entity]
|
|
110
|
+
dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
|
|
111
|
+
dashboard_id: str
|
|
112
|
+
start_time: datetime.datetime
|
|
113
|
+
end_time: datetime.datetime
|
|
114
|
+
|
|
115
|
+
|
|
112
116
|
@platform_name("Looker")
|
|
113
117
|
@support_status(SupportStatus.CERTIFIED)
|
|
114
118
|
@config_class(LookerDashboardSourceConfig)
|
|
@@ -126,6 +130,15 @@ logger = logging.getLogger(__name__)
|
|
|
126
130
|
SourceCapability.USAGE_STATS,
|
|
127
131
|
"Enabled by default, configured using `extract_usage_history`",
|
|
128
132
|
)
|
|
133
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
134
|
+
@capability(
|
|
135
|
+
SourceCapability.CONTAINERS,
|
|
136
|
+
"Enabled by default",
|
|
137
|
+
subtype_modifier=[
|
|
138
|
+
SourceCapabilityModifier.LOOKML_MODEL,
|
|
139
|
+
SourceCapabilityModifier.LOOKER_FOLDER,
|
|
140
|
+
],
|
|
141
|
+
)
|
|
129
142
|
class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
130
143
|
"""
|
|
131
144
|
This plugin extracts the following:
|
|
@@ -279,6 +292,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
279
292
|
return []
|
|
280
293
|
result = []
|
|
281
294
|
|
|
295
|
+
if query is not None:
|
|
296
|
+
logger.debug(
|
|
297
|
+
f"Processing query: model={query.model}, view={query.view}, input_fields_count={len(query.fields) if query.fields else 0}"
|
|
298
|
+
)
|
|
299
|
+
|
|
282
300
|
# query.dynamic_fields can contain:
|
|
283
301
|
# - looker table calculations: https://docs.looker.com/exploring-data/using-table-calculations
|
|
284
302
|
# - looker custom measures: https://docs.looker.com/de/exploring-data/adding-fields/custom-measure
|
|
@@ -363,7 +381,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
363
381
|
filters: MutableMapping[str, Any] = (
|
|
364
382
|
query.filters if query.filters is not None else {}
|
|
365
383
|
)
|
|
366
|
-
for field in filters
|
|
384
|
+
for field in filters:
|
|
367
385
|
if field is None:
|
|
368
386
|
continue
|
|
369
387
|
|
|
@@ -399,9 +417,12 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
399
417
|
# Get the explore from the view directly
|
|
400
418
|
explores = [element.query.view] if element.query.view is not None else []
|
|
401
419
|
logger.debug(
|
|
402
|
-
f"
|
|
420
|
+
f"Dashboard element {element.title} (ID: {element.id}): Upstream explores added via query={explores} with model={element.query.model}, explore={element.query.view}"
|
|
403
421
|
)
|
|
404
422
|
for exp in explores:
|
|
423
|
+
logger.debug(
|
|
424
|
+
f"Adding reachable explore: model={element.query.model}, explore={exp}, element_id={element.id}, title={element.title}"
|
|
425
|
+
)
|
|
405
426
|
self.add_reachable_explore(
|
|
406
427
|
model=element.query.model,
|
|
407
428
|
explore=exp,
|
|
@@ -477,12 +498,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
477
498
|
|
|
478
499
|
# Failing the above two approaches, pick out details from result_maker
|
|
479
500
|
elif element.result_maker is not None:
|
|
480
|
-
model: str = ""
|
|
481
501
|
input_fields = []
|
|
482
502
|
|
|
483
503
|
explores = []
|
|
484
504
|
if element.result_maker.query is not None:
|
|
485
|
-
model = element.result_maker.query.model
|
|
486
505
|
if element.result_maker.query.view is not None:
|
|
487
506
|
explores.append(element.result_maker.query.view)
|
|
488
507
|
input_fields = self._get_input_fields_from_query(
|
|
@@ -502,9 +521,15 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
502
521
|
|
|
503
522
|
# In addition to the query, filters can point to fields as well
|
|
504
523
|
assert element.result_maker.filterables is not None
|
|
524
|
+
|
|
525
|
+
# Different dashboard elements may reference explores from different models
|
|
526
|
+
# so we need to create a mapping of explore names to their models to maintain correct associations
|
|
527
|
+
explore_to_model_map = {}
|
|
528
|
+
|
|
505
529
|
for filterable in element.result_maker.filterables:
|
|
506
530
|
if filterable.view is not None and filterable.model is not None:
|
|
507
|
-
model
|
|
531
|
+
# Store the model for this view/explore in our mapping
|
|
532
|
+
explore_to_model_map[filterable.view] = filterable.model
|
|
508
533
|
explores.append(filterable.view)
|
|
509
534
|
self.add_reachable_explore(
|
|
510
535
|
model=filterable.model,
|
|
@@ -527,6 +552,18 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
527
552
|
|
|
528
553
|
explores = sorted(list(set(explores))) # dedup the list of views
|
|
529
554
|
|
|
555
|
+
logger.debug(
|
|
556
|
+
f"Dashboard element {element.id} and their explores with the corresponding model: {explore_to_model_map}"
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
# If we have a query, use its model as the default for any explores that don't have a model in our mapping
|
|
560
|
+
default_model = ""
|
|
561
|
+
if (
|
|
562
|
+
element.result_maker.query is not None
|
|
563
|
+
and element.result_maker.query.model is not None
|
|
564
|
+
):
|
|
565
|
+
default_model = element.result_maker.query.model
|
|
566
|
+
|
|
530
567
|
return LookerDashboardElement(
|
|
531
568
|
id=element.id,
|
|
532
569
|
title=element.title if element.title is not None else "",
|
|
@@ -540,7 +577,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
540
577
|
else ""
|
|
541
578
|
),
|
|
542
579
|
upstream_explores=[
|
|
543
|
-
LookerExplore(
|
|
580
|
+
LookerExplore(
|
|
581
|
+
model_name=explore_to_model_map.get(exp, default_model),
|
|
582
|
+
name=exp,
|
|
583
|
+
)
|
|
584
|
+
for exp in explores
|
|
544
585
|
],
|
|
545
586
|
input_fields=input_fields,
|
|
546
587
|
owner=None,
|
|
@@ -595,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
595
636
|
|
|
596
637
|
return chart_type
|
|
597
638
|
|
|
598
|
-
def
|
|
639
|
+
def _get_folder_ancestors_urn_entries(
|
|
599
640
|
self, folder: LookerFolder, include_current_folder: bool = True
|
|
600
|
-
) -> Iterable[
|
|
641
|
+
) -> Iterable[str]:
|
|
601
642
|
for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
|
|
602
|
-
assert ancestor.id
|
|
643
|
+
assert ancestor.id # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
|
|
603
644
|
urn = self._gen_folder_key(ancestor.id).as_urn()
|
|
604
|
-
yield
|
|
645
|
+
yield urn
|
|
605
646
|
|
|
606
647
|
urn = self._gen_folder_key(folder.id).as_urn()
|
|
607
648
|
if include_current_folder:
|
|
608
|
-
yield
|
|
609
|
-
|
|
610
|
-
def _create_platform_instance_aspect(
|
|
611
|
-
self,
|
|
612
|
-
) -> DataPlatformInstance:
|
|
613
|
-
assert self.source_config.platform_name, (
|
|
614
|
-
"Platform name is not set in the configuration."
|
|
615
|
-
)
|
|
616
|
-
assert self.source_config.platform_instance, (
|
|
617
|
-
"Platform instance is not set in the configuration."
|
|
618
|
-
)
|
|
619
|
-
|
|
620
|
-
return DataPlatformInstance(
|
|
621
|
-
platform=builder.make_data_platform_urn(self.source_config.platform_name),
|
|
622
|
-
instance=builder.make_dataplatform_instance_urn(
|
|
623
|
-
platform=self.source_config.platform_name,
|
|
624
|
-
instance=self.source_config.platform_instance,
|
|
625
|
-
),
|
|
626
|
-
)
|
|
649
|
+
yield urn
|
|
627
650
|
|
|
628
651
|
def _make_chart_urn(self, element_id: str) -> str:
|
|
629
652
|
platform_instance: Optional[str] = None
|
|
@@ -636,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
636
659
|
platform_instance=platform_instance,
|
|
637
660
|
)
|
|
638
661
|
|
|
639
|
-
def
|
|
662
|
+
def _make_chart_entities(
|
|
640
663
|
self,
|
|
641
664
|
dashboard_element: LookerDashboardElement,
|
|
642
665
|
dashboard: Optional[
|
|
643
666
|
LookerDashboard
|
|
644
667
|
], # dashboard will be None if this is a standalone look
|
|
645
|
-
) -> List[
|
|
646
|
-
|
|
647
|
-
element_id=dashboard_element.get_urn_element_id()
|
|
648
|
-
)
|
|
649
|
-
self.chart_urns.add(chart_urn)
|
|
650
|
-
chart_snapshot = ChartSnapshot(
|
|
651
|
-
urn=chart_urn,
|
|
652
|
-
aspects=[Status(removed=False)],
|
|
653
|
-
)
|
|
654
|
-
browse_path_v2: Optional[BrowsePathsV2Class] = None
|
|
655
|
-
|
|
656
|
-
chart_type = self._get_chart_type(dashboard_element)
|
|
657
|
-
chart_info = ChartInfoClass(
|
|
658
|
-
type=chart_type,
|
|
659
|
-
description=dashboard_element.description or "",
|
|
660
|
-
title=dashboard_element.title or "",
|
|
661
|
-
lastModified=ChangeAuditStamps(),
|
|
662
|
-
chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
|
|
663
|
-
inputs=dashboard_element.get_view_urns(self.source_config),
|
|
664
|
-
customProperties={
|
|
665
|
-
"upstream_fields": (
|
|
666
|
-
",".join(
|
|
667
|
-
sorted({field.name for field in dashboard_element.input_fields})
|
|
668
|
-
)
|
|
669
|
-
if dashboard_element.input_fields
|
|
670
|
-
else ""
|
|
671
|
-
)
|
|
672
|
-
},
|
|
673
|
-
)
|
|
674
|
-
chart_snapshot.aspects.append(chart_info)
|
|
675
|
-
|
|
668
|
+
) -> List[Chart]:
|
|
669
|
+
chart_parent_container: Union[List[str], Unset] = unset
|
|
676
670
|
if (
|
|
677
671
|
dashboard
|
|
678
672
|
and dashboard.folder_path is not None
|
|
679
673
|
and dashboard.folder is not None
|
|
680
674
|
):
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
dashboard_urn = self.make_dashboard_urn(dashboard)
|
|
687
|
-
browse_path_v2 = BrowsePathsV2Class(
|
|
688
|
-
path=[
|
|
689
|
-
BrowsePathEntryClass("Folders"),
|
|
690
|
-
*self._get_folder_browse_path_v2_entries(dashboard.folder),
|
|
691
|
-
BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
|
|
692
|
-
],
|
|
693
|
-
)
|
|
675
|
+
chart_parent_container = [
|
|
676
|
+
"Folders",
|
|
677
|
+
*self._get_folder_ancestors_urn_entries(dashboard.folder),
|
|
678
|
+
self.make_dashboard_urn(dashboard),
|
|
679
|
+
]
|
|
694
680
|
elif (
|
|
695
681
|
dashboard is None
|
|
696
682
|
and dashboard_element.folder_path is not None
|
|
697
683
|
and dashboard_element.folder is not None
|
|
698
|
-
): #
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
browse_path_v2 = BrowsePathsV2Class(
|
|
704
|
-
path=[
|
|
705
|
-
BrowsePathEntryClass("Folders"),
|
|
706
|
-
*self._get_folder_browse_path_v2_entries(dashboard_element.folder),
|
|
707
|
-
],
|
|
708
|
-
)
|
|
684
|
+
): # Independent look
|
|
685
|
+
chart_parent_container = [
|
|
686
|
+
"Folders",
|
|
687
|
+
*self._get_folder_ancestors_urn_entries(dashboard_element.folder),
|
|
688
|
+
]
|
|
709
689
|
|
|
690
|
+
# Determine chart ownership
|
|
691
|
+
chart_ownership: Optional[List[OwnerClass]] = None
|
|
710
692
|
if dashboard is not None:
|
|
711
693
|
ownership = self.get_ownership(dashboard)
|
|
712
694
|
if ownership is not None:
|
|
713
|
-
|
|
695
|
+
chart_ownership = [ownership]
|
|
714
696
|
elif dashboard is None and dashboard_element is not None:
|
|
715
697
|
ownership = self.get_ownership(dashboard_element)
|
|
716
698
|
if ownership is not None:
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
|
|
720
|
-
|
|
721
|
-
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
|
|
722
|
-
chart_mce,
|
|
723
|
-
MetadataChangeProposalWrapper(
|
|
724
|
-
entityUrn=chart_urn,
|
|
725
|
-
aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
|
|
726
|
-
),
|
|
727
|
-
]
|
|
728
|
-
|
|
729
|
-
if self.source_config.include_platform_instance_in_urns:
|
|
730
|
-
proposals.append(
|
|
731
|
-
MetadataChangeProposalWrapper(
|
|
732
|
-
entityUrn=chart_urn,
|
|
733
|
-
aspect=self._create_platform_instance_aspect(),
|
|
734
|
-
),
|
|
735
|
-
)
|
|
699
|
+
chart_ownership = [ownership]
|
|
736
700
|
|
|
701
|
+
chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
|
|
737
702
|
# If extracting embeds is enabled, produce an MCP for embed URL.
|
|
738
703
|
if (
|
|
739
704
|
self.source_config.extract_embed_urls
|
|
@@ -743,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
743
708
|
self.source_config.external_base_url
|
|
744
709
|
)
|
|
745
710
|
if maybe_embed_url:
|
|
746
|
-
|
|
747
|
-
create_embed_mcp(
|
|
748
|
-
chart_snapshot.urn,
|
|
749
|
-
maybe_embed_url,
|
|
750
|
-
)
|
|
751
|
-
)
|
|
752
|
-
|
|
753
|
-
if dashboard is None and dashboard_element.folder:
|
|
754
|
-
container = ContainerClass(
|
|
755
|
-
container=self._gen_folder_key(dashboard_element.folder.id).as_urn(),
|
|
756
|
-
)
|
|
757
|
-
proposals.append(
|
|
758
|
-
MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
|
|
759
|
-
)
|
|
711
|
+
chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
|
|
760
712
|
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
entityUrn=chart_urn, aspect=browse_path_v2
|
|
765
|
-
)
|
|
713
|
+
chart_extra_aspects.append(
|
|
714
|
+
InputFieldsClass(
|
|
715
|
+
fields=self._input_fields_from_dashboard_element(dashboard_element)
|
|
766
716
|
)
|
|
767
|
-
|
|
768
|
-
return proposals
|
|
769
|
-
|
|
770
|
-
def _make_dashboard_metadata_events(
|
|
771
|
-
self, looker_dashboard: LookerDashboard, chart_urns: List[str]
|
|
772
|
-
) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
|
|
773
|
-
dashboard_urn = self.make_dashboard_urn(looker_dashboard)
|
|
774
|
-
dashboard_snapshot = DashboardSnapshot(
|
|
775
|
-
urn=dashboard_urn,
|
|
776
|
-
aspects=[],
|
|
777
717
|
)
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
718
|
+
return [
|
|
719
|
+
Chart(
|
|
720
|
+
chart_type=self._get_chart_type(dashboard_element),
|
|
721
|
+
chart_url=dashboard_element.url(
|
|
722
|
+
self.source_config.external_base_url or ""
|
|
723
|
+
),
|
|
724
|
+
custom_properties={
|
|
725
|
+
"upstream_fields": (
|
|
726
|
+
",".join(
|
|
727
|
+
sorted(
|
|
728
|
+
{field.name for field in dashboard_element.input_fields}
|
|
729
|
+
)
|
|
730
|
+
)
|
|
731
|
+
if dashboard_element.input_fields
|
|
732
|
+
else ""
|
|
733
|
+
)
|
|
734
|
+
},
|
|
735
|
+
description=dashboard_element.description or "",
|
|
736
|
+
display_name=dashboard_element.title, # title is (deprecated) using display_name
|
|
737
|
+
extra_aspects=chart_extra_aspects,
|
|
738
|
+
input_datasets=dashboard_element.get_view_urns(self.source_config),
|
|
739
|
+
last_modified=self._get_last_modified_time(
|
|
740
|
+
dashboard
|
|
741
|
+
), # Inherited from Dashboard
|
|
742
|
+
last_modified_by=self._get_last_modified_by(
|
|
743
|
+
dashboard
|
|
744
|
+
), # Inherited from Dashboard
|
|
745
|
+
created_at=self._get_created_at(dashboard), # Inherited from Dashboard
|
|
746
|
+
created_by=self._get_created_by(dashboard), # Inherited from Dashboard
|
|
747
|
+
deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard
|
|
748
|
+
deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard
|
|
749
|
+
name=dashboard_element.get_urn_element_id(),
|
|
750
|
+
owners=chart_ownership,
|
|
751
|
+
parent_container=chart_parent_container,
|
|
752
|
+
platform=self.source_config.platform_name,
|
|
753
|
+
platform_instance=self.source_config.platform_instance
|
|
754
|
+
if self.source_config.include_platform_instance_in_urns
|
|
755
|
+
else None,
|
|
756
|
+
subtype=BIAssetSubTypes.LOOKER_LOOK,
|
|
800
757
|
)
|
|
801
|
-
dashboard_snapshot.aspects.append(browse_path)
|
|
802
|
-
|
|
803
|
-
ownership = self.get_ownership(looker_dashboard)
|
|
804
|
-
if ownership is not None:
|
|
805
|
-
dashboard_snapshot.aspects.append(ownership)
|
|
806
|
-
|
|
807
|
-
dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
|
|
808
|
-
|
|
809
|
-
dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
|
|
810
|
-
|
|
811
|
-
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
|
|
812
|
-
dashboard_mce
|
|
813
758
|
]
|
|
814
759
|
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
760
|
+
def _make_dashboard_entities(
|
|
761
|
+
self, looker_dashboard: LookerDashboard, charts: List[Chart]
|
|
762
|
+
) -> List[Dashboard]:
|
|
763
|
+
dashboard_ownership: Optional[List[OwnerClass]] = None
|
|
764
|
+
ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
|
|
765
|
+
if ownership is not None:
|
|
766
|
+
dashboard_ownership = [ownership]
|
|
822
767
|
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
MetadataChangeProposalWrapper(
|
|
826
|
-
entityUrn=dashboard_urn, aspect=browse_path_v2
|
|
827
|
-
)
|
|
828
|
-
)
|
|
768
|
+
# Extra Aspects not yet supported in the Dashboard entity class SDKv2
|
|
769
|
+
dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
|
|
829
770
|
|
|
830
|
-
#
|
|
771
|
+
# Embed URL aspect
|
|
831
772
|
if (
|
|
832
773
|
self.source_config.extract_embed_urls
|
|
833
774
|
and self.source_config.external_base_url
|
|
834
775
|
):
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
776
|
+
dashboard_extra_aspects.append(
|
|
777
|
+
EmbedClass(
|
|
778
|
+
renderUrl=looker_dashboard.embed_url(
|
|
779
|
+
self.source_config.external_base_url
|
|
780
|
+
)
|
|
839
781
|
)
|
|
840
782
|
)
|
|
841
783
|
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
)
|
|
784
|
+
# Input fields aspect
|
|
785
|
+
# Populate input fields from all the dashboard elements
|
|
786
|
+
all_fields: List[InputFieldClass] = []
|
|
787
|
+
for dashboard_element in looker_dashboard.dashboard_elements:
|
|
788
|
+
all_fields.extend(
|
|
789
|
+
self._input_fields_from_dashboard_element(dashboard_element)
|
|
848
790
|
)
|
|
791
|
+
dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
|
|
792
|
+
# Status aspect
|
|
793
|
+
dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
|
|
849
794
|
|
|
850
|
-
|
|
795
|
+
dashboard_parent_container: Union[List[str], Unset] = unset
|
|
796
|
+
if (
|
|
797
|
+
looker_dashboard.folder_path is not None
|
|
798
|
+
and looker_dashboard.folder is not None
|
|
799
|
+
):
|
|
800
|
+
dashboard_parent_container = [
|
|
801
|
+
"Folders",
|
|
802
|
+
*self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
|
|
803
|
+
]
|
|
804
|
+
|
|
805
|
+
return [
|
|
806
|
+
Dashboard(
|
|
807
|
+
charts=charts,
|
|
808
|
+
dashboard_url=looker_dashboard.url(
|
|
809
|
+
self.source_config.external_base_url
|
|
810
|
+
),
|
|
811
|
+
description=looker_dashboard.description or "",
|
|
812
|
+
display_name=looker_dashboard.title, # title is (deprecated) using display_name
|
|
813
|
+
extra_aspects=dashboard_extra_aspects,
|
|
814
|
+
last_modified=self._get_last_modified_time(looker_dashboard),
|
|
815
|
+
last_modified_by=self._get_last_modified_by(looker_dashboard),
|
|
816
|
+
created_at=self._get_created_at(looker_dashboard),
|
|
817
|
+
created_by=self._get_created_by(looker_dashboard),
|
|
818
|
+
deleted_on=self._get_deleted_on(looker_dashboard),
|
|
819
|
+
deleted_by=self._get_deleted_by(looker_dashboard),
|
|
820
|
+
name=looker_dashboard.get_urn_dashboard_id(),
|
|
821
|
+
owners=dashboard_ownership,
|
|
822
|
+
parent_container=dashboard_parent_container,
|
|
823
|
+
platform=self.source_config.platform_name,
|
|
824
|
+
platform_instance=self.source_config.platform_instance
|
|
825
|
+
if self.source_config.include_platform_instance_in_urns
|
|
826
|
+
else None,
|
|
827
|
+
)
|
|
828
|
+
]
|
|
851
829
|
|
|
852
830
|
def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
|
|
853
831
|
# Note that `looker_dashboard_name_part` will likely be `dashboard.1234`.
|
|
@@ -864,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
864
842
|
def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
|
|
865
843
|
return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
|
|
866
844
|
|
|
867
|
-
def
|
|
845
|
+
def _make_explore_containers(
|
|
868
846
|
self,
|
|
869
|
-
) -> Iterable[
|
|
870
|
-
Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
|
|
871
|
-
]:
|
|
847
|
+
) -> Iterable[Union[Container, Dataset]]:
|
|
872
848
|
if not self.source_config.emit_used_explores_only:
|
|
873
849
|
explores_to_fetch = list(self.list_all_explores())
|
|
874
850
|
else:
|
|
@@ -877,8 +853,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
877
853
|
# fine to set them to None.
|
|
878
854
|
# TODO: Track project names for each explore.
|
|
879
855
|
explores_to_fetch = [
|
|
880
|
-
(None, model, explore)
|
|
881
|
-
for (model, explore) in self.reachable_explores.keys()
|
|
856
|
+
(None, model, explore) for (model, explore) in self.reachable_explores
|
|
882
857
|
]
|
|
883
858
|
explores_to_fetch.sort()
|
|
884
859
|
|
|
@@ -887,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
887
862
|
for project_name, model, _ in explores_to_fetch:
|
|
888
863
|
if model not in processed_models:
|
|
889
864
|
model_key = gen_model_key(self.source_config, model)
|
|
890
|
-
yield
|
|
865
|
+
yield Container(
|
|
891
866
|
container_key=model_key,
|
|
892
|
-
|
|
893
|
-
|
|
867
|
+
display_name=model,
|
|
868
|
+
subtype=BIContainerSubTypes.LOOKML_MODEL,
|
|
894
869
|
extra_properties=(
|
|
895
870
|
{"project": project_name} if project_name is not None else None
|
|
896
871
|
),
|
|
897
|
-
|
|
898
|
-
yield MetadataChangeProposalWrapper(
|
|
899
|
-
entityUrn=model_key.as_urn(),
|
|
900
|
-
aspect=BrowsePathsV2Class(
|
|
901
|
-
path=[BrowsePathEntryClass("Explore")],
|
|
902
|
-
),
|
|
872
|
+
parent_container=["Explore"],
|
|
903
873
|
)
|
|
904
874
|
|
|
905
875
|
processed_models.append(model)
|
|
@@ -910,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
910
880
|
((model, explore) for (_project, model, explore) in explores_to_fetch),
|
|
911
881
|
max_workers=self.source_config.max_threads,
|
|
912
882
|
):
|
|
913
|
-
|
|
883
|
+
explore_dataset_entity, explore_id, start_time, end_time = future.result()
|
|
914
884
|
self.reporter.explores_scanned += 1
|
|
915
|
-
|
|
885
|
+
if explore_dataset_entity:
|
|
886
|
+
yield explore_dataset_entity
|
|
916
887
|
self.reporter.report_upstream_latency(start_time, end_time)
|
|
917
888
|
logger.debug(
|
|
918
889
|
f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
|
|
@@ -932,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
932
903
|
def fetch_one_explore(
|
|
933
904
|
self, model: str, explore: str
|
|
934
905
|
) -> Tuple[
|
|
935
|
-
|
|
906
|
+
Optional[Dataset],
|
|
936
907
|
str,
|
|
937
908
|
datetime.datetime,
|
|
938
909
|
datetime.datetime,
|
|
939
910
|
]:
|
|
940
911
|
start_time = datetime.datetime.now()
|
|
941
|
-
events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
|
|
942
912
|
looker_explore = self.explore_registry.get_explore(model, explore)
|
|
913
|
+
explore_dataset_entity: Optional[Dataset] = None
|
|
943
914
|
if looker_explore is not None:
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
self.source_config.extract_embed_urls,
|
|
950
|
-
)
|
|
951
|
-
or events
|
|
915
|
+
explore_dataset_entity = looker_explore._to_metadata_events(
|
|
916
|
+
self.source_config,
|
|
917
|
+
self.reporter,
|
|
918
|
+
self.source_config.external_base_url or self.source_config.base_url,
|
|
919
|
+
self.source_config.extract_embed_urls,
|
|
952
920
|
)
|
|
953
921
|
|
|
954
|
-
return
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
return event.proposedSnapshot.urn
|
|
961
|
-
else:
|
|
962
|
-
return event.entityUrn
|
|
922
|
+
return (
|
|
923
|
+
explore_dataset_entity,
|
|
924
|
+
f"{model}:{explore}",
|
|
925
|
+
start_time,
|
|
926
|
+
datetime.datetime.now(),
|
|
927
|
+
)
|
|
963
928
|
|
|
964
|
-
def _emit_folder_as_container(
|
|
965
|
-
self, folder: LookerFolder
|
|
966
|
-
) -> Iterable[MetadataWorkUnit]:
|
|
929
|
+
def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
|
|
967
930
|
if folder.id not in self.processed_folders:
|
|
968
|
-
yield from gen_containers(
|
|
969
|
-
container_key=self._gen_folder_key(folder.id),
|
|
970
|
-
name=folder.name,
|
|
971
|
-
sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
|
|
972
|
-
parent_container_key=(
|
|
973
|
-
self._gen_folder_key(folder.parent_id) if folder.parent_id else None
|
|
974
|
-
),
|
|
975
|
-
)
|
|
976
931
|
if folder.parent_id is None:
|
|
977
|
-
yield
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
)
|
|
932
|
+
yield Container(
|
|
933
|
+
container_key=self._gen_folder_key(folder.id),
|
|
934
|
+
display_name=folder.name,
|
|
935
|
+
subtype=BIContainerSubTypes.LOOKER_FOLDER,
|
|
936
|
+
parent_container=["Folders"],
|
|
937
|
+
)
|
|
983
938
|
else:
|
|
984
|
-
yield
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
)
|
|
939
|
+
yield Container(
|
|
940
|
+
container_key=self._gen_folder_key(folder.id),
|
|
941
|
+
display_name=folder.name,
|
|
942
|
+
subtype=BIContainerSubTypes.LOOKER_FOLDER,
|
|
943
|
+
parent_container=[
|
|
944
|
+
"Folders",
|
|
945
|
+
*self._get_folder_ancestors_urn_entries(
|
|
946
|
+
folder, include_current_folder=False
|
|
947
|
+
),
|
|
948
|
+
],
|
|
949
|
+
)
|
|
995
950
|
self.processed_folders.append(folder.id)
|
|
996
951
|
|
|
997
952
|
def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
|
|
@@ -1002,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1002
957
|
instance=self.source_config.platform_instance,
|
|
1003
958
|
)
|
|
1004
959
|
|
|
1005
|
-
def
|
|
960
|
+
def _make_dashboard_and_chart_entities(
|
|
1006
961
|
self, looker_dashboard: LookerDashboard
|
|
1007
|
-
) -> Iterable[Union[
|
|
962
|
+
) -> Iterable[Union[Chart, Dashboard]]:
|
|
1008
963
|
# Step 1: Emit metadata for each Chart inside the Dashboard.
|
|
1009
|
-
chart_events = []
|
|
964
|
+
chart_events: List[Chart] = []
|
|
1010
965
|
for element in looker_dashboard.dashboard_elements:
|
|
1011
966
|
if element.type == "vis":
|
|
1012
967
|
chart_events.extend(
|
|
1013
|
-
self.
|
|
968
|
+
self._make_chart_entities(element, looker_dashboard)
|
|
1014
969
|
)
|
|
1015
970
|
|
|
1016
971
|
yield from chart_events
|
|
1017
972
|
|
|
1018
|
-
# Step 2: Emit metadata events for the Dashboard itself.
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
) # Collect the unique child chart urns for dashboard input lineage.
|
|
973
|
+
# Step 2: Emit metadata events for the Dashboard itself.
|
|
974
|
+
# Create a set of unique chart entities for dashboard input lineage based on chart.urn
|
|
975
|
+
unique_chart_entities: List[Chart] = []
|
|
1022
976
|
for chart_event in chart_events:
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
977
|
+
# Use chart.urn to ensure uniqueness based on the chart's URN property
|
|
978
|
+
# Also, update the set of processed chart urns
|
|
979
|
+
if str(chart_event.urn) not in self.chart_urns:
|
|
980
|
+
self.chart_urns.add(str(chart_event.urn))
|
|
981
|
+
unique_chart_entities.append(chart_event)
|
|
982
|
+
|
|
983
|
+
dashboard_events = self._make_dashboard_entities(
|
|
984
|
+
looker_dashboard, unique_chart_entities
|
|
1029
985
|
)
|
|
1030
986
|
yield from dashboard_events
|
|
1031
987
|
|
|
1032
988
|
def get_ownership(
|
|
1033
989
|
self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
|
|
1034
|
-
) -> Optional[
|
|
990
|
+
) -> Optional[OwnerClass]:
|
|
1035
991
|
if looker_dashboard_look.owner is not None:
|
|
1036
992
|
owner_urn = looker_dashboard_look.owner.get_urn(
|
|
1037
993
|
self.source_config.strip_user_ids_from_email
|
|
1038
994
|
)
|
|
1039
995
|
if owner_urn is not None:
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
owner=owner_urn,
|
|
1044
|
-
type=OwnershipTypeClass.DATAOWNER,
|
|
1045
|
-
)
|
|
1046
|
-
]
|
|
996
|
+
return OwnerClass(
|
|
997
|
+
owner=owner_urn,
|
|
998
|
+
type=OwnershipTypeClass.DATAOWNER,
|
|
1047
999
|
)
|
|
1048
|
-
return ownership
|
|
1049
1000
|
return None
|
|
1050
1001
|
|
|
1051
|
-
def
|
|
1052
|
-
self, looker_dashboard: LookerDashboard
|
|
1053
|
-
) ->
|
|
1054
|
-
|
|
1055
|
-
if looker_dashboard.created_at is not None:
|
|
1056
|
-
change_audit_stamp.created.time = round(
|
|
1057
|
-
looker_dashboard.created_at.timestamp() * 1000
|
|
1058
|
-
)
|
|
1059
|
-
if looker_dashboard.owner is not None:
|
|
1060
|
-
owner_urn = looker_dashboard.owner.get_urn(
|
|
1061
|
-
self.source_config.strip_user_ids_from_email
|
|
1062
|
-
)
|
|
1063
|
-
if owner_urn:
|
|
1064
|
-
change_audit_stamp.created.actor = owner_urn
|
|
1065
|
-
if looker_dashboard.last_updated_at is not None:
|
|
1066
|
-
change_audit_stamp.lastModified.time = round(
|
|
1067
|
-
looker_dashboard.last_updated_at.timestamp() * 1000
|
|
1068
|
-
)
|
|
1069
|
-
if looker_dashboard.last_updated_by is not None:
|
|
1070
|
-
updated_by_urn = looker_dashboard.last_updated_by.get_urn(
|
|
1071
|
-
self.source_config.strip_user_ids_from_email
|
|
1072
|
-
)
|
|
1073
|
-
if updated_by_urn:
|
|
1074
|
-
change_audit_stamp.lastModified.actor = updated_by_urn
|
|
1075
|
-
if (
|
|
1076
|
-
looker_dashboard.is_deleted
|
|
1077
|
-
and looker_dashboard.deleted_by is not None
|
|
1078
|
-
and looker_dashboard.deleted_at is not None
|
|
1079
|
-
):
|
|
1080
|
-
deleter_urn = looker_dashboard.deleted_by.get_urn(
|
|
1081
|
-
self.source_config.strip_user_ids_from_email
|
|
1082
|
-
)
|
|
1083
|
-
if deleter_urn:
|
|
1084
|
-
change_audit_stamp.deleted = AuditStamp(
|
|
1085
|
-
actor=deleter_urn,
|
|
1086
|
-
time=round(looker_dashboard.deleted_at.timestamp() * 1000),
|
|
1087
|
-
)
|
|
1002
|
+
def _get_last_modified_time(
|
|
1003
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1004
|
+
) -> Optional[datetime.datetime]:
|
|
1005
|
+
return looker_dashboard.last_updated_at if looker_dashboard else None
|
|
1088
1006
|
|
|
1089
|
-
|
|
1007
|
+
def _get_last_modified_by(
|
|
1008
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1009
|
+
) -> Optional[str]:
|
|
1010
|
+
if not looker_dashboard or not looker_dashboard.last_updated_by:
|
|
1011
|
+
return None
|
|
1012
|
+
return looker_dashboard.last_updated_by.get_urn(
|
|
1013
|
+
self.source_config.strip_user_ids_from_email
|
|
1014
|
+
)
|
|
1015
|
+
|
|
1016
|
+
def _get_created_at(
|
|
1017
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1018
|
+
) -> Optional[datetime.datetime]:
|
|
1019
|
+
return looker_dashboard.created_at if looker_dashboard else None
|
|
1020
|
+
|
|
1021
|
+
def _get_created_by(
|
|
1022
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1023
|
+
) -> Optional[str]:
|
|
1024
|
+
if not looker_dashboard or not looker_dashboard.owner:
|
|
1025
|
+
return None
|
|
1026
|
+
return looker_dashboard.owner.get_urn(
|
|
1027
|
+
self.source_config.strip_user_ids_from_email
|
|
1028
|
+
)
|
|
1029
|
+
|
|
1030
|
+
def _get_deleted_on(
|
|
1031
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1032
|
+
) -> Optional[datetime.datetime]:
|
|
1033
|
+
return looker_dashboard.deleted_at if looker_dashboard else None
|
|
1034
|
+
|
|
1035
|
+
def _get_deleted_by(
|
|
1036
|
+
self, looker_dashboard: Optional[LookerDashboard]
|
|
1037
|
+
) -> Optional[str]:
|
|
1038
|
+
if not looker_dashboard or not looker_dashboard.deleted_by:
|
|
1039
|
+
return None
|
|
1040
|
+
return looker_dashboard.deleted_by.get_urn(
|
|
1041
|
+
self.source_config.strip_user_ids_from_email
|
|
1042
|
+
)
|
|
1090
1043
|
|
|
1091
1044
|
def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
|
|
1092
1045
|
assert folder.id
|
|
@@ -1099,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1099
1052
|
]
|
|
1100
1053
|
return "/".join(ancestors + [folder.name])
|
|
1101
1054
|
|
|
1102
|
-
def _get_looker_dashboard(self, dashboard:
|
|
1055
|
+
def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
|
|
1103
1056
|
self.reporter.accessed_dashboards += 1
|
|
1104
1057
|
if dashboard.folder is None:
|
|
1105
1058
|
logger.debug(f"{dashboard.id} has no folder")
|
|
@@ -1173,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1173
1126
|
|
|
1174
1127
|
return user
|
|
1175
1128
|
|
|
1176
|
-
def process_metrics_dimensions_and_fields_for_dashboard(
|
|
1177
|
-
self, dashboard: LookerDashboard
|
|
1178
|
-
) -> List[MetadataWorkUnit]:
|
|
1179
|
-
chart_mcps = [
|
|
1180
|
-
self._make_metrics_dimensions_chart_mcp(element)
|
|
1181
|
-
for element in dashboard.dashboard_elements
|
|
1182
|
-
]
|
|
1183
|
-
dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
|
|
1184
|
-
|
|
1185
|
-
mcps = chart_mcps
|
|
1186
|
-
mcps.append(dashboard_mcp)
|
|
1187
|
-
|
|
1188
|
-
workunits = [mcp.as_workunit() for mcp in mcps]
|
|
1189
|
-
|
|
1190
|
-
return workunits
|
|
1191
|
-
|
|
1192
1129
|
def _input_fields_from_dashboard_element(
|
|
1193
1130
|
self, dashboard_element: LookerDashboardElement
|
|
1194
1131
|
) -> List[InputFieldClass]:
|
|
@@ -1271,6 +1208,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1271
1208
|
chart_urn = self._make_chart_urn(
|
|
1272
1209
|
element_id=dashboard_element.get_urn_element_id()
|
|
1273
1210
|
)
|
|
1211
|
+
|
|
1274
1212
|
input_fields_aspect = InputFieldsClass(
|
|
1275
1213
|
fields=self._input_fields_from_dashboard_element(dashboard_element)
|
|
1276
1214
|
)
|
|
@@ -1280,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1280
1218
|
aspect=input_fields_aspect,
|
|
1281
1219
|
)
|
|
1282
1220
|
|
|
1283
|
-
def
|
|
1221
|
+
def _should_skip_personal_folder_dashboard(
|
|
1222
|
+
self, dashboard_object: LookerAPIDashboard
|
|
1223
|
+
) -> bool:
|
|
1224
|
+
"""Check if dashboard should be skipped due to being in personal folder."""
|
|
1225
|
+
if not self.source_config.skip_personal_folders:
|
|
1226
|
+
return False
|
|
1227
|
+
|
|
1228
|
+
if dashboard_object.folder is not None and (
|
|
1229
|
+
dashboard_object.folder.is_personal
|
|
1230
|
+
or dashboard_object.folder.is_personal_descendant
|
|
1231
|
+
):
|
|
1232
|
+
self.reporter.info(
|
|
1233
|
+
title="Dropped Dashboard",
|
|
1234
|
+
message="Dropped due to being a personal folder",
|
|
1235
|
+
context=f"Dashboard ID: {dashboard_object.id}",
|
|
1236
|
+
)
|
|
1237
|
+
assert dashboard_object.id is not None
|
|
1238
|
+
self.reporter.report_dashboards_dropped(dashboard_object.id)
|
|
1239
|
+
return True
|
|
1240
|
+
return False
|
|
1241
|
+
|
|
1242
|
+
def _should_skip_dashboard_by_folder_path(
|
|
1243
|
+
self, looker_dashboard: LookerDashboard
|
|
1244
|
+
) -> bool:
|
|
1245
|
+
"""Check if dashboard should be skipped based on folder path pattern."""
|
|
1246
|
+
if (
|
|
1247
|
+
looker_dashboard.folder_path is not None
|
|
1248
|
+
and not self.source_config.folder_path_pattern.allowed(
|
|
1249
|
+
looker_dashboard.folder_path
|
|
1250
|
+
)
|
|
1251
|
+
):
|
|
1252
|
+
logger.debug(
|
|
1253
|
+
f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
|
|
1254
|
+
)
|
|
1255
|
+
self.reporter.report_dashboards_dropped(looker_dashboard.id)
|
|
1256
|
+
return True
|
|
1257
|
+
return False
|
|
1258
|
+
|
|
1259
|
+
def _fetch_dashboard_from_api(
|
|
1284
1260
|
self, dashboard_id: str, fields: List[str]
|
|
1285
|
-
) ->
|
|
1286
|
-
|
|
1287
|
-
Optional[looker_usage.LookerDashboardForUsage],
|
|
1288
|
-
str,
|
|
1289
|
-
datetime.datetime,
|
|
1290
|
-
datetime.datetime,
|
|
1291
|
-
]:
|
|
1292
|
-
start_time = datetime.datetime.now()
|
|
1293
|
-
assert dashboard_id is not None
|
|
1294
|
-
if not self.source_config.dashboard_pattern.allowed(dashboard_id):
|
|
1295
|
-
self.reporter.report_dashboards_dropped(dashboard_id)
|
|
1296
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1261
|
+
) -> Optional[LookerAPIDashboard]:
|
|
1262
|
+
"""Fetch dashboard object from Looker API with error handling."""
|
|
1297
1263
|
try:
|
|
1298
|
-
|
|
1264
|
+
return self.looker_api.dashboard(
|
|
1299
1265
|
dashboard_id=dashboard_id,
|
|
1300
1266
|
fields=fields,
|
|
1301
1267
|
)
|
|
1302
1268
|
except (SDKError, DeserializeError) as e:
|
|
1303
|
-
# A looker dashboard could be deleted in between the list and the get
|
|
1304
1269
|
self.reporter.report_warning(
|
|
1305
1270
|
title="Failed to fetch dashboard from the Looker API",
|
|
1306
1271
|
message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
|
|
1307
1272
|
context=f"Dashboard ID: {dashboard_id}",
|
|
1308
1273
|
exc=e,
|
|
1309
1274
|
)
|
|
1310
|
-
return
|
|
1275
|
+
return None
|
|
1311
1276
|
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1277
|
+
def _create_empty_result(
|
|
1278
|
+
self, dashboard_id: str, start_time: datetime.datetime
|
|
1279
|
+
) -> DashboardProcessingResult:
|
|
1280
|
+
"""Create an empty result for skipped or failed dashboard processing."""
|
|
1281
|
+
return DashboardProcessingResult(
|
|
1282
|
+
entities=[],
|
|
1283
|
+
dashboard_usage=None,
|
|
1284
|
+
dashboard_id=dashboard_id,
|
|
1285
|
+
start_time=start_time,
|
|
1286
|
+
end_time=datetime.datetime.now(),
|
|
1287
|
+
)
|
|
1324
1288
|
|
|
1325
|
-
|
|
1289
|
+
def process_dashboard(
|
|
1290
|
+
self, dashboard_id: str, fields: List[str]
|
|
1291
|
+
) -> DashboardProcessingResult:
|
|
1292
|
+
"""
|
|
1293
|
+
Process a single dashboard and return the metadata workunits.
|
|
1326
1294
|
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
and not self.source_config.folder_path_pattern.allowed(
|
|
1331
|
-
looker_dashboard.folder_path
|
|
1332
|
-
)
|
|
1333
|
-
):
|
|
1334
|
-
logger.debug(
|
|
1335
|
-
f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
|
|
1336
|
-
)
|
|
1337
|
-
return [], None, dashboard_id, start_time, datetime.datetime.now()
|
|
1295
|
+
Args:
|
|
1296
|
+
dashboard_id: The ID of the dashboard to process
|
|
1297
|
+
fields: List of fields to fetch from the Looker API
|
|
1338
1298
|
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1299
|
+
Returns:
|
|
1300
|
+
DashboardProcessingResult containing entities, usage data, and timing information
|
|
1301
|
+
"""
|
|
1302
|
+
start_time = datetime.datetime.now()
|
|
1343
1303
|
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
(
|
|
1347
|
-
MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
|
|
1348
|
-
if isinstance(mce, MetadataChangeEvent)
|
|
1349
|
-
else MetadataWorkUnit(
|
|
1350
|
-
id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
|
|
1351
|
-
)
|
|
1352
|
-
)
|
|
1353
|
-
for mce in mces
|
|
1354
|
-
]
|
|
1304
|
+
if dashboard_id is None:
|
|
1305
|
+
raise ValueError("Dashboard ID cannot be None")
|
|
1355
1306
|
|
|
1356
|
-
#
|
|
1357
|
-
|
|
1358
|
-
|
|
1307
|
+
# Fetch dashboard from API
|
|
1308
|
+
dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
|
|
1309
|
+
dashboard_id, fields
|
|
1359
1310
|
)
|
|
1311
|
+
if dashboard_object is None:
|
|
1312
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1313
|
+
|
|
1314
|
+
# Check if dashboard should be skipped due to personal folder
|
|
1315
|
+
if self._should_skip_personal_folder_dashboard(dashboard_object):
|
|
1316
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1360
1317
|
|
|
1361
|
-
|
|
1318
|
+
# Convert to internal representation
|
|
1319
|
+
looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
|
|
1362
1320
|
|
|
1321
|
+
# Check folder path pattern
|
|
1322
|
+
if self._should_skip_dashboard_by_folder_path(looker_dashboard):
|
|
1323
|
+
return self._create_empty_result(dashboard_id, start_time)
|
|
1324
|
+
|
|
1325
|
+
# Build entities list
|
|
1326
|
+
entities: List[Entity] = []
|
|
1327
|
+
|
|
1328
|
+
# Add folder containers if dashboard has a folder
|
|
1329
|
+
if looker_dashboard.folder:
|
|
1330
|
+
entities.extend(
|
|
1331
|
+
list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
|
|
1332
|
+
)
|
|
1333
|
+
|
|
1334
|
+
# Add dashboard and chart entities
|
|
1335
|
+
entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
|
|
1336
|
+
|
|
1337
|
+
# Report successful processing
|
|
1363
1338
|
self.reporter.report_dashboards_scanned()
|
|
1364
1339
|
|
|
1365
|
-
#
|
|
1340
|
+
# Generate usage tracking object
|
|
1366
1341
|
dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
|
|
1367
1342
|
dashboard_object
|
|
1368
1343
|
)
|
|
1369
1344
|
|
|
1370
|
-
return (
|
|
1371
|
-
|
|
1372
|
-
dashboard_usage,
|
|
1373
|
-
dashboard_id,
|
|
1374
|
-
start_time,
|
|
1375
|
-
datetime.datetime.now(),
|
|
1345
|
+
return DashboardProcessingResult(
|
|
1346
|
+
entities=entities,
|
|
1347
|
+
dashboard_usage=dashboard_usage,
|
|
1348
|
+
dashboard_id=dashboard_id,
|
|
1349
|
+
start_time=start_time,
|
|
1350
|
+
end_time=datetime.datetime.now(),
|
|
1376
1351
|
)
|
|
1377
1352
|
|
|
1378
|
-
def
|
|
1353
|
+
def _get_folder_and_ancestors_containers(
|
|
1379
1354
|
self, folder: LookerFolder
|
|
1380
|
-
) -> Iterable[
|
|
1355
|
+
) -> Iterable[Container]:
|
|
1381
1356
|
for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
|
|
1382
1357
|
yield from self._emit_folder_as_container(
|
|
1383
1358
|
self._get_looker_folder(ancestor_folder)
|
|
@@ -1448,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1448
1423
|
).workunit_processor,
|
|
1449
1424
|
]
|
|
1450
1425
|
|
|
1451
|
-
def
|
|
1426
|
+
def emit_independent_looks_entities(
|
|
1452
1427
|
self, dashboard_element: LookerDashboardElement
|
|
1453
|
-
) -> Iterable[
|
|
1428
|
+
) -> Iterable[Union[Container, Chart]]:
|
|
1454
1429
|
if dashboard_element.folder: # independent look
|
|
1455
|
-
yield from self.
|
|
1430
|
+
yield from self._get_folder_and_ancestors_containers(
|
|
1456
1431
|
dashboard_element.folder
|
|
1457
1432
|
)
|
|
1458
1433
|
|
|
1459
|
-
yield from
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
dashboard=None,
|
|
1463
|
-
)
|
|
1464
|
-
)
|
|
1465
|
-
|
|
1466
|
-
yield from auto_workunit(
|
|
1467
|
-
[
|
|
1468
|
-
self._make_metrics_dimensions_chart_mcp(
|
|
1469
|
-
dashboard_element,
|
|
1470
|
-
)
|
|
1471
|
-
]
|
|
1434
|
+
yield from self._make_chart_entities(
|
|
1435
|
+
dashboard_element=dashboard_element,
|
|
1436
|
+
dashboard=None,
|
|
1472
1437
|
)
|
|
1473
1438
|
|
|
1474
|
-
def extract_independent_looks(self) -> Iterable[
|
|
1439
|
+
def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
|
|
1475
1440
|
"""
|
|
1476
|
-
Emit
|
|
1477
|
-
"""
|
|
1478
|
-
if self.source_config.extract_independent_looks is False:
|
|
1479
|
-
return
|
|
1441
|
+
Emit entities for Looks which are not part of any Dashboard.
|
|
1480
1442
|
|
|
1481
|
-
|
|
1443
|
+
Returns: Containers for the folders and ancestors folders and Charts for the looks
|
|
1444
|
+
"""
|
|
1445
|
+
logger.debug("Extracting Looks not part of any Dashboard")
|
|
1482
1446
|
|
|
1483
|
-
logger.debug("Extracting looks not part of Dashboard")
|
|
1484
1447
|
look_fields: List[str] = [
|
|
1485
1448
|
"id",
|
|
1486
1449
|
"title",
|
|
@@ -1502,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1502
1465
|
all_looks: List[Look] = self.looker_api.all_looks(
|
|
1503
1466
|
fields=look_fields, soft_deleted=self.source_config.include_deleted
|
|
1504
1467
|
)
|
|
1468
|
+
|
|
1505
1469
|
for look in all_looks:
|
|
1470
|
+
# Skip looks that are already referenced from a dashboard
|
|
1471
|
+
if look.id is None:
|
|
1472
|
+
logger.warning("Encountered Look with no ID, skipping.")
|
|
1473
|
+
continue
|
|
1474
|
+
|
|
1506
1475
|
if look.id in self.reachable_look_registry:
|
|
1507
|
-
# This look is reachable from the Dashboard
|
|
1508
1476
|
continue
|
|
1509
1477
|
|
|
1510
1478
|
if look.query_id is None:
|
|
1511
1479
|
logger.info(f"query_id is None for look {look.title}({look.id})")
|
|
1512
1480
|
continue
|
|
1513
1481
|
|
|
1482
|
+
# Skip looks in personal folders if configured
|
|
1514
1483
|
if self.source_config.skip_personal_folders:
|
|
1515
1484
|
if look.folder is not None and (
|
|
1516
1485
|
look.folder.is_personal or look.folder.is_personal_descendant
|
|
@@ -1521,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1521
1490
|
context=f"Look ID: {look.id}",
|
|
1522
1491
|
)
|
|
1523
1492
|
|
|
1524
|
-
assert look.id, "Looker id is null"
|
|
1525
1493
|
self.reporter.report_charts_dropped(look.id)
|
|
1526
1494
|
continue
|
|
1527
1495
|
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1496
|
+
# Fetch the Look's query and filter to allowed fields
|
|
1497
|
+
query: Optional[Query] = None
|
|
1498
|
+
try:
|
|
1499
|
+
look_with_query = self.looker_api.get_look(look.id, fields=["query"])
|
|
1500
|
+
query_obj = look_with_query.query
|
|
1501
|
+
if query_obj:
|
|
1502
|
+
query = Query(
|
|
1503
|
+
**{
|
|
1504
|
+
key: getattr(query_obj, key)
|
|
1505
|
+
for key in query_fields
|
|
1506
|
+
if hasattr(query_obj, key)
|
|
1507
|
+
}
|
|
1508
|
+
)
|
|
1509
|
+
except Exception as exc:
|
|
1510
|
+
logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
|
|
1511
|
+
continue
|
|
1540
1512
|
|
|
1541
|
-
dashboard_element
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
),
|
|
1513
|
+
dashboard_element = self._get_looker_dashboard_element(
|
|
1514
|
+
DashboardElement(
|
|
1515
|
+
id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
|
|
1516
|
+
# we add the "looks_" prefix to look.id.
|
|
1517
|
+
title=look.title,
|
|
1518
|
+
subtitle_text=look.description,
|
|
1519
|
+
look_id=look.id,
|
|
1520
|
+
dashboard_id=None, # As this is an independent look
|
|
1521
|
+
look=LookWithQuery(
|
|
1522
|
+
query=query,
|
|
1523
|
+
folder=getattr(look, "folder", None),
|
|
1524
|
+
user_id=getattr(look, "user_id", None),
|
|
1553
1525
|
),
|
|
1554
1526
|
)
|
|
1555
1527
|
)
|
|
1556
1528
|
|
|
1557
1529
|
if dashboard_element is not None:
|
|
1558
|
-
logger.debug(f"Emitting
|
|
1559
|
-
yield from self.
|
|
1530
|
+
logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
|
|
1531
|
+
yield from self.emit_independent_looks_entities(
|
|
1560
1532
|
dashboard_element=dashboard_element
|
|
1561
1533
|
)
|
|
1562
1534
|
|
|
1563
|
-
|
|
1535
|
+
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
1536
|
+
"""
|
|
1537
|
+
Note: Returns Entities from SDKv2 where possible else MCPs only.
|
|
1564
1538
|
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
dashboards = self.looker_api.all_dashboards(fields="id")
|
|
1568
|
-
deleted_dashboards = (
|
|
1569
|
-
self.looker_api.search_dashboards(fields="id", deleted="true")
|
|
1570
|
-
if self.source_config.include_deleted
|
|
1571
|
-
else []
|
|
1572
|
-
)
|
|
1573
|
-
if deleted_dashboards != []:
|
|
1574
|
-
logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
|
|
1539
|
+
Using SDKv2: Containers, Datasets, Dashboards and Charts
|
|
1540
|
+
Using MCPW: Tags, DashboardUsageStats and UserResourceMapping
|
|
1575
1541
|
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1542
|
+
TODO: Convert MCPWs to use SDKv2 entities
|
|
1543
|
+
"""
|
|
1544
|
+
with self.reporter.report_stage("list_dashboards"):
|
|
1545
|
+
# Fetch all dashboards (not deleted)
|
|
1546
|
+
dashboards = self.looker_api.all_dashboards(fields="id")
|
|
1547
|
+
|
|
1548
|
+
# Optionally fetch deleted dashboards if configured
|
|
1549
|
+
if self.source_config.include_deleted:
|
|
1550
|
+
deleted_dashboards = self.looker_api.search_dashboards(
|
|
1551
|
+
fields="id", deleted="true"
|
|
1552
|
+
)
|
|
1586
1553
|
else:
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1554
|
+
deleted_dashboards = []
|
|
1555
|
+
|
|
1556
|
+
if deleted_dashboards:
|
|
1557
|
+
logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
|
|
1591
1558
|
|
|
1592
|
-
|
|
1593
|
-
|
|
1559
|
+
# Collect all dashboard IDs (including deleted if applicable)
|
|
1560
|
+
all_dashboard_ids: List[Optional[str]] = [
|
|
1561
|
+
dashboard.id for dashboard in dashboards
|
|
1562
|
+
]
|
|
1563
|
+
all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
|
|
1564
|
+
|
|
1565
|
+
# Filter dashboard IDs based on the allowed pattern
|
|
1566
|
+
filtered_dashboard_ids: List[str] = []
|
|
1567
|
+
for dashboard_id in all_dashboard_ids:
|
|
1568
|
+
if dashboard_id is None:
|
|
1569
|
+
continue
|
|
1570
|
+
if not self.source_config.dashboard_pattern.allowed(dashboard_id):
|
|
1571
|
+
self.reporter.report_dashboards_dropped(dashboard_id)
|
|
1572
|
+
else:
|
|
1573
|
+
filtered_dashboard_ids.append(dashboard_id)
|
|
1574
|
+
|
|
1575
|
+
# Use the filtered list for further processing
|
|
1576
|
+
dashboard_ids: List[str] = filtered_dashboard_ids
|
|
1577
|
+
|
|
1578
|
+
# Report the total number of dashboards to be processed
|
|
1579
|
+
self.reporter.report_total_dashboards(len(dashboard_ids))
|
|
1580
|
+
|
|
1581
|
+
# Define the fields to extract for each dashboard
|
|
1582
|
+
dashboard_fields = [
|
|
1594
1583
|
"id",
|
|
1595
1584
|
"title",
|
|
1596
1585
|
"dashboard_elements",
|
|
@@ -1606,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1606
1595
|
"deleted_at",
|
|
1607
1596
|
"deleter_id",
|
|
1608
1597
|
]
|
|
1598
|
+
|
|
1599
|
+
# Add usage-related fields if usage history extraction is enabled
|
|
1609
1600
|
if self.source_config.extract_usage_history:
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1601
|
+
dashboard_fields.extend(
|
|
1602
|
+
[
|
|
1603
|
+
"favorite_count",
|
|
1604
|
+
"view_count",
|
|
1605
|
+
"last_viewed_at",
|
|
1606
|
+
]
|
|
1607
|
+
)
|
|
1615
1608
|
|
|
1609
|
+
# Store dashboards for which usage stats will be extracted
|
|
1616
1610
|
looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
|
|
1617
1611
|
|
|
1612
|
+
# Process dashboard and chart metadata
|
|
1618
1613
|
with self.reporter.report_stage("dashboard_chart_metadata"):
|
|
1614
|
+
dashboard_jobs = (
|
|
1615
|
+
(dashboard_id, dashboard_fields)
|
|
1616
|
+
for dashboard_id in dashboard_ids
|
|
1617
|
+
if dashboard_id is not None
|
|
1618
|
+
)
|
|
1619
1619
|
for job in BackpressureAwareExecutor.map(
|
|
1620
1620
|
self.process_dashboard,
|
|
1621
|
-
|
|
1622
|
-
(dashboard_id, fields)
|
|
1623
|
-
for dashboard_id in dashboard_ids
|
|
1624
|
-
if dashboard_id is not None
|
|
1625
|
-
),
|
|
1621
|
+
dashboard_jobs,
|
|
1626
1622
|
max_workers=self.source_config.max_threads,
|
|
1627
1623
|
):
|
|
1628
|
-
(
|
|
1629
|
-
|
|
1630
|
-
dashboard_usage,
|
|
1631
|
-
dashboard_id,
|
|
1632
|
-
start_time,
|
|
1633
|
-
end_time,
|
|
1634
|
-
) = job.result()
|
|
1624
|
+
result: DashboardProcessingResult = job.result()
|
|
1625
|
+
|
|
1635
1626
|
logger.debug(
|
|
1636
|
-
f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
|
|
1627
|
+
f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
|
|
1628
|
+
)
|
|
1629
|
+
self.reporter.report_upstream_latency(
|
|
1630
|
+
result.start_time, result.end_time
|
|
1637
1631
|
)
|
|
1638
|
-
self.reporter.report_upstream_latency(start_time, end_time)
|
|
1639
1632
|
|
|
1640
|
-
yield from
|
|
1641
|
-
if dashboard_usage is not None:
|
|
1642
|
-
looker_dashboards_for_usage.append(dashboard_usage)
|
|
1633
|
+
yield from result.entities
|
|
1643
1634
|
|
|
1635
|
+
if result.dashboard_usage is not None:
|
|
1636
|
+
looker_dashboards_for_usage.append(result.dashboard_usage)
|
|
1637
|
+
|
|
1638
|
+
# Warn if owner extraction was enabled but no emails could be found
|
|
1644
1639
|
if (
|
|
1645
1640
|
self.source_config.extract_owners
|
|
1646
1641
|
and self.reporter.resolved_user_ids > 0
|
|
@@ -1652,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
1652
1647
|
"Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
|
|
1653
1648
|
)
|
|
1654
1649
|
|
|
1655
|
-
# Extract independent
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1650
|
+
# Extract independent looks first, so their explores are considered in _make_explore_containers.
|
|
1651
|
+
if self.source_config.extract_independent_looks:
|
|
1652
|
+
with self.reporter.report_stage("extract_independent_looks"):
|
|
1653
|
+
yield from self.extract_independent_looks()
|
|
1659
1654
|
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
id=f"looker-{event.proposedSnapshot.urn}", mce=event
|
|
1664
|
-
)
|
|
1665
|
-
elif isinstance(event, MetadataChangeProposalWrapper):
|
|
1666
|
-
yield event.as_workunit()
|
|
1667
|
-
elif isinstance(event, MetadataWorkUnit):
|
|
1668
|
-
yield event
|
|
1669
|
-
else:
|
|
1670
|
-
raise Exception(f"Unexpected type of event {event}")
|
|
1671
|
-
self.reporter.report_stage_end("explore_metadata")
|
|
1655
|
+
# Process explore containers and yield them.
|
|
1656
|
+
with self.reporter.report_stage("explore_metadata"):
|
|
1657
|
+
yield from self._make_explore_containers()
|
|
1672
1658
|
|
|
1673
1659
|
if (
|
|
1674
1660
|
self.source_config.tag_measures_and_dimensions
|
|
1675
1661
|
and self.reporter.explores_scanned > 0
|
|
1676
1662
|
):
|
|
1677
|
-
# Emit tag
|
|
1663
|
+
# Emit tag MCPs for measures and dimensions if we produced any explores:
|
|
1664
|
+
# Tags MCEs are converted to MCPs
|
|
1678
1665
|
for tag_mce in LookerUtil.get_tag_mces():
|
|
1679
|
-
yield
|
|
1680
|
-
id=f"tag-{tag_mce.proposedSnapshot.urn}",
|
|
1681
|
-
mce=tag_mce,
|
|
1682
|
-
)
|
|
1666
|
+
yield from auto_workunit(mcps_from_mce(tag_mce))
|
|
1683
1667
|
|
|
1684
1668
|
# Extract usage history is enabled
|
|
1685
1669
|
if self.source_config.extract_usage_history:
|
|
1686
|
-
self.reporter.
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1670
|
+
with self.reporter.report_stage("usage_extraction"):
|
|
1671
|
+
usage_mcps: List[MetadataChangeProposalWrapper] = (
|
|
1672
|
+
self.extract_usage_stat(
|
|
1673
|
+
looker_dashboards_for_usage, self.chart_urns
|
|
1674
|
+
)
|
|
1675
|
+
)
|
|
1676
|
+
yield from auto_workunit(usage_mcps)
|
|
1693
1677
|
|
|
1694
|
-
#
|
|
1678
|
+
# Ingest looker user resource mapping workunits.
|
|
1695
1679
|
logger.info("Ingesting looker user resource mapping workunits")
|
|
1696
|
-
self.reporter.
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1680
|
+
with self.reporter.report_stage("user_resource_extraction"):
|
|
1681
|
+
yield from auto_workunit(
|
|
1682
|
+
self.user_registry.to_platform_resource(
|
|
1683
|
+
self.source_config.platform_instance
|
|
1684
|
+
)
|
|
1700
1685
|
)
|
|
1701
|
-
)
|
|
1702
1686
|
|
|
1703
1687
|
def get_report(self) -> SourceReport:
|
|
1704
1688
|
return self.reporter
|