acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -3,11 +3,11 @@ from typing import Dict, List, Optional
|
|
|
3
3
|
|
|
4
4
|
from datahub.ingestion.source.looker.looker_common import LookerViewId, ViewFieldValue
|
|
5
5
|
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
|
|
6
|
+
from datahub.ingestion.source.looker.looker_constant import NAME
|
|
6
7
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
|
|
7
8
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
8
9
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
9
10
|
BASE_PROJECT_NAME,
|
|
10
|
-
NAME,
|
|
11
11
|
LookMLSourceReport,
|
|
12
12
|
)
|
|
13
13
|
|
|
@@ -12,12 +12,12 @@ from datahub.ingestion.source.looker.looker_constant import (
|
|
|
12
12
|
DIMENSION_GROUPS,
|
|
13
13
|
DIMENSIONS,
|
|
14
14
|
MEASURES,
|
|
15
|
+
NAME,
|
|
15
16
|
)
|
|
16
17
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
|
|
17
18
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
18
19
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
19
20
|
DERIVED_VIEW_SUFFIX,
|
|
20
|
-
NAME,
|
|
21
21
|
LookMLSourceReport,
|
|
22
22
|
)
|
|
23
23
|
from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver
|
|
@@ -28,11 +28,10 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
28
28
|
StatefulIngestionConfigBase,
|
|
29
29
|
)
|
|
30
30
|
from datahub.utilities.lossy_collections import LossyList
|
|
31
|
+
from datahub.utilities.stats_collections import TopKDict, float_top_k_dict
|
|
31
32
|
|
|
32
33
|
logger = logging.getLogger(__name__)
|
|
33
34
|
|
|
34
|
-
NAME: str = "name"
|
|
35
|
-
|
|
36
35
|
BASE_PROJECT_NAME = "__BASE"
|
|
37
36
|
|
|
38
37
|
EXPLORE_FILE_EXTENSION = ".explore.lkml"
|
|
@@ -47,6 +46,9 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
|
|
|
47
46
|
@dataclass
|
|
48
47
|
class LookMLSourceReport(StaleEntityRemovalSourceReport):
|
|
49
48
|
git_clone_latency: Optional[timedelta] = None
|
|
49
|
+
looker_query_api_latency_seconds: TopKDict[str, float] = dataclass_field(
|
|
50
|
+
default_factory=float_top_k_dict
|
|
51
|
+
)
|
|
50
52
|
models_discovered: int = 0
|
|
51
53
|
models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
52
54
|
views_discovered: int = 0
|
|
@@ -81,6 +83,11 @@ class LookMLSourceReport(StaleEntityRemovalSourceReport):
|
|
|
81
83
|
self.api_stats = self._looker_api.compute_stats()
|
|
82
84
|
return super().compute_stats()
|
|
83
85
|
|
|
86
|
+
def report_looker_query_api_latency(
|
|
87
|
+
self, view_urn: str, latency: timedelta
|
|
88
|
+
) -> None:
|
|
89
|
+
self.looker_query_api_latency_seconds[view_urn] = latency.total_seconds()
|
|
90
|
+
|
|
84
91
|
|
|
85
92
|
class LookMLSourceConfig(
|
|
86
93
|
LookerCommonConfig, StatefulIngestionConfigBase, EnvConfigMixin
|
|
@@ -122,7 +129,17 @@ class LookMLSourceConfig(
|
|
|
122
129
|
description="List of regex patterns for LookML views to include in the extraction.",
|
|
123
130
|
)
|
|
124
131
|
parse_table_names_from_sql: bool = Field(True, description="See note below.")
|
|
125
|
-
|
|
132
|
+
use_api_for_view_lineage: bool = Field(
|
|
133
|
+
False,
|
|
134
|
+
description="When enabled, uses Looker API to get SQL representation of views for lineage parsing instead of parsing LookML files directly. Requires 'api' configuration to be provided."
|
|
135
|
+
"Coverage of regex based lineage extraction has limitations, it only supportes ${TABLE}.column_name syntax, See (https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions) to"
|
|
136
|
+
"understand the other substitutions and cross-references allowed in LookML.",
|
|
137
|
+
)
|
|
138
|
+
use_api_cache_for_view_lineage: bool = Field(
|
|
139
|
+
False,
|
|
140
|
+
description="When enabled, uses Looker API server-side caching for query execution. Requires 'api' configuration to be provided.",
|
|
141
|
+
)
|
|
142
|
+
api: Optional[LookerAPIConfig] = None
|
|
126
143
|
project_name: Optional[str] = Field(
|
|
127
144
|
None,
|
|
128
145
|
description="Required if you don't specify the `api` section. The project name within which all the model "
|
|
@@ -239,6 +256,17 @@ class LookMLSourceConfig(
|
|
|
239
256
|
)
|
|
240
257
|
return values
|
|
241
258
|
|
|
259
|
+
@root_validator(skip_on_failure=True)
|
|
260
|
+
def check_api_provided_for_view_lineage(cls, values):
|
|
261
|
+
"""Validate that we must have an api credential to use Looker API for view's column lineage"""
|
|
262
|
+
if not values.get("api") and values.get("use_api_for_view_lineage"):
|
|
263
|
+
raise ValueError(
|
|
264
|
+
"API credential was not found. LookML source requires api credentials "
|
|
265
|
+
"for Looker to use Looker APIs for view's column lineage extraction."
|
|
266
|
+
"Set `use_api_for_view_lineage` to False to skip using Looker APIs."
|
|
267
|
+
)
|
|
268
|
+
return values
|
|
269
|
+
|
|
242
270
|
@validator("base_folder", always=True)
|
|
243
271
|
def check_base_folder_if_not_provided(
|
|
244
272
|
cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
|
|
@@ -4,10 +4,10 @@ import logging
|
|
|
4
4
|
from typing import ClassVar, Dict, List, Set
|
|
5
5
|
|
|
6
6
|
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
|
|
7
|
+
from datahub.ingestion.source.looker.looker_constant import NAME
|
|
7
8
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
|
|
8
9
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
9
10
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
10
|
-
NAME,
|
|
11
11
|
LookMLSourceConfig,
|
|
12
12
|
LookMLSourceReport,
|
|
13
13
|
)
|
|
@@ -4,7 +4,7 @@ import tempfile
|
|
|
4
4
|
from collections import OrderedDict
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
|
-
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
|
7
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
8
8
|
|
|
9
9
|
import lkml
|
|
10
10
|
import lkml.simple
|
|
@@ -12,8 +12,7 @@ from looker_sdk.error import SDKError
|
|
|
12
12
|
|
|
13
13
|
from datahub.configuration.git import GitInfo
|
|
14
14
|
from datahub.emitter.mce_builder import make_schema_field_urn
|
|
15
|
-
from datahub.emitter.
|
|
16
|
-
from datahub.emitter.mcp_builder import gen_containers
|
|
15
|
+
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
17
16
|
from datahub.ingestion.api.common import PipelineContext
|
|
18
17
|
from datahub.ingestion.api.decorators import (
|
|
19
18
|
SupportStatus,
|
|
@@ -27,6 +26,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
27
26
|
from datahub.ingestion.source.common.subtypes import (
|
|
28
27
|
BIContainerSubTypes,
|
|
29
28
|
DatasetSubTypes,
|
|
29
|
+
SourceCapabilityModifier,
|
|
30
30
|
)
|
|
31
31
|
from datahub.ingestion.source.git.git_import import GitClone
|
|
32
32
|
from datahub.ingestion.source.looker.looker_common import (
|
|
@@ -76,7 +76,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
|
76
76
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
77
77
|
StatefulIngestionSourceBase,
|
|
78
78
|
)
|
|
79
|
-
from datahub.metadata.com.linkedin.pegasus2avro.common import
|
|
79
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import Status
|
|
80
80
|
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
81
81
|
DatasetLineageTypeClass,
|
|
82
82
|
FineGrainedLineageDownstreamType,
|
|
@@ -84,18 +84,15 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
84
84
|
UpstreamLineage,
|
|
85
85
|
ViewProperties,
|
|
86
86
|
)
|
|
87
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
88
|
-
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
89
87
|
from datahub.metadata.schema_classes import (
|
|
90
88
|
AuditStampClass,
|
|
91
|
-
BrowsePathEntryClass,
|
|
92
|
-
BrowsePathsV2Class,
|
|
93
|
-
ContainerClass,
|
|
94
89
|
DatasetPropertiesClass,
|
|
95
90
|
FineGrainedLineageClass,
|
|
96
91
|
FineGrainedLineageUpstreamTypeClass,
|
|
97
|
-
SubTypesClass,
|
|
98
92
|
)
|
|
93
|
+
from datahub.sdk.container import Container
|
|
94
|
+
from datahub.sdk.dataset import Dataset
|
|
95
|
+
from datahub.sdk.entity import Entity
|
|
99
96
|
from datahub.sql_parsing.sqlglot_lineage import ColumnRef
|
|
100
97
|
|
|
101
98
|
VIEW_LANGUAGE_LOOKML: str = "lookml"
|
|
@@ -145,6 +142,8 @@ class LookerView:
|
|
|
145
142
|
ctx: PipelineContext,
|
|
146
143
|
extract_col_level_lineage: bool = False,
|
|
147
144
|
populate_sql_logic_in_descriptions: bool = False,
|
|
145
|
+
looker_client: Optional[LookerAPI] = None,
|
|
146
|
+
view_to_explore_map: Optional[Dict[str, str]] = None,
|
|
148
147
|
) -> Optional["LookerView"]:
|
|
149
148
|
view_name = view_context.name()
|
|
150
149
|
|
|
@@ -163,6 +162,8 @@ class LookerView:
|
|
|
163
162
|
config=config,
|
|
164
163
|
ctx=ctx,
|
|
165
164
|
reporter=reporter,
|
|
165
|
+
looker_client=looker_client,
|
|
166
|
+
view_to_explore_map=view_to_explore_map,
|
|
166
167
|
)
|
|
167
168
|
|
|
168
169
|
field_type_vs_raw_fields = OrderedDict(
|
|
@@ -273,6 +274,13 @@ class LookerManifest:
|
|
|
273
274
|
SourceCapability.LINEAGE_FINE,
|
|
274
275
|
"Enabled by default, configured using `extract_column_level_lineage`",
|
|
275
276
|
)
|
|
277
|
+
@capability(
|
|
278
|
+
SourceCapability.CONTAINERS,
|
|
279
|
+
"Enabled by default",
|
|
280
|
+
subtype_modifier=[
|
|
281
|
+
SourceCapabilityModifier.LOOKML_PROJECT,
|
|
282
|
+
],
|
|
283
|
+
)
|
|
276
284
|
class LookMLSource(StatefulIngestionSourceBase):
|
|
277
285
|
"""
|
|
278
286
|
This plugin extracts the following:
|
|
@@ -420,69 +428,40 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
420
428
|
|
|
421
429
|
return dataset_props
|
|
422
430
|
|
|
423
|
-
def
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
subTypeEvent = MetadataChangeProposalWrapper(
|
|
429
|
-
entityUrn=view_urn,
|
|
430
|
-
aspect=SubTypesClass(typeNames=[DatasetSubTypes.VIEW]),
|
|
431
|
-
)
|
|
432
|
-
events = [subTypeEvent]
|
|
431
|
+
def _build_dataset_entities(self, looker_view: LookerView) -> Iterable[Dataset]:
|
|
432
|
+
dataset_extra_aspects: List[Union[ViewProperties, Status]] = [
|
|
433
|
+
Status(removed=False)
|
|
434
|
+
]
|
|
433
435
|
if looker_view.view_details is not None:
|
|
434
|
-
|
|
435
|
-
entityUrn=view_urn,
|
|
436
|
-
aspect=looker_view.view_details,
|
|
437
|
-
)
|
|
438
|
-
events.append(viewEvent)
|
|
439
|
-
|
|
440
|
-
project_key = gen_project_key(self.source_config, looker_view.id.project_name)
|
|
441
|
-
|
|
442
|
-
container = ContainerClass(container=project_key.as_urn())
|
|
443
|
-
events.append(
|
|
444
|
-
MetadataChangeProposalWrapper(entityUrn=view_urn, aspect=container)
|
|
445
|
-
)
|
|
436
|
+
dataset_extra_aspects.append(looker_view.view_details)
|
|
446
437
|
|
|
447
|
-
events.append(
|
|
448
|
-
MetadataChangeProposalWrapper(
|
|
449
|
-
entityUrn=view_urn,
|
|
450
|
-
aspect=looker_view.id.get_browse_path_v2(self.source_config),
|
|
451
|
-
)
|
|
452
|
-
)
|
|
453
|
-
|
|
454
|
-
return events
|
|
455
|
-
|
|
456
|
-
def _build_dataset_mce(self, looker_view: LookerView) -> MetadataChangeEvent:
|
|
457
|
-
"""
|
|
458
|
-
Creates MetadataChangeEvent for the dataset, creating upstream lineage links
|
|
459
|
-
"""
|
|
460
|
-
logger.debug(f"looker_view = {looker_view.id}")
|
|
461
|
-
|
|
462
|
-
dataset_snapshot = DatasetSnapshot(
|
|
463
|
-
urn=looker_view.id.get_urn(self.source_config),
|
|
464
|
-
aspects=[], # we append to this list later on
|
|
465
|
-
)
|
|
466
|
-
browse_paths = BrowsePaths(
|
|
467
|
-
paths=[looker_view.id.get_browse_path(self.source_config)]
|
|
468
|
-
)
|
|
469
|
-
|
|
470
|
-
dataset_snapshot.aspects.append(browse_paths)
|
|
471
|
-
dataset_snapshot.aspects.append(Status(removed=False))
|
|
472
|
-
upstream_lineage = self._get_upstream_lineage(looker_view)
|
|
473
|
-
if upstream_lineage is not None:
|
|
474
|
-
dataset_snapshot.aspects.append(upstream_lineage)
|
|
475
438
|
schema_metadata = LookerUtil._get_schema(
|
|
476
439
|
self.source_config.platform_name,
|
|
477
440
|
looker_view.id.view_name,
|
|
478
441
|
looker_view.fields,
|
|
479
442
|
self.reporter,
|
|
480
443
|
)
|
|
481
|
-
if schema_metadata is not None:
|
|
482
|
-
dataset_snapshot.aspects.append(schema_metadata)
|
|
483
|
-
dataset_snapshot.aspects.append(self._get_custom_properties(looker_view))
|
|
484
444
|
|
|
485
|
-
|
|
445
|
+
custom_properties: DatasetPropertiesClass = self._get_custom_properties(
|
|
446
|
+
looker_view
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
yield Dataset(
|
|
450
|
+
platform=self.source_config.platform_name,
|
|
451
|
+
name=looker_view.id.get_view_dataset_name(self.source_config),
|
|
452
|
+
display_name=looker_view.id.view_name,
|
|
453
|
+
platform_instance=self.source_config.platform_instance,
|
|
454
|
+
env=self.source_config.env,
|
|
455
|
+
subtype=DatasetSubTypes.VIEW,
|
|
456
|
+
parent_container=looker_view.id.get_view_dataset_parent_container(
|
|
457
|
+
self.source_config
|
|
458
|
+
),
|
|
459
|
+
schema=schema_metadata,
|
|
460
|
+
custom_properties=custom_properties.customProperties,
|
|
461
|
+
external_url=custom_properties.externalUrl,
|
|
462
|
+
upstreams=self._get_upstream_lineage(looker_view),
|
|
463
|
+
extra_aspects=dataset_extra_aspects,
|
|
464
|
+
)
|
|
486
465
|
|
|
487
466
|
def get_project_name(self, model_name: str) -> str:
|
|
488
467
|
if self.source_config.project_name is not None:
|
|
@@ -546,7 +525,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
546
525
|
).workunit_processor,
|
|
547
526
|
]
|
|
548
527
|
|
|
549
|
-
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
528
|
+
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
|
|
550
529
|
with tempfile.TemporaryDirectory("lookml_tmp") as tmp_dir:
|
|
551
530
|
# Clone the base_folder if necessary.
|
|
552
531
|
if not self.source_config.base_folder:
|
|
@@ -707,7 +686,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
707
686
|
tmp_dir, project, project_visited, manifest_constants
|
|
708
687
|
)
|
|
709
688
|
|
|
710
|
-
def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901
|
|
689
|
+
def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]: # noqa: C901
|
|
711
690
|
assert self.source_config.base_folder
|
|
712
691
|
viewfile_loader = LookerViewFileLoader(
|
|
713
692
|
self.source_config.project_name,
|
|
@@ -730,6 +709,11 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
730
709
|
# Value: Tuple(model file name, connection name)
|
|
731
710
|
view_connection_map: Dict[str, Tuple[str, str]] = {}
|
|
732
711
|
|
|
712
|
+
# Map of view name to explore name for API-based view lineage
|
|
713
|
+
# A view can be referenced by multiple explores, we only need one of the explores to use Looker Query API
|
|
714
|
+
# Key: view_name, Value: explore_name
|
|
715
|
+
view_to_explore_map: Dict[str, str] = {}
|
|
716
|
+
|
|
733
717
|
# The ** means "this directory and all subdirectories", and hence should
|
|
734
718
|
# include all the files we want.
|
|
735
719
|
model_files = sorted(
|
|
@@ -784,37 +768,37 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
784
768
|
)
|
|
785
769
|
)
|
|
786
770
|
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
continue
|
|
771
|
+
model_explores_map = {d["name"]: d for d in model.explores}
|
|
772
|
+
for explore_dict in model.explores:
|
|
773
|
+
try:
|
|
774
|
+
if LookerRefinementResolver.is_refinement(explore_dict["name"]):
|
|
775
|
+
continue
|
|
793
776
|
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
for view_name in explore.upstream_views:
|
|
777
|
+
explore_dict = looker_refinement_resolver.apply_explore_refinement(
|
|
778
|
+
explore_dict
|
|
779
|
+
)
|
|
780
|
+
explore: LookerExplore = LookerExplore.from_dict(
|
|
781
|
+
model_name,
|
|
782
|
+
explore_dict,
|
|
783
|
+
model.resolved_includes,
|
|
784
|
+
viewfile_loader,
|
|
785
|
+
self.reporter,
|
|
786
|
+
model_explores_map,
|
|
787
|
+
)
|
|
788
|
+
if explore.upstream_views:
|
|
789
|
+
for view_name in explore.upstream_views:
|
|
790
|
+
if self.source_config.emit_reachable_views_only:
|
|
809
791
|
explore_reachable_views.add(view_name.include)
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
792
|
+
# Build view to explore mapping for API-based view lineage
|
|
793
|
+
view_to_explore_map[view_name.include] = explore.name
|
|
794
|
+
except Exception as e:
|
|
795
|
+
self.reporter.report_warning(
|
|
796
|
+
title="Failed to process explores",
|
|
797
|
+
message="Failed to process explore dictionary.",
|
|
798
|
+
context=f"Explore Details: {explore_dict}",
|
|
799
|
+
exc=e,
|
|
800
|
+
)
|
|
801
|
+
logger.debug("Failed to process explore", exc_info=e)
|
|
818
802
|
|
|
819
803
|
processed_view_files = processed_view_map.setdefault(
|
|
820
804
|
model.connection, set()
|
|
@@ -903,6 +887,10 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
903
887
|
populate_sql_logic_in_descriptions=self.source_config.populate_sql_logic_for_missing_descriptions,
|
|
904
888
|
config=self.source_config,
|
|
905
889
|
ctx=self.ctx,
|
|
890
|
+
looker_client=self.looker_client,
|
|
891
|
+
view_to_explore_map=view_to_explore_map
|
|
892
|
+
if view_to_explore_map
|
|
893
|
+
else None,
|
|
906
894
|
)
|
|
907
895
|
except Exception as e:
|
|
908
896
|
self.reporter.report_warning(
|
|
@@ -941,7 +929,7 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
941
929
|
maybe_looker_view.id.project_name
|
|
942
930
|
not in self.processed_projects
|
|
943
931
|
):
|
|
944
|
-
yield from self.
|
|
932
|
+
yield from self.gen_project_containers(
|
|
945
933
|
maybe_looker_view.id.project_name
|
|
946
934
|
)
|
|
947
935
|
|
|
@@ -949,15 +937,10 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
949
937
|
maybe_looker_view.id.project_name
|
|
950
938
|
)
|
|
951
939
|
|
|
952
|
-
|
|
940
|
+
yield from self._build_dataset_entities(
|
|
953
941
|
maybe_looker_view
|
|
954
|
-
):
|
|
955
|
-
yield mcp.as_workunit()
|
|
956
|
-
mce = self._build_dataset_mce(maybe_looker_view)
|
|
957
|
-
yield MetadataWorkUnit(
|
|
958
|
-
id=f"lookml-view-{maybe_looker_view.id}",
|
|
959
|
-
mce=mce,
|
|
960
942
|
)
|
|
943
|
+
|
|
961
944
|
processed_view_files.add(include.include)
|
|
962
945
|
else:
|
|
963
946
|
(
|
|
@@ -986,28 +969,24 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
986
969
|
self.source_config.tag_measures_and_dimensions
|
|
987
970
|
and self.reporter.events_produced != 0
|
|
988
971
|
):
|
|
989
|
-
# Emit tag MCEs for measures and dimensions:
|
|
972
|
+
# Emit tag MCEs for measures and dimensions if we produced any explores:
|
|
990
973
|
for tag_mce in LookerUtil.get_tag_mces():
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
974
|
+
# Convert MCE to MCPs
|
|
975
|
+
for mcp in mcps_from_mce(tag_mce):
|
|
976
|
+
yield mcp.as_workunit()
|
|
994
977
|
|
|
995
|
-
def
|
|
978
|
+
def gen_project_containers(self, project_name: str) -> Iterable[Container]:
|
|
996
979
|
project_key = gen_project_key(
|
|
997
980
|
self.source_config,
|
|
998
981
|
project_name,
|
|
999
982
|
)
|
|
1000
|
-
|
|
983
|
+
|
|
984
|
+
yield Container(
|
|
1001
985
|
container_key=project_key,
|
|
1002
|
-
|
|
1003
|
-
|
|
986
|
+
display_name=project_name,
|
|
987
|
+
subtype=BIContainerSubTypes.LOOKML_PROJECT,
|
|
988
|
+
parent_container=["Folders"],
|
|
1004
989
|
)
|
|
1005
|
-
yield MetadataChangeProposalWrapper(
|
|
1006
|
-
entityUrn=project_key.as_urn(),
|
|
1007
|
-
aspect=BrowsePathsV2Class(
|
|
1008
|
-
path=[BrowsePathEntryClass("Folders")],
|
|
1009
|
-
),
|
|
1010
|
-
).as_workunit()
|
|
1011
990
|
|
|
1012
991
|
def report_skipped_unreachable_views(
|
|
1013
992
|
self,
|