acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/looker/looker_common.py

@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
     User,
     WriteQuery,
 )
-from pydantic
+from pydantic import validator
 
 import datahub.emitter.mce_builder as builder
 from datahub.api.entities.platformresource.platform_resource import (
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
     PlatformResourceKey,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import ContainerKey
+from datahub.emitter.mcp_builder import ContainerKey
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.api.source import SourceReport
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     UpstreamClass,
     UpstreamLineage,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     ArrayTypeClass,
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
 )
 from datahub.metadata.schema_classes import (
     BrowsePathEntryClass,
-    BrowsePathsClass,
     BrowsePathsV2Class,
-
-    DatasetPropertiesClass,
+    EmbedClass,
     EnumTypeClass,
     FineGrainedLineageClass,
     GlobalTagsClass,
     SchemaMetadataClass,
-    StatusClass,
-    SubTypesClass,
     TagAssociationClass,
     TagPropertiesClass,
     TagSnapshotClass,
 )
 from datahub.metadata.urns import TagUrn
+from datahub.sdk.dataset import Dataset
 from datahub.sql_parsing.sqlglot_lineage import ColumnRef
 from datahub.utilities.lossy_collections import LossyList, LossySet
 from datahub.utilities.url_util import remove_port_from_url
@@ -242,13 +238,24 @@ class LookerViewId:
 
         dataset_name = config.view_naming_pattern.replace_variables(n_mapping)
 
-
+        generated_urn = builder.make_dataset_urn_with_platform_instance(
             platform=config.platform_name,
             name=dataset_name,
             platform_instance=config.platform_instance,
             env=config.env,
         )
 
+        logger.debug(
+            f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
+        )
+
+        return generated_urn
+
+    def get_view_dataset_name(self, config: LookerCommonConfig) -> str:
+        n_mapping: ViewNamingPatternMapping = self.get_mapping(config)
+        n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path)
+        return config.view_naming_pattern.replace_variables(n_mapping)
+
     def get_browse_path(self, config: LookerCommonConfig) -> str:
         browse_path = config.view_browse_pattern.replace_variables(
             self.get_mapping(config)
@@ -276,6 +283,22 @@ class LookerViewId:
             ],
         )
 
+    def get_view_dataset_parent_container(
+        self, config: LookerCommonConfig
+    ) -> List[str]:
+        project_key = gen_project_key(config, self.project_name)
+        view_path = (
+            remove_suffix(self.file_path, ".view.lkml")
+            if "{file_path}" in config.view_browse_pattern.pattern
+            else os.path.dirname(self.file_path)
+        )
+        path_entries = view_path.split("/") if view_path else []
+        return [
+            "Develop",
+            project_key.as_urn(),
+            *path_entries,
+        ]
+
 
 class ViewFieldType(Enum):
     DIMENSION = "Dimension"
@@ -284,6 +307,12 @@ class ViewFieldType(Enum):
     UNKNOWN = "Unknown"
 
 
+class ViewFieldDimensionGroupType(Enum):
+    # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
+    TIME = "time"
+    DURATION = "duration"
+
+
 class ViewFieldValue(Enum):
     NOT_AVAILABLE = "NotAvailable"
 
@@ -299,6 +328,7 @@ class ViewField:
     view_name: Optional[str] = None
     is_primary_key: bool = False
     tags: List[str] = dataclasses_field(default_factory=list)
+    group_label: Optional[str] = None
 
     # It is the list of ColumnRef for derived view defined using SQL otherwise simple column name
     upstream_fields: Union[List[ColumnRef]] = dataclasses_field(default_factory=list)
@@ -326,6 +356,7 @@ class ViewField:
         description = field_dict.get("description", default_description)
 
         label = field_dict.get("label", "")
+        group_label = field_dict.get("group_label")
 
         return ViewField(
             name=name,
@@ -336,6 +367,7 @@ class ViewField:
             field_type=type_cls,
             tags=field_dict.get("tags") or [],
             upstream_fields=upstream_column_ref,
+            group_label=group_label,
         )
 
 
@@ -370,6 +402,14 @@ class ExploreUpstreamViewField:
                 : -(len(self.field.field_group_variant.lower()) + 1)
             ]
 
+        # Validate that field_name is not empty to prevent invalid schema field URNs
+        if not field_name or not field_name.strip():
+            logger.warning(
+                f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
+                f"Skipping field to prevent invalid schema field URN generation."
+            )
+            return None
+
         assert view_name  # for lint false positive
 
         project_include: ProjectInclude = ProjectInclude(
@@ -449,15 +489,36 @@ class ExploreUpstreamViewField:
         )
 
 
-def create_view_project_map(
+def create_view_project_map(
+    view_fields: List[ViewField],
+    explore_primary_view: Optional[str] = None,
+    explore_project_name: Optional[str] = None,
+) -> Dict[str, str]:
     """
     Each view in a model has unique name.
     Use this function in scope of a model.
+
+    Args:
+        view_fields: List of ViewField objects
+        explore_primary_view: The primary view name of the explore (explore.view_name)
+        explore_project_name: The project name of the explore (explore.project_name)
     """
     view_project_map: Dict[str, str] = {}
     for view_field in view_fields:
         if view_field.view_name is not None and view_field.project_name is not None:
-
+            # Override field-level project assignment for the primary view when different
+            if (
+                view_field.view_name == explore_primary_view
+                and explore_project_name is not None
+                and explore_project_name != view_field.project_name
+            ):
+                logger.debug(
+                    f"Overriding project assignment for primary view '{view_field.view_name}': "
+                    f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
+                )
+                view_project_map[view_field.view_name] = explore_project_name
+            else:
+                view_project_map[view_field.view_name] = view_field.project_name
 
     return view_project_map
 
@@ -471,7 +532,10 @@ def get_view_file_path(
     logger.debug("Entered")
 
     for field in lkml_fields:
-        if
+        if (
+            LookerUtil.extract_view_name_from_lookml_model_explore_field(field)
+            == view_name
+        ):
             # This path is relative to git clone directory
             logger.debug(f"Found view({view_name}) file-path {field.source_file}")
             return field.source_file
@@ -701,14 +765,47 @@ class LookerUtil:
         ),
     }
 
+    # Add a pattern-based regex for checking if a tag is a group_label tag
+    GROUP_LABEL_TAG_PATTERN = re.compile(r"^looker\:group_label\:(.+)$")
+
     @staticmethod
     def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
-
-
-
-
+        # Check if this is a group_label tag
+        tag_name = tag_urn[len("urn:li:tag:") :]
+        match = LookerUtil.GROUP_LABEL_TAG_PATTERN.match(tag_name)
+
+        if match:
+            # This is a group_label tag, create tag definition on the fly
+            group_label_value = match.group(1)
+            tag_properties = TagPropertiesClass(
+                name=f"looker:group_label:{group_label_value}",
+                description=f"Fields with Looker group label: {group_label_value}",
+            )
+
+            return MetadataChangeEvent(
+                proposedSnapshot=TagSnapshotClass(urn=tag_urn, aspects=[tag_properties])
+            )
+        elif tag_urn in LookerUtil.tag_definitions:
+            # This is a predefined tag
+            return MetadataChangeEvent(
+                proposedSnapshot=TagSnapshotClass(
+                    urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]]
+                )
+            )
+        else:
+            # Should not happen, but handle gracefully
+            logger.warning(f"No tag definition found for tag URN: {tag_urn}")
+            return MetadataChangeEvent(
+                proposedSnapshot=TagSnapshotClass(
+                    urn=tag_urn,
+                    aspects=[
+                        TagPropertiesClass(
+                            name=tag_name,
+                            description=f"Tag: {tag_name}",
+                        )
+                    ],
+                )
             )
-        )
 
     @staticmethod
     def _get_tags_from_field_type(
@@ -732,6 +829,14 @@ class LookerUtil:
                 message=f"Failed to map view field type {field.field_type}. Won't emit tags for measure and dimension",
             )
 
+        # Add group_label as tags if present
+        if field.group_label:
+            schema_field_tags.append(
+                TagAssociationClass(
+                    tag=builder.make_tag_urn(f"looker:group_label:{field.group_label}")
+                )
+            )
+
         if schema_field_tags:
             return GlobalTagsClass(tags=schema_field_tags)
 
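A short illustration of the group_label handling above (this sketch is not part of the package diff; only make_tag_urn and the looker:group_label:<label> naming come from the hunks themselves, and the label value is hypothetical):

import datahub.emitter.mce_builder as builder

group_label = "Order Facts"  # hypothetical group_label from a LookML field
tag_urn = builder.make_tag_urn(f"looker:group_label:{group_label}")
# tag_urn == "urn:li:tag:looker:group_label:Order Facts"; _get_tag_mce_for_urn then
# recognizes the prefix via GROUP_LABEL_TAG_PATTERN and emits the tag entity on the fly.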
@@ -906,6 +1011,9 @@ class LookerExplore:
                         f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
                     )
                 else:
+                    logger.debug(
+                        f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
+                    )
                     upstream_views.append(
                         ProjectInclude(project=info[0].project, include=view_name)
                     )
@@ -934,6 +1042,7 @@ class LookerExplore:
     ) -> Optional["LookerExplore"]:
         try:
             explore = client.lookml_model_explore(model, explore_name)
+
             views: Set[str] = set()
             lkml_fields: List[LookmlModelExploreField] = (
                 explore_field_set_to_lkml_fields(explore)
@@ -1027,6 +1136,7 @@ class LookerExplore:
                                 else False
                             ),
                             upstream_fields=[],
+                            group_label=dim_field.field_group_label,
                         )
                     )
             if explore.fields.measures is not None:
@@ -1065,10 +1175,15 @@ class LookerExplore:
                                 else False
                             ),
                             upstream_fields=[],
+                            group_label=measure_field.field_group_label,
                         )
                     )
 
-            view_project_map: Dict[str, str] = create_view_project_map(
+            view_project_map: Dict[str, str] = create_view_project_map(
+                view_fields,
+                explore_primary_view=explore.view_name,
+                explore_project_name=explore.project_name,
+            )
             if view_project_map:
                 logger.debug(f"views and their projects: {view_project_map}")
 
@@ -1103,7 +1218,7 @@ class LookerExplore:
                     [column_ref] if column_ref is not None else []
                 )
 
-
+            looker_explore = cls(
                 name=explore_name,
                 model_name=model,
                 project_name=explore.project_name,
@@ -1121,6 +1236,8 @@ class LookerExplore:
                 source_file=explore.source_file,
                 tags=list(explore.tags) if explore.tags is not None else [],
             )
+            logger.debug(f"Created LookerExplore from API: {looker_explore}")
+            return looker_explore
         except SDKError as e:
             if "<title>Looker Not Found (404)</title>" in str(e):
                 logger.info(
@@ -1161,6 +1278,9 @@ class LookerExplore:
         dataset_name = config.explore_naming_pattern.replace_variables(
             self.get_mapping(config)
         )
+        logger.debug(
+            f"Generated dataset_name={dataset_name} for explore with model_name={self.model_name}, name={self.name}"
+        )
 
         return builder.make_dataset_urn_with_platform_instance(
             platform=config.platform_name,
@@ -1189,52 +1309,31 @@ class LookerExplore:
         reporter: SourceReport,
         base_url: str,
         extract_embed_urls: bool,
-    ) ->
-
-
-
-        dataset_snapshot = DatasetSnapshot(
-            urn=self.get_explore_urn(config),
-            aspects=[],  # we append to this list later on
-        )
-
-        model_key = gen_model_key(config, self.model_name)
-        browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
-        container = ContainerClass(container=model_key.as_urn())
-        dataset_snapshot.aspects.append(browse_paths)
-        dataset_snapshot.aspects.append(StatusClass(removed=False))
-
-        custom_properties = {
-            "project": self.project_name,
-            "model": self.model_name,
-            "looker.explore.label": self.label,
-            "looker.explore.name": self.name,
-            "looker.explore.file": self.source_file,
-        }
-        dataset_props = DatasetPropertiesClass(
-            name=str(self.label) if self.label else LookerUtil._display_name(self.name),
-            description=self.description,
-            customProperties={
-                k: str(v) for k, v in custom_properties.items() if v is not None
-            },
-        )
-        dataset_props.externalUrl = self._get_url(base_url)
+    ) -> Dataset:
+        """
+        Generate a Dataset metadata event for this Looker Explore.
 
-
+        Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
+        Passthrough explores and joins are handled via lineage and do not need additional nodes.
+        """
+        upstream_lineage = None
         view_name_to_urn_map: Dict[str, str] = {}
+
         if self.upstream_views is not None:
             assert self.project_name is not None
-            upstreams = []
+            upstreams: list[UpstreamClass] = []
             observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
+
             for view_ref in sorted(self.upstream_views):
                 # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
                 # if we raise error on file_path equal to None then existing test-cases will fail as mock data
                 # doesn't have required attributes.
                 file_path: str = (
                     cast(str, self.upstream_views_file_path[view_ref.include])
-                    if self.upstream_views_file_path
+                    if self.upstream_views_file_path.get(view_ref.include) is not None
                     else ViewFieldValue.NOT_AVAILABLE.value
                 )
+
                 view_urn = LookerViewId(
                     project_name=(
                         view_ref.project
@@ -1258,10 +1357,28 @@ class LookerExplore:
                 )
                 view_name_to_urn_map[view_ref.include] = view_urn
 
-            fine_grained_lineages = []
+            fine_grained_lineages: list[FineGrainedLineageClass] = []
             if config.extract_column_level_lineage:
                 for field in self.fields or []:
+                    # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
+                    if not field.name or not field.name.strip():
+                        logger.warning(
+                            f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
+                        )
+                        continue
+
                     for upstream_column_ref in field.upstream_fields:
+                        # Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
+                        if (
+                            not upstream_column_ref.column
+                            or not upstream_column_ref.column.strip()
+                        ):
+                            logger.warning(
+                                f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
+                                f"due to empty upstream column name in table '{upstream_column_ref.table}'"
+                            )
+                            continue
+
                         fine_grained_lineages.append(
                             FineGrainedLineageClass(
                                 upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
@@ -1281,9 +1398,11 @@ class LookerExplore:
                         )
 
             upstream_lineage = UpstreamLineage(
-                upstreams=upstreams,
+                upstreams=upstreams,
+                fineGrainedLineages=fine_grained_lineages or None,
             )
-
+
+        schema_metadata = None
         if self.fields is not None:
             schema_metadata = LookerUtil._get_schema(
                 platform_name=config.platform_name,
@@ -1291,42 +1410,46 @@ class LookerExplore:
                 view_fields=self.fields,
                 reporter=reporter,
             )
-            if schema_metadata is not None:
-                dataset_snapshot.aspects.append(schema_metadata)
 
-
-        mcp = MetadataChangeProposalWrapper(
-            entityUrn=dataset_snapshot.urn,
-            aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
-        )
-
-        proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
-            mce,
-            mcp,
-        ]
+        extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
 
-
-        explore_tag_urns: List[TagAssociationClass] = [
-            TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
-        ]
-        if explore_tag_urns:
-            dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
-
-        # If extracting embeds is enabled, produce an MCP for embed URL.
+        explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
         if extract_embed_urls:
-
-                dataset_snapshot.urn, self._get_embed_url(base_url)
-            )
-            proposals.append(embed_mcp)
+            extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
 
-
-
-
-
-
-
+        custom_properties: Dict[str, Optional[str]] = {
+            "project": self.project_name,
+            "model": self.model_name,
+            "looker.explore.label": self.label,
+            "looker.explore.name": self.name,
+            "looker.explore.file": self.source_file,
+        }
 
-        return
+        return Dataset(
+            platform=config.platform_name,
+            name=config.explore_naming_pattern.replace_variables(
+                self.get_mapping(config)
+            ),
+            display_name=str(self.label)
+            if self.label
+            else LookerUtil._display_name(self.name),
+            description=self.description,
+            subtype=DatasetSubTypes.LOOKER_EXPLORE,
+            env=config.env,
+            platform_instance=config.platform_instance,
+            custom_properties={
+                k: str(v) for k, v in custom_properties.items() if v is not None
+            },
+            external_url=self._get_url(base_url),
+            upstreams=upstream_lineage,
+            schema=schema_metadata,
+            parent_container=[
+                "Explore",
+                gen_model_key(config, self.model_name).as_urn(),
+            ],
+            tags=explore_tag_urns if explore_tag_urns else None,
+            extra_aspects=extra_aspects,
+        )
 
 
 def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
@@ -1362,6 +1485,7 @@ class LookerExploreRegistry:
 
     @lru_cache(maxsize=200)
    def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]:
+        logger.debug(f"Retrieving explore: model={model}, explore={explore}")
         looker_explore = LookerExplore.from_api(
             model,
             explore,
@@ -1369,6 +1493,12 @@ class LookerExploreRegistry:
             self.report,
             self.source_config,
         )
+        if looker_explore is not None:
+            logger.debug(
+                f"Found explore with model_name={looker_explore.model_name}, name={looker_explore.name}"
+            )
+        else:
+            logger.debug(f"No explore found for model={model}, explore={explore}")
         return looker_explore
 
     def compute_stats(self) -> Dict:

datahub/ingestion/source/looker/looker_config.py

@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
 
 import pydantic
 from looker_sdk.sdk.api40.models import DBConnection
-from pydantic import Field, validator
+from pydantic import Field, model_validator, validator
 
 from datahub.configuration import ConfigModel
-from datahub.configuration.common import
+from datahub.configuration.common import (
+    AllowDenyPattern,
+    ConfigurationError,
+    HiddenFromDocs,
+)
 from datahub.configuration.source_common import (
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
         assert isinstance(v, str), "pattern must be a string"
         return {"pattern": v}
 
+    @model_validator(mode="before")
+    @classmethod
+    def pydantic_v2_accept_raw_pattern(cls, v):
+        # Pydantic v2 compatibility: handle string input by converting to dict
+        if isinstance(v, str):
+            return {"pattern": v}
+        return v
+
     @classmethod
     def pydantic_validate_pattern(cls, v):
         assert isinstance(v, NamingPattern)
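The model_validator added above appears to mirror the existing string-to-dict pre-validator for pydantic v2, so a naming pattern can still be supplied as a plain string. A minimal standalone sketch of the same pattern, assuming pydantic v2 is installed (the Pattern class here is illustrative, not the package's own):

from pydantic import BaseModel, model_validator


class Pattern(BaseModel):
    pattern: str

    @model_validator(mode="before")
    @classmethod
    def accept_raw_string(cls, value):
        # A bare string is treated as shorthand for {"pattern": <string>}.
        if isinstance(value, str):
            return {"pattern": value}
        return value


assert Pattern.model_validate("{model}.{name}").pattern == "{model}.{name}"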
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
         description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
         "discoverable. When disabled, adds this information to the description of the column.",
     )
-    platform_name: str = Field(
+    platform_name: HiddenFromDocs[str] = Field(
         # TODO: This shouldn't be part of the config.
         "looker",
         description="Default platform name.",
-        hidden_from_docs=True,
     )
     extract_column_level_lineage: bool = Field(
         True,

datahub/ingestion/source/looker/looker_lib_wrapper.py

@@ -2,6 +2,7 @@
 import json
 import logging
 import os
+from enum import Enum
 from functools import lru_cache
 from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
 
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
 logger = logging.getLogger(__name__)
 
 
+class LookerQueryResponseFormat(Enum):
+    # result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
+    JSON = "json"
+    SQL = (
+        "sql"  # Note: This does not execute the query, it only generates the SQL query.
+    )
+
+
 class TransportOptionsConfig(ConfigModel):
     timeout: int
     headers: MutableMapping[str, str]
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
     search_looks_calls: int = 0
     search_dashboards_calls: int = 0
     all_user_calls: int = 0
+    generate_sql_query_calls: int = 0
 
 
 class LookerAPI:
@@ -113,7 +123,7 @@ class LookerAPI:
             )
         except SDKError as e:
             raise ConfigurationError(
-
+                "Failed to connect/authenticate with looker - check your configuration"
             ) from e
 
         self.client_stats = LookerAPIStats()
@@ -170,17 +180,40 @@ class LookerAPI:
         logger.debug(f"Executing query {write_query}")
         self.client_stats.query_calls += 1
 
-
-            result_format=
+        response = self.client.run_inline_query(
+            result_format=LookerQueryResponseFormat.JSON.value,
             body=write_query,
             transport_options=self.transport_options,
        )
 
+        data = json.loads(response)
+
         logger.debug("=================Response=================")
-        data = json.loads(response_json)
         logger.debug("Length of response: %d", len(data))
         return data
 
+    def generate_sql_query(
+        self, write_query: WriteQuery, use_cache: bool = False
+    ) -> str:
+        """
+        Generates a SQL query string for a given WriteQuery.
+
+        Note: This does not execute the query, it only generates the SQL query.
+        """
+        logger.debug(f"Generating SQL query for {write_query}")
+        self.client_stats.generate_sql_query_calls += 1
+
+        response = self.client.run_inline_query(
+            result_format=LookerQueryResponseFormat.SQL.value,
+            body=write_query,
+            transport_options=self.transport_options,
+            cache=use_cache,
+        )
+
+        logger.debug("=================Response=================")
+        logger.debug("Length of SQL response: %d", len(response))
+        return str(response)
+
     def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
         self.client_stats.dashboard_calls += 1
         return self.client.dashboard(