acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
|
|
|
28
28
|
User,
|
|
29
29
|
WriteQuery,
|
|
30
30
|
)
|
|
31
|
-
from pydantic
|
|
31
|
+
from pydantic import validator
|
|
32
32
|
|
|
33
33
|
import datahub.emitter.mce_builder as builder
|
|
34
34
|
from datahub.api.entities.platformresource.platform_resource import (
|
|
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
|
|
|
36
36
|
PlatformResourceKey,
|
|
37
37
|
)
|
|
38
38
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
39
|
-
from datahub.emitter.mcp_builder import ContainerKey
|
|
39
|
+
from datahub.emitter.mcp_builder import ContainerKey
|
|
40
40
|
from datahub.ingestion.api.report import Report
|
|
41
41
|
from datahub.ingestion.api.source import SourceReport
|
|
42
42
|
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
|
|
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
|
72
72
|
UpstreamClass,
|
|
73
73
|
UpstreamLineage,
|
|
74
74
|
)
|
|
75
|
-
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
|
|
76
75
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
|
|
77
76
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
78
77
|
ArrayTypeClass,
|
|
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
|
90
89
|
)
|
|
91
90
|
from datahub.metadata.schema_classes import (
|
|
92
91
|
BrowsePathEntryClass,
|
|
93
|
-
BrowsePathsClass,
|
|
94
92
|
BrowsePathsV2Class,
|
|
95
|
-
|
|
96
|
-
DatasetPropertiesClass,
|
|
93
|
+
EmbedClass,
|
|
97
94
|
EnumTypeClass,
|
|
98
95
|
FineGrainedLineageClass,
|
|
99
96
|
GlobalTagsClass,
|
|
100
97
|
SchemaMetadataClass,
|
|
101
|
-
StatusClass,
|
|
102
|
-
SubTypesClass,
|
|
103
98
|
TagAssociationClass,
|
|
104
99
|
TagPropertiesClass,
|
|
105
100
|
TagSnapshotClass,
|
|
106
101
|
)
|
|
107
102
|
from datahub.metadata.urns import TagUrn
|
|
103
|
+
from datahub.sdk.dataset import Dataset
|
|
108
104
|
from datahub.sql_parsing.sqlglot_lineage import ColumnRef
|
|
109
105
|
from datahub.utilities.lossy_collections import LossyList, LossySet
|
|
110
106
|
from datahub.utilities.url_util import remove_port_from_url
|
|
@@ -242,13 +238,24 @@ class LookerViewId:
|
|
|
242
238
|
|
|
243
239
|
dataset_name = config.view_naming_pattern.replace_variables(n_mapping)
|
|
244
240
|
|
|
245
|
-
|
|
241
|
+
generated_urn = builder.make_dataset_urn_with_platform_instance(
|
|
246
242
|
platform=config.platform_name,
|
|
247
243
|
name=dataset_name,
|
|
248
244
|
platform_instance=config.platform_instance,
|
|
249
245
|
env=config.env,
|
|
250
246
|
)
|
|
251
247
|
|
|
248
|
+
logger.debug(
|
|
249
|
+
f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
return generated_urn
|
|
253
|
+
|
|
254
|
+
def get_view_dataset_name(self, config: LookerCommonConfig) -> str:
|
|
255
|
+
n_mapping: ViewNamingPatternMapping = self.get_mapping(config)
|
|
256
|
+
n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path)
|
|
257
|
+
return config.view_naming_pattern.replace_variables(n_mapping)
|
|
258
|
+
|
|
252
259
|
def get_browse_path(self, config: LookerCommonConfig) -> str:
|
|
253
260
|
browse_path = config.view_browse_pattern.replace_variables(
|
|
254
261
|
self.get_mapping(config)
|
|
@@ -276,6 +283,22 @@ class LookerViewId:
|
|
|
276
283
|
],
|
|
277
284
|
)
|
|
278
285
|
|
|
286
|
+
def get_view_dataset_parent_container(
|
|
287
|
+
self, config: LookerCommonConfig
|
|
288
|
+
) -> List[str]:
|
|
289
|
+
project_key = gen_project_key(config, self.project_name)
|
|
290
|
+
view_path = (
|
|
291
|
+
remove_suffix(self.file_path, ".view.lkml")
|
|
292
|
+
if "{file_path}" in config.view_browse_pattern.pattern
|
|
293
|
+
else os.path.dirname(self.file_path)
|
|
294
|
+
)
|
|
295
|
+
path_entries = view_path.split("/") if view_path else []
|
|
296
|
+
return [
|
|
297
|
+
"Develop",
|
|
298
|
+
project_key.as_urn(),
|
|
299
|
+
*path_entries,
|
|
300
|
+
]
|
|
301
|
+
|
|
279
302
|
|
|
280
303
|
class ViewFieldType(Enum):
|
|
281
304
|
DIMENSION = "Dimension"
|
|
@@ -284,6 +307,12 @@ class ViewFieldType(Enum):
|
|
|
284
307
|
UNKNOWN = "Unknown"
|
|
285
308
|
|
|
286
309
|
|
|
310
|
+
class ViewFieldDimensionGroupType(Enum):
|
|
311
|
+
# Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
|
|
312
|
+
TIME = "time"
|
|
313
|
+
DURATION = "duration"
|
|
314
|
+
|
|
315
|
+
|
|
287
316
|
class ViewFieldValue(Enum):
|
|
288
317
|
NOT_AVAILABLE = "NotAvailable"
|
|
289
318
|
|
|
@@ -373,6 +402,14 @@ class ExploreUpstreamViewField:
|
|
|
373
402
|
: -(len(self.field.field_group_variant.lower()) + 1)
|
|
374
403
|
]
|
|
375
404
|
|
|
405
|
+
# Validate that field_name is not empty to prevent invalid schema field URNs
|
|
406
|
+
if not field_name or not field_name.strip():
|
|
407
|
+
logger.warning(
|
|
408
|
+
f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
|
|
409
|
+
f"Skipping field to prevent invalid schema field URN generation."
|
|
410
|
+
)
|
|
411
|
+
return None
|
|
412
|
+
|
|
376
413
|
assert view_name # for lint false positive
|
|
377
414
|
|
|
378
415
|
project_include: ProjectInclude = ProjectInclude(
|
|
@@ -452,15 +489,36 @@ class ExploreUpstreamViewField:
|
|
|
452
489
|
)
|
|
453
490
|
|
|
454
491
|
|
|
455
|
-
def create_view_project_map(
|
|
492
|
+
def create_view_project_map(
|
|
493
|
+
view_fields: List[ViewField],
|
|
494
|
+
explore_primary_view: Optional[str] = None,
|
|
495
|
+
explore_project_name: Optional[str] = None,
|
|
496
|
+
) -> Dict[str, str]:
|
|
456
497
|
"""
|
|
457
498
|
Each view in a model has unique name.
|
|
458
499
|
Use this function in scope of a model.
|
|
500
|
+
|
|
501
|
+
Args:
|
|
502
|
+
view_fields: List of ViewField objects
|
|
503
|
+
explore_primary_view: The primary view name of the explore (explore.view_name)
|
|
504
|
+
explore_project_name: The project name of the explore (explore.project_name)
|
|
459
505
|
"""
|
|
460
506
|
view_project_map: Dict[str, str] = {}
|
|
461
507
|
for view_field in view_fields:
|
|
462
508
|
if view_field.view_name is not None and view_field.project_name is not None:
|
|
463
|
-
|
|
509
|
+
# Override field-level project assignment for the primary view when different
|
|
510
|
+
if (
|
|
511
|
+
view_field.view_name == explore_primary_view
|
|
512
|
+
and explore_project_name is not None
|
|
513
|
+
and explore_project_name != view_field.project_name
|
|
514
|
+
):
|
|
515
|
+
logger.debug(
|
|
516
|
+
f"Overriding project assignment for primary view '{view_field.view_name}': "
|
|
517
|
+
f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
|
|
518
|
+
)
|
|
519
|
+
view_project_map[view_field.view_name] = explore_project_name
|
|
520
|
+
else:
|
|
521
|
+
view_project_map[view_field.view_name] = view_field.project_name
|
|
464
522
|
|
|
465
523
|
return view_project_map
|
|
466
524
|
|
|
@@ -953,6 +1011,9 @@ class LookerExplore:
|
|
|
953
1011
|
f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
|
|
954
1012
|
)
|
|
955
1013
|
else:
|
|
1014
|
+
logger.debug(
|
|
1015
|
+
f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
|
|
1016
|
+
)
|
|
956
1017
|
upstream_views.append(
|
|
957
1018
|
ProjectInclude(project=info[0].project, include=view_name)
|
|
958
1019
|
)
|
|
@@ -981,6 +1042,7 @@ class LookerExplore:
|
|
|
981
1042
|
) -> Optional["LookerExplore"]:
|
|
982
1043
|
try:
|
|
983
1044
|
explore = client.lookml_model_explore(model, explore_name)
|
|
1045
|
+
|
|
984
1046
|
views: Set[str] = set()
|
|
985
1047
|
lkml_fields: List[LookmlModelExploreField] = (
|
|
986
1048
|
explore_field_set_to_lkml_fields(explore)
|
|
@@ -1117,7 +1179,11 @@ class LookerExplore:
|
|
|
1117
1179
|
)
|
|
1118
1180
|
)
|
|
1119
1181
|
|
|
1120
|
-
view_project_map: Dict[str, str] = create_view_project_map(
|
|
1182
|
+
view_project_map: Dict[str, str] = create_view_project_map(
|
|
1183
|
+
view_fields,
|
|
1184
|
+
explore_primary_view=explore.view_name,
|
|
1185
|
+
explore_project_name=explore.project_name,
|
|
1186
|
+
)
|
|
1121
1187
|
if view_project_map:
|
|
1122
1188
|
logger.debug(f"views and their projects: {view_project_map}")
|
|
1123
1189
|
|
|
@@ -1243,52 +1309,31 @@ class LookerExplore:
|
|
|
1243
1309
|
reporter: SourceReport,
|
|
1244
1310
|
base_url: str,
|
|
1245
1311
|
extract_embed_urls: bool,
|
|
1246
|
-
) ->
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
dataset_snapshot = DatasetSnapshot(
|
|
1251
|
-
urn=self.get_explore_urn(config),
|
|
1252
|
-
aspects=[], # we append to this list later on
|
|
1253
|
-
)
|
|
1254
|
-
|
|
1255
|
-
model_key = gen_model_key(config, self.model_name)
|
|
1256
|
-
browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
|
|
1257
|
-
container = ContainerClass(container=model_key.as_urn())
|
|
1258
|
-
dataset_snapshot.aspects.append(browse_paths)
|
|
1259
|
-
dataset_snapshot.aspects.append(StatusClass(removed=False))
|
|
1260
|
-
|
|
1261
|
-
custom_properties = {
|
|
1262
|
-
"project": self.project_name,
|
|
1263
|
-
"model": self.model_name,
|
|
1264
|
-
"looker.explore.label": self.label,
|
|
1265
|
-
"looker.explore.name": self.name,
|
|
1266
|
-
"looker.explore.file": self.source_file,
|
|
1267
|
-
}
|
|
1268
|
-
dataset_props = DatasetPropertiesClass(
|
|
1269
|
-
name=str(self.label) if self.label else LookerUtil._display_name(self.name),
|
|
1270
|
-
description=self.description,
|
|
1271
|
-
customProperties={
|
|
1272
|
-
k: str(v) for k, v in custom_properties.items() if v is not None
|
|
1273
|
-
},
|
|
1274
|
-
)
|
|
1275
|
-
dataset_props.externalUrl = self._get_url(base_url)
|
|
1312
|
+
) -> Dataset:
|
|
1313
|
+
"""
|
|
1314
|
+
Generate a Dataset metadata event for this Looker Explore.
|
|
1276
1315
|
|
|
1277
|
-
|
|
1316
|
+
Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
|
|
1317
|
+
Passthrough explores and joins are handled via lineage and do not need additional nodes.
|
|
1318
|
+
"""
|
|
1319
|
+
upstream_lineage = None
|
|
1278
1320
|
view_name_to_urn_map: Dict[str, str] = {}
|
|
1321
|
+
|
|
1279
1322
|
if self.upstream_views is not None:
|
|
1280
1323
|
assert self.project_name is not None
|
|
1281
|
-
upstreams = []
|
|
1324
|
+
upstreams: list[UpstreamClass] = []
|
|
1282
1325
|
observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
|
|
1326
|
+
|
|
1283
1327
|
for view_ref in sorted(self.upstream_views):
|
|
1284
1328
|
# set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
|
|
1285
1329
|
# if we raise error on file_path equal to None then existing test-cases will fail as mock data
|
|
1286
1330
|
# doesn't have required attributes.
|
|
1287
1331
|
file_path: str = (
|
|
1288
1332
|
cast(str, self.upstream_views_file_path[view_ref.include])
|
|
1289
|
-
if self.upstream_views_file_path
|
|
1333
|
+
if self.upstream_views_file_path.get(view_ref.include) is not None
|
|
1290
1334
|
else ViewFieldValue.NOT_AVAILABLE.value
|
|
1291
1335
|
)
|
|
1336
|
+
|
|
1292
1337
|
view_urn = LookerViewId(
|
|
1293
1338
|
project_name=(
|
|
1294
1339
|
view_ref.project
|
|
@@ -1312,10 +1357,28 @@ class LookerExplore:
|
|
|
1312
1357
|
)
|
|
1313
1358
|
view_name_to_urn_map[view_ref.include] = view_urn
|
|
1314
1359
|
|
|
1315
|
-
fine_grained_lineages = []
|
|
1360
|
+
fine_grained_lineages: list[FineGrainedLineageClass] = []
|
|
1316
1361
|
if config.extract_column_level_lineage:
|
|
1317
1362
|
for field in self.fields or []:
|
|
1363
|
+
# Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
|
|
1364
|
+
if not field.name or not field.name.strip():
|
|
1365
|
+
logger.warning(
|
|
1366
|
+
f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
|
|
1367
|
+
)
|
|
1368
|
+
continue
|
|
1369
|
+
|
|
1318
1370
|
for upstream_column_ref in field.upstream_fields:
|
|
1371
|
+
# Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
|
|
1372
|
+
if (
|
|
1373
|
+
not upstream_column_ref.column
|
|
1374
|
+
or not upstream_column_ref.column.strip()
|
|
1375
|
+
):
|
|
1376
|
+
logger.warning(
|
|
1377
|
+
f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
|
|
1378
|
+
f"due to empty upstream column name in table '{upstream_column_ref.table}'"
|
|
1379
|
+
)
|
|
1380
|
+
continue
|
|
1381
|
+
|
|
1319
1382
|
fine_grained_lineages.append(
|
|
1320
1383
|
FineGrainedLineageClass(
|
|
1321
1384
|
upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
|
|
@@ -1335,9 +1398,11 @@ class LookerExplore:
|
|
|
1335
1398
|
)
|
|
1336
1399
|
|
|
1337
1400
|
upstream_lineage = UpstreamLineage(
|
|
1338
|
-
upstreams=upstreams,
|
|
1401
|
+
upstreams=upstreams,
|
|
1402
|
+
fineGrainedLineages=fine_grained_lineages or None,
|
|
1339
1403
|
)
|
|
1340
|
-
|
|
1404
|
+
|
|
1405
|
+
schema_metadata = None
|
|
1341
1406
|
if self.fields is not None:
|
|
1342
1407
|
schema_metadata = LookerUtil._get_schema(
|
|
1343
1408
|
platform_name=config.platform_name,
|
|
@@ -1345,42 +1410,46 @@ class LookerExplore:
|
|
|
1345
1410
|
view_fields=self.fields,
|
|
1346
1411
|
reporter=reporter,
|
|
1347
1412
|
)
|
|
1348
|
-
if schema_metadata is not None:
|
|
1349
|
-
dataset_snapshot.aspects.append(schema_metadata)
|
|
1350
|
-
|
|
1351
|
-
mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
|
|
1352
|
-
mcp = MetadataChangeProposalWrapper(
|
|
1353
|
-
entityUrn=dataset_snapshot.urn,
|
|
1354
|
-
aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
|
|
1355
|
-
)
|
|
1356
|
-
|
|
1357
|
-
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
|
|
1358
|
-
mce,
|
|
1359
|
-
mcp,
|
|
1360
|
-
]
|
|
1361
1413
|
|
|
1362
|
-
|
|
1363
|
-
explore_tag_urns: List[TagAssociationClass] = [
|
|
1364
|
-
TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
|
|
1365
|
-
]
|
|
1366
|
-
if explore_tag_urns:
|
|
1367
|
-
dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
|
|
1414
|
+
extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
|
|
1368
1415
|
|
|
1369
|
-
|
|
1416
|
+
explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
|
|
1370
1417
|
if extract_embed_urls:
|
|
1371
|
-
|
|
1372
|
-
dataset_snapshot.urn, self._get_embed_url(base_url)
|
|
1373
|
-
)
|
|
1374
|
-
proposals.append(embed_mcp)
|
|
1418
|
+
extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
|
|
1375
1419
|
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1420
|
+
custom_properties: Dict[str, Optional[str]] = {
|
|
1421
|
+
"project": self.project_name,
|
|
1422
|
+
"model": self.model_name,
|
|
1423
|
+
"looker.explore.label": self.label,
|
|
1424
|
+
"looker.explore.name": self.name,
|
|
1425
|
+
"looker.explore.file": self.source_file,
|
|
1426
|
+
}
|
|
1382
1427
|
|
|
1383
|
-
return
|
|
1428
|
+
return Dataset(
|
|
1429
|
+
platform=config.platform_name,
|
|
1430
|
+
name=config.explore_naming_pattern.replace_variables(
|
|
1431
|
+
self.get_mapping(config)
|
|
1432
|
+
),
|
|
1433
|
+
display_name=str(self.label)
|
|
1434
|
+
if self.label
|
|
1435
|
+
else LookerUtil._display_name(self.name),
|
|
1436
|
+
description=self.description,
|
|
1437
|
+
subtype=DatasetSubTypes.LOOKER_EXPLORE,
|
|
1438
|
+
env=config.env,
|
|
1439
|
+
platform_instance=config.platform_instance,
|
|
1440
|
+
custom_properties={
|
|
1441
|
+
k: str(v) for k, v in custom_properties.items() if v is not None
|
|
1442
|
+
},
|
|
1443
|
+
external_url=self._get_url(base_url),
|
|
1444
|
+
upstreams=upstream_lineage,
|
|
1445
|
+
schema=schema_metadata,
|
|
1446
|
+
parent_container=[
|
|
1447
|
+
"Explore",
|
|
1448
|
+
gen_model_key(config, self.model_name).as_urn(),
|
|
1449
|
+
],
|
|
1450
|
+
tags=explore_tag_urns if explore_tag_urns else None,
|
|
1451
|
+
extra_aspects=extra_aspects,
|
|
1452
|
+
)
|
|
1384
1453
|
|
|
1385
1454
|
|
|
1386
1455
|
def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
|
|
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
|
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
7
|
from looker_sdk.sdk.api40.models import DBConnection
|
|
8
|
-
from pydantic import Field, validator
|
|
8
|
+
from pydantic import Field, model_validator, validator
|
|
9
9
|
|
|
10
10
|
from datahub.configuration import ConfigModel
|
|
11
|
-
from datahub.configuration.common import
|
|
11
|
+
from datahub.configuration.common import (
|
|
12
|
+
AllowDenyPattern,
|
|
13
|
+
ConfigurationError,
|
|
14
|
+
HiddenFromDocs,
|
|
15
|
+
)
|
|
12
16
|
from datahub.configuration.source_common import (
|
|
13
17
|
EnvConfigMixin,
|
|
14
18
|
PlatformInstanceConfigMixin,
|
|
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
|
|
|
43
47
|
assert isinstance(v, str), "pattern must be a string"
|
|
44
48
|
return {"pattern": v}
|
|
45
49
|
|
|
50
|
+
@model_validator(mode="before")
|
|
51
|
+
@classmethod
|
|
52
|
+
def pydantic_v2_accept_raw_pattern(cls, v):
|
|
53
|
+
# Pydantic v2 compatibility: handle string input by converting to dict
|
|
54
|
+
if isinstance(v, str):
|
|
55
|
+
return {"pattern": v}
|
|
56
|
+
return v
|
|
57
|
+
|
|
46
58
|
@classmethod
|
|
47
59
|
def pydantic_validate_pattern(cls, v):
|
|
48
60
|
assert isinstance(v, NamingPattern)
|
|
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
|
|
|
132
144
|
description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
|
|
133
145
|
"discoverable. When disabled, adds this information to the description of the column.",
|
|
134
146
|
)
|
|
135
|
-
platform_name: str = Field(
|
|
147
|
+
platform_name: HiddenFromDocs[str] = Field(
|
|
136
148
|
# TODO: This shouldn't be part of the config.
|
|
137
149
|
"looker",
|
|
138
150
|
description="Default platform name.",
|
|
139
|
-
hidden_from_docs=True,
|
|
140
151
|
)
|
|
141
152
|
extract_column_level_lineage: bool = Field(
|
|
142
153
|
True,
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
from enum import Enum
|
|
5
6
|
from functools import lru_cache
|
|
6
7
|
from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
|
|
7
8
|
|
|
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
|
|
|
31
32
|
logger = logging.getLogger(__name__)
|
|
32
33
|
|
|
33
34
|
|
|
35
|
+
class LookerQueryResponseFormat(Enum):
|
|
36
|
+
# result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
|
|
37
|
+
JSON = "json"
|
|
38
|
+
SQL = (
|
|
39
|
+
"sql" # Note: This does not execute the query, it only generates the SQL query.
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
34
43
|
class TransportOptionsConfig(ConfigModel):
|
|
35
44
|
timeout: int
|
|
36
45
|
headers: MutableMapping[str, str]
|
|
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
|
|
|
69
78
|
search_looks_calls: int = 0
|
|
70
79
|
search_dashboards_calls: int = 0
|
|
71
80
|
all_user_calls: int = 0
|
|
81
|
+
generate_sql_query_calls: int = 0
|
|
72
82
|
|
|
73
83
|
|
|
74
84
|
class LookerAPI:
|
|
@@ -170,17 +180,40 @@ class LookerAPI:
|
|
|
170
180
|
logger.debug(f"Executing query {write_query}")
|
|
171
181
|
self.client_stats.query_calls += 1
|
|
172
182
|
|
|
173
|
-
|
|
174
|
-
result_format=
|
|
183
|
+
response = self.client.run_inline_query(
|
|
184
|
+
result_format=LookerQueryResponseFormat.JSON.value,
|
|
175
185
|
body=write_query,
|
|
176
186
|
transport_options=self.transport_options,
|
|
177
187
|
)
|
|
178
188
|
|
|
189
|
+
data = json.loads(response)
|
|
190
|
+
|
|
179
191
|
logger.debug("=================Response=================")
|
|
180
|
-
data = json.loads(response_json)
|
|
181
192
|
logger.debug("Length of response: %d", len(data))
|
|
182
193
|
return data
|
|
183
194
|
|
|
195
|
+
def generate_sql_query(
|
|
196
|
+
self, write_query: WriteQuery, use_cache: bool = False
|
|
197
|
+
) -> str:
|
|
198
|
+
"""
|
|
199
|
+
Generates a SQL query string for a given WriteQuery.
|
|
200
|
+
|
|
201
|
+
Note: This does not execute the query, it only generates the SQL query.
|
|
202
|
+
"""
|
|
203
|
+
logger.debug(f"Generating SQL query for {write_query}")
|
|
204
|
+
self.client_stats.generate_sql_query_calls += 1
|
|
205
|
+
|
|
206
|
+
response = self.client.run_inline_query(
|
|
207
|
+
result_format=LookerQueryResponseFormat.SQL.value,
|
|
208
|
+
body=write_query,
|
|
209
|
+
transport_options=self.transport_options,
|
|
210
|
+
cache=use_cache,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
logger.debug("=================Response=================")
|
|
214
|
+
logger.debug("Length of SQL response: %d", len(response))
|
|
215
|
+
return str(response)
|
|
216
|
+
|
|
184
217
|
def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
|
|
185
218
|
self.client_stats.dashboard_calls += 1
|
|
186
219
|
return self.client.dashboard(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from functools import lru_cache
|
|
2
|
-
from typing import ClassVar, Optional, TextIO
|
|
2
|
+
from typing import ClassVar, Optional, TextIO, Type
|
|
3
3
|
|
|
4
4
|
from liquid import Environment
|
|
5
5
|
from liquid.ast import Node
|
|
@@ -20,16 +20,27 @@ class CustomTagException(Exception):
|
|
|
20
20
|
class ConditionNode(Node):
|
|
21
21
|
def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str):
|
|
22
22
|
self.tok = tok
|
|
23
|
-
|
|
24
23
|
self.sql_or_lookml_reference = sql_or_lookml_reference
|
|
25
|
-
|
|
26
24
|
self.filter_name = filter_name
|
|
27
25
|
|
|
28
26
|
def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
|
|
29
27
|
# This implementation will make sure that sql parse work correctly if looker condition tag
|
|
30
28
|
# is used in lookml sql field
|
|
31
29
|
buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'")
|
|
30
|
+
return True
|
|
32
31
|
|
|
32
|
+
|
|
33
|
+
class IncrementConditionNode(Node):
|
|
34
|
+
def __init__(self, tok: Token, sql_or_lookml_reference: str):
|
|
35
|
+
self.tok = tok
|
|
36
|
+
self.sql_or_lookml_reference = sql_or_lookml_reference
|
|
37
|
+
|
|
38
|
+
def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
|
|
39
|
+
# For incrementcondition, we need to generate a condition that would be used
|
|
40
|
+
# in incremental PDT updates. This typically involves date/time comparisons.
|
|
41
|
+
# We'll render it as a date comparison with a placeholder value
|
|
42
|
+
# See details in Looker documentation for incrementcondition tag -> cloud.google.com/looker/docs/reference/param-view-increment-key
|
|
43
|
+
buffer.write(f"{self.sql_or_lookml_reference} > '2023-01-01'")
|
|
33
44
|
return True
|
|
34
45
|
|
|
35
46
|
|
|
@@ -44,7 +55,6 @@ class ConditionTag(Tag):
|
|
|
44
55
|
This class render the below tag as order.region='ap-south-1' if order_region is provided in config.liquid_variables
|
|
45
56
|
as order_region: 'ap-south-1'
|
|
46
57
|
{% condition order_region %} order.region {% endcondition %}
|
|
47
|
-
|
|
48
58
|
"""
|
|
49
59
|
|
|
50
60
|
TAG_START: ClassVar[str] = "condition"
|
|
@@ -79,7 +89,48 @@ class ConditionTag(Tag):
|
|
|
79
89
|
)
|
|
80
90
|
|
|
81
91
|
|
|
82
|
-
|
|
92
|
+
class IncrementConditionTag(Tag):
|
|
93
|
+
"""
|
|
94
|
+
IncrementConditionTag is the equivalent implementation of looker's custom liquid tag "incrementcondition".
|
|
95
|
+
Refer doc: https://cloud.google.com/looker/docs/incremental-pdts#using_the_incrementcondition_tag
|
|
96
|
+
|
|
97
|
+
This tag is used for incremental PDTs to determine which records should be updated.
|
|
98
|
+
It typically works with date/time fields to filter data that has changed since the last update.
|
|
99
|
+
|
|
100
|
+
Example usage in Looker:
|
|
101
|
+
{% incrementcondition created_at %} order.created_at {% endincrementcondition %}
|
|
102
|
+
|
|
103
|
+
This would generate SQL like: order.created_at > '2023-01-01 00:00:00'
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
TAG_START: ClassVar[str] = "incrementcondition"
|
|
107
|
+
TAG_END: ClassVar[str] = "endincrementcondition"
|
|
108
|
+
name: str = "incrementcondition"
|
|
109
|
+
|
|
110
|
+
def __init__(self, env: Environment):
|
|
111
|
+
super().__init__(env)
|
|
112
|
+
self.parser = get_parser(self.env)
|
|
113
|
+
|
|
114
|
+
def parse(self, stream: TokenStream) -> Node:
|
|
115
|
+
expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_START)
|
|
116
|
+
|
|
117
|
+
start_token = stream.current
|
|
118
|
+
|
|
119
|
+
stream.next_token()
|
|
120
|
+
expect(stream, TOKEN_LITERAL)
|
|
121
|
+
sql_or_lookml_reference: str = stream.current.value.strip()
|
|
122
|
+
|
|
123
|
+
stream.next_token()
|
|
124
|
+
expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_END)
|
|
125
|
+
|
|
126
|
+
return IncrementConditionNode(
|
|
127
|
+
tok=start_token,
|
|
128
|
+
sql_or_lookml_reference=sql_or_lookml_reference,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# Updated custom_tags list to include both tags
|
|
133
|
+
custom_tags: list[Type[Tag]] = [ConditionTag, IncrementConditionTag]
|
|
83
134
|
|
|
84
135
|
|
|
85
136
|
@string_filter
|