acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ import re
|
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from typing import Dict, Iterable, List, Optional, Union
|
|
11
11
|
|
|
12
|
+
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
12
13
|
from datahub.ingestion.api.common import PipelineContext
|
|
13
14
|
from datahub.ingestion.api.decorators import (
|
|
14
15
|
SupportStatus,
|
|
@@ -31,6 +32,7 @@ from datahub.ingestion.api.source import (
|
|
|
31
32
|
)
|
|
32
33
|
from datahub.ingestion.api.source_helpers import auto_workunit
|
|
33
34
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
35
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
34
36
|
from datahub.ingestion.source.snowflake.constants import (
|
|
35
37
|
GENERIC_PERMISSION_ERROR_KEY,
|
|
36
38
|
SnowflakeEdition,
|
|
@@ -71,6 +73,7 @@ from datahub.ingestion.source.snowflake.snowflake_utils import (
|
|
|
71
73
|
from datahub.ingestion.source.state.profiling_state_handler import ProfilingHandler
|
|
72
74
|
from datahub.ingestion.source.state.redundant_run_skip_handler import (
|
|
73
75
|
RedundantLineageRunSkipHandler,
|
|
76
|
+
RedundantQueriesRunSkipHandler,
|
|
74
77
|
RedundantUsageRunSkipHandler,
|
|
75
78
|
)
|
|
76
79
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
@@ -96,7 +99,14 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
96
99
|
@support_status(SupportStatus.CERTIFIED)
|
|
97
100
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
98
101
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
99
|
-
@capability(
|
|
102
|
+
@capability(
|
|
103
|
+
SourceCapability.CONTAINERS,
|
|
104
|
+
"Enabled by default",
|
|
105
|
+
subtype_modifier=[
|
|
106
|
+
SourceCapabilityModifier.DATABASE,
|
|
107
|
+
SourceCapabilityModifier.SCHEMA,
|
|
108
|
+
],
|
|
109
|
+
)
|
|
100
110
|
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
|
101
111
|
@capability(
|
|
102
112
|
SourceCapability.DATA_PROFILING,
|
|
@@ -117,7 +127,7 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
117
127
|
)
|
|
118
128
|
@capability(
|
|
119
129
|
SourceCapability.DELETION_DETECTION,
|
|
120
|
-
"
|
|
130
|
+
"Enabled by default via stateful ingestion",
|
|
121
131
|
supported=True,
|
|
122
132
|
)
|
|
123
133
|
@capability(
|
|
@@ -130,6 +140,7 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
130
140
|
"Optionally enabled via `classification.enabled`",
|
|
131
141
|
supported=True,
|
|
132
142
|
)
|
|
143
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
133
144
|
class SnowflakeV2Source(
|
|
134
145
|
SnowflakeCommonMixin,
|
|
135
146
|
StatefulIngestionSourceBase,
|
|
@@ -161,7 +172,11 @@ class SnowflakeV2Source(
|
|
|
161
172
|
)
|
|
162
173
|
|
|
163
174
|
# For database, schema, tables, views, etc
|
|
164
|
-
self.data_dictionary = SnowflakeDataDictionary(
|
|
175
|
+
self.data_dictionary = SnowflakeDataDictionary(
|
|
176
|
+
connection=self.connection,
|
|
177
|
+
report=self.report,
|
|
178
|
+
fetch_views_from_information_schema=self.config.fetch_views_from_information_schema,
|
|
179
|
+
)
|
|
165
180
|
self.lineage_extractor: Optional[SnowflakeLineageExtractor] = None
|
|
166
181
|
|
|
167
182
|
self.discovered_datasets: Optional[List[str]] = None
|
|
@@ -185,6 +200,7 @@ class SnowflakeV2Source(
|
|
|
185
200
|
),
|
|
186
201
|
generate_usage_statistics=False,
|
|
187
202
|
generate_operations=False,
|
|
203
|
+
generate_queries=self.config.include_queries,
|
|
188
204
|
format_queries=self.config.format_sql_queries,
|
|
189
205
|
is_temp_table=self._is_temp_table,
|
|
190
206
|
is_allowed_table=self._is_allowed_table,
|
|
@@ -192,7 +208,7 @@ class SnowflakeV2Source(
|
|
|
192
208
|
)
|
|
193
209
|
self.report.sql_aggregator = self.aggregator.report
|
|
194
210
|
|
|
195
|
-
if self.config.include_table_lineage:
|
|
211
|
+
if self.config.include_table_lineage and not self.config.use_queries_v2:
|
|
196
212
|
redundant_lineage_run_skip_handler: Optional[
|
|
197
213
|
RedundantLineageRunSkipHandler
|
|
198
214
|
] = None
|
|
@@ -310,6 +326,7 @@ class SnowflakeV2Source(
|
|
|
310
326
|
SourceCapability.PLATFORM_INSTANCE,
|
|
311
327
|
SourceCapability.DOMAINS,
|
|
312
328
|
SourceCapability.DELETION_DETECTION,
|
|
329
|
+
SourceCapability.TEST_CONNECTION,
|
|
313
330
|
)
|
|
314
331
|
]
|
|
315
332
|
|
|
@@ -515,6 +532,7 @@ class SnowflakeV2Source(
|
|
|
515
532
|
snowsight_url_builder=snowsight_url_builder,
|
|
516
533
|
filters=self.filters,
|
|
517
534
|
identifiers=self.identifiers,
|
|
535
|
+
fetch_views_from_information_schema=self.config.fetch_views_from_information_schema,
|
|
518
536
|
)
|
|
519
537
|
|
|
520
538
|
with self.report.new_stage(f"*: {METADATA_EXTRACTION}"):
|
|
@@ -551,11 +569,15 @@ class SnowflakeV2Source(
|
|
|
551
569
|
and len(discovered_views) == 0
|
|
552
570
|
and len(discovered_streams) == 0
|
|
553
571
|
):
|
|
554
|
-
self.
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
572
|
+
if self.config.warn_no_datasets:
|
|
573
|
+
self.structured_reporter.warning(
|
|
574
|
+
"No tables/views/streams found. Verify dataset permissions if Snowflake source is not empty.",
|
|
575
|
+
)
|
|
576
|
+
else:
|
|
577
|
+
self.structured_reporter.failure(
|
|
578
|
+
GENERIC_PERMISSION_ERROR_KEY,
|
|
579
|
+
"No tables/views/streams found. Verify dataset permissions in Snowflake.",
|
|
580
|
+
)
|
|
559
581
|
|
|
560
582
|
self.discovered_datasets = (
|
|
561
583
|
discovered_tables + discovered_views + discovered_streams
|
|
@@ -568,10 +590,26 @@ class SnowflakeV2Source(
|
|
|
568
590
|
with self.report.new_stage(f"*: {QUERIES_EXTRACTION}"):
|
|
569
591
|
schema_resolver = self.aggregator._schema_resolver
|
|
570
592
|
|
|
593
|
+
redundant_queries_run_skip_handler: Optional[
|
|
594
|
+
RedundantQueriesRunSkipHandler
|
|
595
|
+
] = None
|
|
596
|
+
if self.config.enable_stateful_time_window:
|
|
597
|
+
redundant_queries_run_skip_handler = RedundantQueriesRunSkipHandler(
|
|
598
|
+
source=self,
|
|
599
|
+
config=self.config,
|
|
600
|
+
pipeline_name=self.ctx.pipeline_name,
|
|
601
|
+
run_id=self.ctx.run_id,
|
|
602
|
+
)
|
|
603
|
+
|
|
571
604
|
queries_extractor = SnowflakeQueriesExtractor(
|
|
572
605
|
connection=self.connection,
|
|
606
|
+
# TODO: this should be its own section in main recipe
|
|
573
607
|
config=SnowflakeQueriesExtractorConfig(
|
|
574
|
-
window=
|
|
608
|
+
window=BaseTimeWindowConfig(
|
|
609
|
+
start_time=self.config.start_time,
|
|
610
|
+
end_time=self.config.end_time,
|
|
611
|
+
bucket_duration=self.config.bucket_duration,
|
|
612
|
+
),
|
|
575
613
|
temporary_tables_pattern=self.config.temporary_tables_pattern,
|
|
576
614
|
include_lineage=self.config.include_table_lineage,
|
|
577
615
|
include_usage_statistics=self.config.include_usage_stats,
|
|
@@ -580,10 +618,15 @@ class SnowflakeV2Source(
|
|
|
580
618
|
include_query_usage_statistics=self.config.include_query_usage_statistics,
|
|
581
619
|
user_email_pattern=self.config.user_email_pattern,
|
|
582
620
|
pushdown_deny_usernames=self.config.pushdown_deny_usernames,
|
|
621
|
+
pushdown_allow_usernames=self.config.pushdown_allow_usernames,
|
|
622
|
+
query_dedup_strategy=self.config.query_dedup_strategy,
|
|
623
|
+
push_down_database_pattern_access_history=self.config.push_down_database_pattern_access_history,
|
|
624
|
+
additional_database_names_allowlist=self.config.additional_database_names_allowlist,
|
|
583
625
|
),
|
|
584
626
|
structured_report=self.report,
|
|
585
627
|
filters=self.filters,
|
|
586
628
|
identifiers=self.identifiers,
|
|
629
|
+
redundant_run_skip_handler=redundant_queries_run_skip_handler,
|
|
587
630
|
schema_resolver=schema_resolver,
|
|
588
631
|
discovered_tables=self.discovered_datasets,
|
|
589
632
|
graph=self.ctx.graph,
|
|
@@ -721,6 +764,7 @@ class SnowflakeV2Source(
|
|
|
721
764
|
# For privatelink, account identifier ends with .privatelink
|
|
722
765
|
# See https://docs.snowflake.com/en/user-guide/organizations-connect.html#private-connectivity-urls
|
|
723
766
|
privatelink=self.config.account_id.endswith(".privatelink"),
|
|
767
|
+
snowflake_domain=self.config.snowflake_domain,
|
|
724
768
|
)
|
|
725
769
|
|
|
726
770
|
except Exception as e:
|
|
@@ -732,6 +776,8 @@ class SnowflakeV2Source(
|
|
|
732
776
|
return None
|
|
733
777
|
|
|
734
778
|
def is_standard_edition(self) -> bool:
|
|
779
|
+
if self.config.known_snowflake_edition is not None:
|
|
780
|
+
return self.config.known_snowflake_edition == SnowflakeEdition.STANDARD
|
|
735
781
|
try:
|
|
736
782
|
self.connection.query(SnowflakeQuery.show_tags())
|
|
737
783
|
return False
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Iterable, List, Optional
|
|
5
|
+
|
|
6
|
+
from datahub.ingestion.api.closeable import Closeable
|
|
7
|
+
from datahub.metadata.urns import CorpUserUrn
|
|
8
|
+
from datahub.sql_parsing.sql_parsing_aggregator import (
|
|
9
|
+
PreparsedQuery,
|
|
10
|
+
UrnStr,
|
|
11
|
+
)
|
|
12
|
+
from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
|
|
13
|
+
from datahub.utilities.file_backed_collections import FileBackedDict
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class StoredProcCall:
    """One stored procedure invocation (a ``CALL ...`` statement).

    The tracker in this module stores each call under
    ``snowflake_root_query_id`` and later matches other queries against
    that same id.
    """

    # Key under which this call, and the queries it triggered, are grouped.
    snowflake_root_query_id: str

    # Text of the invoking statement; typically something like:
    # "CALL SALES_FORECASTING.CUSTOMER_ANALYSIS_PROC();"
    query_text: str

    # When the call ran and who ran it; copied onto generated lineage entries.
    timestamp: datetime
    user: CorpUserUrn

    # NOTE(review): presumably the session defaults in effect for the call;
    # not read anywhere in this module -- confirm against the producer.
    default_db: str
    default_schema: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclasses.dataclass
class StoredProcExecutionLineage:
    """Lineage accumulated for a single stored procedure execution."""

    # The CALL statement this execution belongs to.
    call: StoredProcCall

    # Upstream / downstream dataset urns gathered from the queries that ran
    # as part of the execution. Both start empty and are filled in later.
    inputs: List[UrnStr]
    outputs: List[UrnStr]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclasses.dataclass
class StoredProcLineageReport:
    """Counters describing stored procedure lineage tracking."""

    # Updated while calls and their related queries are being registered.
    num_stored_proc_calls: int = 0
    num_related_queries: int = 0
    num_related_queries_without_proc_call: int = 0

    # Updated at generation/build time.
    num_stored_proc_lineage_entries: int = 0
    num_stored_proc_calls_with_no_inputs: int = 0
    num_stored_proc_calls_with_no_outputs: int = 0
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class StoredProcLineageTracker(Closeable):
    """
    Tracks table-level lineage for Snowflake stored procedures.

    Stored procedures in Snowflake trigger multiple SQL queries during execution.
    Snowflake assigns each stored procedure call a unique query_id and uses this as the
    root_query_id for all subsequent queries executed within that procedure. This allows
    us to trace which queries belong to a specific stored procedure execution and build
    table-level lineage by aggregating inputs/outputs from all related queries.
    """

    def __init__(self, platform: str, shared_connection: Optional[Any] = None):
        """Create an empty tracker.

        Args:
            platform: Platform name passed to the query fingerprint function.
            shared_connection: Optional connection handed to the backing
                ``FileBackedDict``.
        """
        self.platform = platform
        self.report = StoredProcLineageReport()

        # { root_query_id -> StoredProcExecutionLineage }
        self._stored_proc_execution_lineage: FileBackedDict[
            StoredProcExecutionLineage
        ] = FileBackedDict(shared_connection, tablename="stored_proc_lineage")

    def add_stored_proc_call(self, call: StoredProcCall) -> None:
        """Add a stored procedure call to track."""
        self._stored_proc_execution_lineage[call.snowflake_root_query_id] = (
            StoredProcExecutionLineage(
                call=call,
                # Will be populated by subsequent queries.
                inputs=[],
                outputs=[],
            )
        )
        self.report.num_stored_proc_calls += 1

    def add_related_query(self, query: PreparsedQuery) -> bool:
        """Add a query that might be related to a stored procedure execution.

        Returns True if the query was added to a stored procedure execution, False otherwise.
        """
        snowflake_root_query_id = (query.extra_info or {}).get(
            "snowflake_root_query_id"
        )
        if not snowflake_root_query_id:
            return False

        if snowflake_root_query_id not in self._stored_proc_execution_lineage:
            # The query names a root query we never saw a CALL for.
            self.report.num_related_queries_without_proc_call += 1
            return False

        # for_mutation() is used because the stored value is modified in place.
        stored_proc_execution = self._stored_proc_execution_lineage.for_mutation(
            snowflake_root_query_id
        )
        stored_proc_execution.inputs.extend(query.upstreams)
        if query.downstream is not None:
            stored_proc_execution.outputs.append(query.downstream)
        self.report.num_related_queries += 1
        return True

    def build_merged_lineage_entries(self) -> Iterable[PreparsedQuery]:
        """Yield one lineage entry per distinct downstream of each execution.

        Executions without inputs or without outputs are only counted in the
        report; no entry is produced for them.
        """
        # For stored procedures, we can only get table-level lineage from the audit log.
        # We represent these as PreparsedQuery objects for now. Eventually we'll want to
        # create dataJobInputOutput lineage instead.

        for stored_proc_execution in self._stored_proc_execution_lineage.values():
            if not stored_proc_execution.inputs:
                self.report.num_stored_proc_calls_with_no_inputs += 1
                continue

            if not stored_proc_execution.outputs:
                # Nothing to emit: an entry needs a downstream.
                self.report.num_stored_proc_calls_with_no_outputs += 1
                continue

            # Several queries of one procedure may touch the same table, so the
            # accumulated lists can contain repeats. De-duplicate (keeping first
            # seen order) so each downstream yields exactly one entry and the
            # upstream list carries each urn once.
            upstreams = list(dict.fromkeys(stored_proc_execution.inputs))

            for downstream in dict.fromkeys(stored_proc_execution.outputs):
                stored_proc_query_id = get_query_fingerprint(
                    stored_proc_execution.call.query_text,
                    self.platform,
                    fast=True,
                    secondary_id=downstream,
                )

                lineage_entry = PreparsedQuery(
                    query_id=stored_proc_query_id,
                    query_text=stored_proc_execution.call.query_text,
                    upstreams=upstreams,
                    downstream=downstream,
                    query_count=0,
                    user=stored_proc_execution.call.user,
                    timestamp=stored_proc_execution.call.timestamp,
                )

                self.report.num_stored_proc_lineage_entries += 1
                yield lineage_entry

    def close(self) -> None:
        """Close the backing ``FileBackedDict``."""
        self._stored_proc_execution_lineage.close()
|
|
@@ -16,6 +16,7 @@ from sqlalchemy.engine.reflection import Inspector
|
|
|
16
16
|
from sqlalchemy.types import TypeEngine
|
|
17
17
|
from sqlalchemy_bigquery import STRUCT
|
|
18
18
|
|
|
19
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
19
20
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
20
21
|
from datahub.emitter.mcp_builder import ContainerKey, DatabaseKey
|
|
21
22
|
from datahub.ingestion.api.decorators import (
|
|
@@ -29,26 +30,38 @@ from datahub.ingestion.api.decorators import (
|
|
|
29
30
|
from datahub.ingestion.api.source import StructuredLogLevel
|
|
30
31
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
31
32
|
from datahub.ingestion.source.aws.s3_util import make_s3_urn
|
|
32
|
-
from datahub.ingestion.source.common.subtypes import
|
|
33
|
+
from datahub.ingestion.source.common.subtypes import (
|
|
34
|
+
DatasetContainerSubTypes,
|
|
35
|
+
SourceCapabilityModifier,
|
|
36
|
+
)
|
|
33
37
|
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
|
|
38
|
+
from datahub.ingestion.source.sql.athena_properties_extractor import (
|
|
39
|
+
AthenaPropertiesExtractor,
|
|
40
|
+
)
|
|
34
41
|
from datahub.ingestion.source.sql.sql_common import (
|
|
35
42
|
SQLAlchemySource,
|
|
36
43
|
register_custom_type,
|
|
37
44
|
)
|
|
38
|
-
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
45
|
+
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
39
46
|
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
40
47
|
from datahub.ingestion.source.sql.sql_utils import (
|
|
41
48
|
add_table_to_schema_container,
|
|
42
49
|
gen_database_container,
|
|
43
50
|
gen_database_key,
|
|
44
51
|
)
|
|
52
|
+
from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
|
|
45
53
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
|
|
46
|
-
from datahub.metadata.schema_classes import
|
|
54
|
+
from datahub.metadata.schema_classes import (
|
|
55
|
+
ArrayTypeClass,
|
|
56
|
+
MapTypeClass,
|
|
57
|
+
RecordTypeClass,
|
|
58
|
+
)
|
|
47
59
|
from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column
|
|
48
60
|
from datahub.utilities.sqlalchemy_type_converter import (
|
|
49
61
|
MapType,
|
|
50
62
|
get_schema_fields_for_sqlalchemy_column,
|
|
51
63
|
)
|
|
64
|
+
from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
|
|
52
65
|
|
|
53
66
|
try:
|
|
54
67
|
from typing_extensions import override
|
|
@@ -61,6 +74,11 @@ except ImportError:
|
|
|
61
74
|
|
|
62
75
|
logger = logging.getLogger(__name__)
|
|
63
76
|
|
|
77
|
+
# Precompiled regex for SQL identifier validation
|
|
78
|
+
# Athena identifiers can only contain lowercase letters, numbers, underscore, and period (for complex types)
|
|
79
|
+
# Note: uppercase letters are also accepted because Athena automatically converts them to lowercase; every other character is rejected for security
|
|
80
|
+
_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z0-9_.]+$")
|
|
81
|
+
|
|
64
82
|
assert STRUCT, "required type modules are not available"
|
|
65
83
|
register_custom_type(STRUCT, RecordTypeClass)
|
|
66
84
|
register_custom_type(MapType, MapTypeClass)
|
|
@@ -234,7 +252,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
|
|
|
234
252
|
|
|
235
253
|
|
|
236
254
|
class AthenaConfig(SQLCommonConfig):
|
|
237
|
-
scheme: str = "awsathena+rest"
|
|
255
|
+
scheme: HiddenFromDocs[str] = "awsathena+rest"
|
|
238
256
|
username: Optional[str] = pydantic.Field(
|
|
239
257
|
default=None,
|
|
240
258
|
description="Username credential. If not specified, detected with boto3 rules. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html",
|
|
@@ -280,12 +298,22 @@ class AthenaConfig(SQLCommonConfig):
|
|
|
280
298
|
description="Extract partitions for tables. Partition extraction needs to run a query (`select * from table$partitions`) on the table. Disable this if you don't want to grant select permission.",
|
|
281
299
|
)
|
|
282
300
|
|
|
301
|
+
extract_partitions_using_create_statements: bool = pydantic.Field(
|
|
302
|
+
default=False,
|
|
303
|
+
description="Extract partitions using the `SHOW CREATE TABLE` statement instead of querying the table's partitions directly. This needs to be enabled to extract Iceberg partitions. If extraction fails it falls back to the default partition extraction. This is experimental.",
|
|
304
|
+
)
|
|
305
|
+
|
|
283
306
|
_s3_staging_dir_population = pydantic_renamed_field(
|
|
284
307
|
old_name="s3_staging_dir",
|
|
285
308
|
new_name="query_result_location",
|
|
286
309
|
print_warning=True,
|
|
287
310
|
)
|
|
288
311
|
|
|
312
|
+
emit_schema_fieldpaths_as_v1: bool = pydantic.Field(
|
|
313
|
+
default=False,
|
|
314
|
+
description="Convert simple field paths to DataHub field path v1 format. Simple column paths are those that do not contain any nested fields.",
|
|
315
|
+
)
|
|
316
|
+
|
|
289
317
|
profiling: AthenaProfilingConfig = AthenaProfilingConfig()
|
|
290
318
|
|
|
291
319
|
def get_sql_alchemy_url(self):
|
|
@@ -320,8 +348,24 @@ class Partitionitem:
|
|
|
320
348
|
@capability(
|
|
321
349
|
SourceCapability.DATA_PROFILING,
|
|
322
350
|
"Optionally enabled via configuration. Profiling uses sql queries on whole table which can be expensive operation.",
|
|
351
|
+
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
|
352
|
+
)
|
|
353
|
+
@capability(
|
|
354
|
+
SourceCapability.LINEAGE_COARSE,
|
|
355
|
+
"Supported for S3 tables",
|
|
356
|
+
subtype_modifier=[
|
|
357
|
+
SourceCapabilityModifier.VIEW,
|
|
358
|
+
SourceCapabilityModifier.TABLE,
|
|
359
|
+
],
|
|
360
|
+
)
|
|
361
|
+
@capability(
|
|
362
|
+
SourceCapability.LINEAGE_FINE,
|
|
363
|
+
"Supported for S3 tables",
|
|
364
|
+
subtype_modifier=[
|
|
365
|
+
SourceCapabilityModifier.VIEW,
|
|
366
|
+
SourceCapabilityModifier.TABLE,
|
|
367
|
+
],
|
|
323
368
|
)
|
|
324
|
-
@capability(SourceCapability.LINEAGE_COARSE, "Supported for S3 tables")
|
|
325
369
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
326
370
|
class AthenaSource(SQLAlchemySource):
|
|
327
371
|
"""
|
|
@@ -472,11 +516,70 @@ class AthenaSource(SQLAlchemySource):
|
|
|
472
516
|
return [schema for schema in schemas if schema == athena_config.database]
|
|
473
517
|
return schemas
|
|
474
518
|
|
|
519
|
+
@classmethod
def _sanitize_identifier(cls, identifier: str) -> str:
    """Validate a SQL identifier to prevent injection attacks.

    The identifier is returned unchanged when it is acceptable; nothing is
    rewritten or escaped.

    Args:
        identifier: The SQL identifier to validate

    Returns:
        The same identifier, safe for interpolation into SQL queries

    Raises:
        ValueError: If identifier is empty or contains unsafe characters
    """
    if not identifier:
        raise ValueError("Identifier cannot be empty")

    # Allow only alphanumeric characters, underscores, and periods for identifiers.
    # fullmatch() is required here: the pattern ends in `$`, which also matches
    # just before a trailing newline, so match() would accept "name\n".
    if not _IDENTIFIER_PATTERN.fullmatch(identifier):
        raise ValueError(
            f"Identifier '{identifier}' contains unsafe characters. Only alphanumeric characters, underscores, and periods are allowed."
        )

    return identifier
|
|
543
|
+
|
|
475
544
|
@classmethod
def _casted_partition_key(cls, key: str) -> str:
    """Return a SQL expression that casts partition column ``key`` to VARCHAR.

    Partition keys are cast because Athena may otherwise throw an error
    during concatenation / comparison. The key goes through
    ``_sanitize_identifier`` first, so its ``ValueError`` propagates for
    names it rejects.
    """
    return f"CAST({cls._sanitize_identifier(key)} as VARCHAR)"
|
|
550
|
+
|
|
551
|
+
@classmethod
def _build_max_partition_query(
    cls, schema: str, table: str, partitions: List[str]
) -> str:
    """Build the SQL that selects the row holding the maximum partition value.

    All partition columns are cast to VARCHAR and, when there is more than
    one, concatenated with ``-``; the query keeps the row of
    ``<table>$partitions`` whose combined value equals the maximum.

    Args:
        schema: Database schema name
        table: Table name
        partitions: List of partition column names

    Returns:
        SQL query string to find the maximum partition

    Raises:
        ValueError: If any identifier contains unsafe characters
    """
    # Validate every identifier up front, before any SQL text is assembled.
    safe_schema = cls._sanitize_identifier(schema)
    safe_table = cls._sanitize_identifier(table)
    select_list = ",".join(
        [cls._sanitize_identifier(column) for column in partitions]
    )

    casted = [cls._casted_partition_key(column) for column in partitions]
    partition_expr = (
        casted[0]
        if len(casted) == 1
        else "CONCAT(" + ", CAST('-' AS VARCHAR), ".join(casted) + ")"
    )

    partitions_table = f'"{safe_schema}"."{safe_table}$partitions"'
    return (
        f"select {select_list} from {partitions_table} "
        f"where {partition_expr} = "
        f"(select max({partition_expr}) from {partitions_table})"
    )
|
|
480
583
|
|
|
481
584
|
@override
|
|
482
585
|
def get_partitions(
|
|
@@ -488,27 +591,37 @@ class AthenaSource(SQLAlchemySource):
|
|
|
488
591
|
if not self.cursor:
|
|
489
592
|
return None
|
|
490
593
|
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
594
|
+
if self.config.extract_partitions_using_create_statements:
|
|
595
|
+
try:
|
|
596
|
+
partitions = self._get_partitions_create_table(schema, table)
|
|
597
|
+
except Exception as e:
|
|
598
|
+
logger.warning(
|
|
599
|
+
f"Failed to get partitions from create table statement for {schema}.{table} because of {e}. Falling back to SQLAlchemy.",
|
|
600
|
+
exc_info=True,
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
# If we can't get create table statement, we fall back to SQLAlchemy
|
|
604
|
+
partitions = self._get_partitions_sqlalchemy(schema, table)
|
|
605
|
+
else:
|
|
606
|
+
partitions = self._get_partitions_sqlalchemy(schema, table)
|
|
494
607
|
|
|
495
|
-
partitions = []
|
|
496
|
-
for key in metadata.partition_keys:
|
|
497
|
-
if key.name:
|
|
498
|
-
partitions.append(key.name)
|
|
499
608
|
if not partitions:
|
|
500
609
|
return []
|
|
501
610
|
|
|
611
|
+
if (
|
|
612
|
+
not self.config.profiling.enabled
|
|
613
|
+
or not self.config.profiling.partition_profiling_enabled
|
|
614
|
+
):
|
|
615
|
+
return partitions
|
|
616
|
+
|
|
502
617
|
with self.report.report_exc(
|
|
503
618
|
message="Failed to extract partition details",
|
|
504
619
|
context=f"{schema}.{table}",
|
|
505
620
|
level=StructuredLogLevel.WARN,
|
|
506
621
|
):
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
self._casted_partition_key(key) for key in partitions
|
|
622
|
+
max_partition_query = self._build_max_partition_query(
|
|
623
|
+
schema, table, partitions
|
|
510
624
|
)
|
|
511
|
-
max_partition_query = f'select {",".join(partitions)} from "{schema}"."{table}$partitions" where {part_concat} = (select max({part_concat}) from "{schema}"."{table}$partitions")'
|
|
512
625
|
ret = self.cursor.execute(max_partition_query)
|
|
513
626
|
max_partition: Dict[str, str] = {}
|
|
514
627
|
if ret:
|
|
@@ -524,6 +637,56 @@ class AthenaSource(SQLAlchemySource):
|
|
|
524
637
|
|
|
525
638
|
return partitions
|
|
526
639
|
|
|
640
|
+
def _get_partitions_create_table(self, schema: str, table: str) -> List[str]:
    """Read partition column names from the table's ``SHOW CREATE TABLE`` output.

    Args:
        schema: Database schema name
        table: Table name

    Returns:
        Names of the simple partition columns, or an empty list when the
        parsed statement reports none.

    Raises:
        Exception: Whatever the cursor or the properties extractor raised;
            it is logged at debug level and re-raised so the caller can
            fall back to another extraction method.
    """
    assert self.cursor

    # The names are placed inside backtick-quoted identifiers; double any
    # embedded backtick so a name cannot terminate the quoting early.
    quoted_schema = schema.replace("`", "``")
    quoted_table = table.replace("`", "``")

    try:
        res = self.cursor.execute(
            f"SHOW CREATE TABLE `{quoted_schema}`.`{quoted_table}`"
        )
    except Exception as e:
        # Athena does not support SHOW CREATE TABLE for views
        # and will throw an error. We need to handle this case
        # and caller needs to fallback to sqlalchemy's get partitions call.
        logger.debug(
            f"Failed to get table properties for {schema}.{table}: {e}",
            exc_info=True,
        )
        raise
    rows = res.fetchall()

    # Concatenate all rows into a single string with newlines
    create_table_statement = "\n".join(row[0] for row in rows)

    try:
        athena_table_info = AthenaPropertiesExtractor.get_table_properties(
            create_table_statement
        )
    except Exception as e:
        logger.debug(
            f"Failed to parse table properties for {schema}.{table}: {e} and statement: {create_table_statement}",
            exc_info=True,
        )
        raise

    partition_info = athena_table_info.partition_info
    if partition_info and partition_info.simple_columns:
        return [ci.name for ci in partition_info.simple_columns]
    return []
|
|
678
|
+
|
|
679
|
+
def _get_partitions_sqlalchemy(self, schema: str, table: str) -> List[str]:
|
|
680
|
+
assert self.cursor
|
|
681
|
+
metadata: AthenaTableMetadata = self.cursor.get_table_metadata(
|
|
682
|
+
table_name=table, schema_name=schema
|
|
683
|
+
)
|
|
684
|
+
partitions = []
|
|
685
|
+
for key in metadata.partition_keys:
|
|
686
|
+
if key.name:
|
|
687
|
+
partitions.append(key.name)
|
|
688
|
+
return partitions
|
|
689
|
+
|
|
527
690
|
# Overwrite to modify the creation of schema fields
|
|
528
691
|
def get_schema_fields_for_column(
|
|
529
692
|
self,
|
|
@@ -550,6 +713,18 @@ class AthenaSource(SQLAlchemySource):
|
|
|
550
713
|
),
|
|
551
714
|
)
|
|
552
715
|
|
|
716
|
+
# Keeping it as individual check to make it more explicit and easier to understand
|
|
717
|
+
if not self.config.emit_schema_fieldpaths_as_v1:
|
|
718
|
+
return fields
|
|
719
|
+
|
|
720
|
+
if isinstance(
|
|
721
|
+
fields[0].type.type, (RecordTypeClass, MapTypeClass, ArrayTypeClass)
|
|
722
|
+
):
|
|
723
|
+
return fields
|
|
724
|
+
else:
|
|
725
|
+
fields[0].fieldPath = get_simple_field_path_from_v2_field_path(
|
|
726
|
+
fields[0].fieldPath
|
|
727
|
+
)
|
|
553
728
|
return fields
|
|
554
729
|
|
|
555
730
|
def generate_partition_profiler_query(
|
|
@@ -563,16 +738,34 @@ class AthenaSource(SQLAlchemySource):
|
|
|
563
738
|
).get(table, None)
|
|
564
739
|
|
|
565
740
|
if partition and partition.max_partition:
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
741
|
+
try:
|
|
742
|
+
# Sanitize identifiers to prevent SQL injection
|
|
743
|
+
sanitized_schema = self._sanitize_identifier(schema)
|
|
744
|
+
sanitized_table = self._sanitize_identifier(table)
|
|
745
|
+
|
|
746
|
+
max_partition_filters = []
|
|
747
|
+
for key, value in partition.max_partition.items():
|
|
748
|
+
# Sanitize partition key and properly escape the value
|
|
749
|
+
sanitized_key = self._sanitize_identifier(key)
|
|
750
|
+
# Escape single quotes in the value to prevent injection
|
|
751
|
+
escaped_value = value.replace("'", "''") if value else ""
|
|
752
|
+
max_partition_filters.append(
|
|
753
|
+
f"{self._casted_partition_key(sanitized_key)} = '{escaped_value}'"
|
|
754
|
+
)
|
|
755
|
+
max_partition = str(partition.max_partition)
|
|
756
|
+
return (
|
|
757
|
+
max_partition,
|
|
758
|
+
f'SELECT * FROM "{sanitized_schema}"."{sanitized_table}" WHERE {" AND ".join(max_partition_filters)}',
|
|
570
759
|
)
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
760
|
+
except ValueError as e:
|
|
761
|
+
# If sanitization fails due to malicious identifiers,
|
|
762
|
+
# return None to disable partition profiling for this table
|
|
763
|
+
# rather than crashing the entire ingestion
|
|
764
|
+
logger.warning(
|
|
765
|
+
f"Failed to generate partition profiler query for {schema}.{table} due to unsafe identifiers: {e}. "
|
|
766
|
+
f"Partition profiling disabled for this table."
|
|
767
|
+
)
|
|
768
|
+
return None, None
|
|
576
769
|
return None, None
|
|
577
770
|
|
|
578
771
|
def close(self):
|