acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,8 @@ class SnowflakeCloudProvider(StrEnum):
|
|
|
9
9
|
|
|
10
10
|
SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS
|
|
11
11
|
|
|
12
|
+
DEFAULT_SNOWFLAKE_DOMAIN = "snowflakecomputing.com"
|
|
13
|
+
|
|
12
14
|
|
|
13
15
|
class SnowflakeEdition(StrEnum):
|
|
14
16
|
STANDARD = "Standard"
|
|
@@ -55,6 +57,7 @@ class SnowflakeObjectDomain(StrEnum):
|
|
|
55
57
|
ICEBERG_TABLE = "iceberg table"
|
|
56
58
|
STREAM = "stream"
|
|
57
59
|
PROCEDURE = "procedure"
|
|
60
|
+
DYNAMIC_TABLE = "dynamic table"
|
|
58
61
|
|
|
59
62
|
|
|
60
63
|
GENERIC_PERMISSION_ERROR_KEY = "permission-error"
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
4
5
|
from typing import Dict, List, Optional, Set
|
|
5
6
|
|
|
6
7
|
import pydantic
|
|
7
8
|
from pydantic import Field, root_validator, validator
|
|
8
9
|
|
|
9
|
-
from datahub.configuration.common import AllowDenyPattern, ConfigModel
|
|
10
|
+
from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
|
|
10
11
|
from datahub.configuration.pattern_utils import UUID_REGEX
|
|
11
12
|
from datahub.configuration.source_common import (
|
|
12
13
|
EnvConfigMixin,
|
|
@@ -30,6 +31,7 @@ from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, SQLFilterCo
|
|
|
30
31
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
31
32
|
StatefulLineageConfigMixin,
|
|
32
33
|
StatefulProfilingConfigMixin,
|
|
34
|
+
StatefulTimeWindowConfigMixin,
|
|
33
35
|
StatefulUsageConfigMixin,
|
|
34
36
|
)
|
|
35
37
|
from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
|
|
@@ -49,9 +51,15 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
|
|
|
49
51
|
rf".*\.SEGMENT_{UUID_REGEX}", # segment
|
|
50
52
|
rf".*\.STAGING_.*_{UUID_REGEX}", # stitch
|
|
51
53
|
r".*\.(GE_TMP_|GE_TEMP_|GX_TEMP_)[0-9A-F]{8}", # great expectations
|
|
54
|
+
r".*\.SNOWPARK_TEMP_TABLE_.+", # snowpark
|
|
52
55
|
]
|
|
53
56
|
|
|
54
57
|
|
|
58
|
+
class QueryDedupStrategyType(Enum):
|
|
59
|
+
STANDARD = "STANDARD"
|
|
60
|
+
NONE = "NONE"
|
|
61
|
+
|
|
62
|
+
|
|
55
63
|
class TagOption(StrEnum):
|
|
56
64
|
with_lineage = "with_lineage"
|
|
57
65
|
without_lineage = "without_lineage"
|
|
@@ -60,13 +68,10 @@ class TagOption(StrEnum):
|
|
|
60
68
|
|
|
61
69
|
@dataclass(frozen=True)
|
|
62
70
|
class DatabaseId:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
platform_instance: Optional[str] =
|
|
67
|
-
default=None,
|
|
68
|
-
description="Platform instance of consumer snowflake account.",
|
|
69
|
-
)
|
|
71
|
+
# Database created from share in consumer account
|
|
72
|
+
database: str
|
|
73
|
+
# Platform instance of consumer snowflake account
|
|
74
|
+
platform_instance: Optional[str] = None
|
|
70
75
|
|
|
71
76
|
|
|
72
77
|
class SnowflakeShareConfig(ConfigModel):
|
|
@@ -154,14 +159,11 @@ class SnowflakeIdentifierConfig(
|
|
|
154
159
|
|
|
155
160
|
email_domain: Optional[str] = pydantic.Field(
|
|
156
161
|
default=None,
|
|
157
|
-
description="Email domain of your organization so users can be displayed on UI appropriately.",
|
|
162
|
+
description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
|
|
158
163
|
)
|
|
159
164
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
|
|
163
|
-
"provided, generates email addresses for snowflake users with unset emails, based on their "
|
|
164
|
-
"username.",
|
|
165
|
+
_email_as_user_identifier = pydantic_removed_field(
|
|
166
|
+
"email_as_user_identifier",
|
|
165
167
|
)
|
|
166
168
|
|
|
167
169
|
|
|
@@ -198,6 +200,7 @@ class SnowflakeV2Config(
|
|
|
198
200
|
SnowflakeUsageConfig,
|
|
199
201
|
StatefulLineageConfigMixin,
|
|
200
202
|
StatefulUsageConfigMixin,
|
|
203
|
+
StatefulTimeWindowConfigMixin,
|
|
201
204
|
StatefulProfilingConfigMixin,
|
|
202
205
|
ClassificationSourceConfigMixin,
|
|
203
206
|
IncrementalPropertiesConfigMixin,
|
|
@@ -212,6 +215,16 @@ class SnowflakeV2Config(
|
|
|
212
215
|
description="If enabled, populates the ingested views' definitions.",
|
|
213
216
|
)
|
|
214
217
|
|
|
218
|
+
fetch_views_from_information_schema: bool = Field(
|
|
219
|
+
default=False,
|
|
220
|
+
description="If enabled, uses information_schema.views to fetch view definitions instead of SHOW VIEWS command. "
|
|
221
|
+
"This alternative method can be more reliable for databases with large numbers of views (> 10K views), as the "
|
|
222
|
+
"SHOW VIEWS approach has proven unreliable and can lead to missing views in such scenarios. However, this method "
|
|
223
|
+
"requires OWNERSHIP privileges on views to retrieve their definitions. For views without ownership permissions "
|
|
224
|
+
"(where VIEW_DEFINITION is null/empty), the system will automatically fall back to using batched SHOW VIEWS queries "
|
|
225
|
+
"to populate the missing definitions.",
|
|
226
|
+
)
|
|
227
|
+
|
|
215
228
|
include_technical_schema: bool = Field(
|
|
216
229
|
default=True,
|
|
217
230
|
description="If enabled, populates the snowflake technical schema and descriptions.",
|
|
@@ -232,7 +245,7 @@ class SnowflakeV2Config(
|
|
|
232
245
|
)
|
|
233
246
|
|
|
234
247
|
use_queries_v2: bool = Field(
|
|
235
|
-
default=
|
|
248
|
+
default=True,
|
|
236
249
|
description="If enabled, uses the new queries extractor to extract queries from snowflake.",
|
|
237
250
|
)
|
|
238
251
|
include_queries: bool = Field(
|
|
@@ -250,6 +263,11 @@ class SnowflakeV2Config(
|
|
|
250
263
|
"This is useful if you have a large number of schemas and want to avoid bulk fetching the schema for each table/view.",
|
|
251
264
|
)
|
|
252
265
|
|
|
266
|
+
query_dedup_strategy: QueryDedupStrategyType = Field(
|
|
267
|
+
default=QueryDedupStrategyType.STANDARD,
|
|
268
|
+
description=f"Experimental: Choose the strategy for query deduplication (default value is appropriate for most use-cases; make sure you understand performance implications before changing it). Allowed values are: {', '.join([s.name for s in QueryDedupStrategyType])}",
|
|
269
|
+
)
|
|
270
|
+
|
|
253
271
|
_check_role_grants_removed = pydantic_removed_field("check_role_grants")
|
|
254
272
|
_provision_role_removed = pydantic_removed_field("provision_role")
|
|
255
273
|
|
|
@@ -263,10 +281,11 @@ class SnowflakeV2Config(
|
|
|
263
281
|
description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
|
|
264
282
|
)
|
|
265
283
|
|
|
266
|
-
structured_properties_template_cache_invalidation_interval: int =
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
284
|
+
structured_properties_template_cache_invalidation_interval: HiddenFromDocs[int] = (
|
|
285
|
+
Field(
|
|
286
|
+
default=60,
|
|
287
|
+
description="Interval in seconds to invalidate the structured properties template cache.",
|
|
288
|
+
)
|
|
270
289
|
)
|
|
271
290
|
|
|
272
291
|
include_external_url: bool = Field(
|
|
@@ -315,7 +334,7 @@ class SnowflakeV2Config(
|
|
|
315
334
|
"to ignore the temporary staging tables created by known ETL tools.",
|
|
316
335
|
)
|
|
317
336
|
|
|
318
|
-
rename_upstreams_deny_pattern_to_temporary_table_pattern = pydantic_renamed_field(
|
|
337
|
+
rename_upstreams_deny_pattern_to_temporary_table_pattern = pydantic_renamed_field( # type: ignore[pydantic-field]
|
|
319
338
|
"upstreams_deny_pattern", "temporary_tables_pattern"
|
|
320
339
|
)
|
|
321
340
|
|
|
@@ -333,8 +352,7 @@ class SnowflakeV2Config(
|
|
|
333
352
|
)
|
|
334
353
|
|
|
335
354
|
# Allows empty containers to be ingested before datasets are added, avoiding permission errors
|
|
336
|
-
warn_no_datasets: bool = Field(
|
|
337
|
-
hidden_from_docs=True,
|
|
355
|
+
warn_no_datasets: HiddenFromDocs[bool] = Field(
|
|
338
356
|
default=False,
|
|
339
357
|
description="If True, warns when no datasets are found during ingestion. If False, ingestion fails when no datasets are found.",
|
|
340
358
|
)
|
|
@@ -347,11 +365,32 @@ class SnowflakeV2Config(
|
|
|
347
365
|
|
|
348
366
|
pushdown_deny_usernames: List[str] = Field(
|
|
349
367
|
default=[],
|
|
350
|
-
description="List of snowflake usernames which will
|
|
368
|
+
description="List of snowflake usernames (SQL LIKE patterns, e.g., 'SERVICE_%', '%_PROD', 'TEST_USER') which will NOT be considered for lineage/usage/queries extraction. "
|
|
351
369
|
"This is primarily useful for improving performance by filtering out users with extremely high query volumes. "
|
|
352
370
|
"Only applicable if `use_queries_v2` is enabled.",
|
|
353
371
|
)
|
|
354
372
|
|
|
373
|
+
pushdown_allow_usernames: List[str] = Field(
|
|
374
|
+
default=[],
|
|
375
|
+
description="List of snowflake usernames (SQL LIKE patterns, e.g., 'ANALYST_%', '%_USER', 'MAIN_ACCOUNT') which WILL be considered for lineage/usage/queries extraction. "
|
|
376
|
+
"This is primarily useful for improving performance by filtering in only specific users. "
|
|
377
|
+
"Only applicable if `use_queries_v2` is enabled. If not specified, all users not in deny list are included.",
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
push_down_database_pattern_access_history: bool = Field(
|
|
381
|
+
default=False,
|
|
382
|
+
description="If enabled, pushes down database pattern filtering to the access_history table for improved performance. "
|
|
383
|
+
"This filters on the accessed objects in access_history.",
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
additional_database_names_allowlist: List[str] = Field(
|
|
387
|
+
default=[],
|
|
388
|
+
description="Additional database names (no pattern matching) to be included in the access_history filter. "
|
|
389
|
+
"Only applies if push_down_database_pattern_access_history=True. "
|
|
390
|
+
"These databases will be included in the filter being pushed down regardless of database_pattern settings. "
|
|
391
|
+
"This may be required in the case of _eg_ temporary tables being created in a different database than the ones in the database_name patterns.",
|
|
392
|
+
)
|
|
393
|
+
|
|
355
394
|
@validator("convert_urns_to_lowercase")
|
|
356
395
|
def validate_convert_urns_to_lowercase(cls, v):
|
|
357
396
|
if not v:
|
|
@@ -440,6 +479,20 @@ class SnowflakeV2Config(
|
|
|
440
479
|
|
|
441
480
|
return shares
|
|
442
481
|
|
|
482
|
+
@root_validator(pre=False, skip_on_failure=True)
|
|
483
|
+
def validate_queries_v2_stateful_ingestion(cls, values: Dict) -> Dict:
|
|
484
|
+
if values.get("use_queries_v2"):
|
|
485
|
+
if values.get("enable_stateful_lineage_ingestion") or values.get(
|
|
486
|
+
"enable_stateful_usage_ingestion"
|
|
487
|
+
):
|
|
488
|
+
logger.warning(
|
|
489
|
+
"enable_stateful_lineage_ingestion and enable_stateful_usage_ingestion are deprecated "
|
|
490
|
+
"when using use_queries_v2=True. These configs only work with the legacy (non-queries v2) extraction path. "
|
|
491
|
+
"For queries v2, use enable_stateful_time_window instead to enable stateful ingestion "
|
|
492
|
+
"for the unified time window extraction (lineage + usage + operations + queries)."
|
|
493
|
+
)
|
|
494
|
+
return values
|
|
495
|
+
|
|
443
496
|
def outbounds(self) -> Dict[str, Set[DatabaseId]]:
|
|
444
497
|
"""
|
|
445
498
|
Returns mapping of
|
|
@@ -15,13 +15,19 @@ from snowflake.connector.network import (
|
|
|
15
15
|
OAUTH_AUTHENTICATOR,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
from datahub.configuration.common import
|
|
18
|
+
from datahub.configuration.common import (
|
|
19
|
+
ConfigModel,
|
|
20
|
+
ConfigurationError,
|
|
21
|
+
HiddenFromDocs,
|
|
22
|
+
MetaError,
|
|
23
|
+
)
|
|
19
24
|
from datahub.configuration.connection_resolver import auto_connection_resolver
|
|
20
25
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
21
26
|
from datahub.ingestion.api.closeable import Closeable
|
|
22
27
|
from datahub.ingestion.source.snowflake.constants import (
|
|
23
28
|
CLIENT_PREFETCH_THREADS,
|
|
24
29
|
CLIENT_SESSION_KEEP_ALIVE,
|
|
30
|
+
DEFAULT_SNOWFLAKE_DOMAIN,
|
|
25
31
|
)
|
|
26
32
|
from datahub.ingestion.source.snowflake.oauth_config import (
|
|
27
33
|
OAuthConfiguration,
|
|
@@ -47,8 +53,6 @@ _VALID_AUTH_TYPES: Dict[str, str] = {
|
|
|
47
53
|
"OAUTH_AUTHENTICATOR_TOKEN": OAUTH_AUTHENTICATOR,
|
|
48
54
|
}
|
|
49
55
|
|
|
50
|
-
_SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com"
|
|
51
|
-
|
|
52
56
|
|
|
53
57
|
class SnowflakePermissionError(MetaError):
|
|
54
58
|
"""A permission error has happened"""
|
|
@@ -64,7 +68,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
64
68
|
description="Any options specified here will be passed to [SQLAlchemy.create_engine](https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine) as kwargs.",
|
|
65
69
|
)
|
|
66
70
|
|
|
67
|
-
scheme: str = "snowflake"
|
|
71
|
+
scheme: HiddenFromDocs[str] = "snowflake"
|
|
68
72
|
username: Optional[str] = pydantic.Field(
|
|
69
73
|
default=None, description="Snowflake username."
|
|
70
74
|
)
|
|
@@ -110,18 +114,25 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
110
114
|
default=None,
|
|
111
115
|
description="OAuth token from external identity provider. Not recommended for most use cases because it will not be able to refresh once expired.",
|
|
112
116
|
)
|
|
117
|
+
snowflake_domain: str = pydantic.Field(
|
|
118
|
+
default=DEFAULT_SNOWFLAKE_DOMAIN,
|
|
119
|
+
description="Snowflake domain. Use 'snowflakecomputing.com' for most regions or 'snowflakecomputing.cn' for China (cn-northwest-1) region.",
|
|
120
|
+
)
|
|
113
121
|
|
|
114
122
|
def get_account(self) -> str:
|
|
115
123
|
assert self.account_id
|
|
116
124
|
return self.account_id
|
|
117
125
|
|
|
118
|
-
rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id")
|
|
126
|
+
rename_host_port_to_account_id = pydantic_renamed_field("host_port", "account_id") # type: ignore[pydantic-field]
|
|
119
127
|
|
|
120
128
|
@pydantic.validator("account_id")
|
|
121
|
-
def validate_account_id(cls, account_id: str) -> str:
|
|
129
|
+
def validate_account_id(cls, account_id: str, values: Dict) -> str:
|
|
122
130
|
account_id = remove_protocol(account_id)
|
|
123
131
|
account_id = remove_trailing_slashes(account_id)
|
|
124
|
-
|
|
132
|
+
# Get the domain from config, fallback to default
|
|
133
|
+
domain = values.get("snowflake_domain", DEFAULT_SNOWFLAKE_DOMAIN)
|
|
134
|
+
snowflake_host_suffix = f".{domain}"
|
|
135
|
+
account_id = remove_suffix(account_id, snowflake_host_suffix)
|
|
125
136
|
return account_id
|
|
126
137
|
|
|
127
138
|
@pydantic.validator("authentication_type", always=True)
|
|
@@ -311,6 +322,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
311
322
|
warehouse=self.warehouse,
|
|
312
323
|
authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
|
|
313
324
|
application=_APPLICATION_NAME,
|
|
325
|
+
host=f"{self.account_id}.{self.snowflake_domain}",
|
|
314
326
|
**connect_args,
|
|
315
327
|
)
|
|
316
328
|
|
|
@@ -324,6 +336,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
324
336
|
role=self.role,
|
|
325
337
|
authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
|
|
326
338
|
application=_APPLICATION_NAME,
|
|
339
|
+
host=f"{self.account_id}.{self.snowflake_domain}",
|
|
327
340
|
**connect_args,
|
|
328
341
|
)
|
|
329
342
|
|
|
@@ -337,6 +350,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
337
350
|
warehouse=self.warehouse,
|
|
338
351
|
role=self.role,
|
|
339
352
|
application=_APPLICATION_NAME,
|
|
353
|
+
host=f"{self.account_id}.{self.snowflake_domain}",
|
|
340
354
|
**connect_args,
|
|
341
355
|
)
|
|
342
356
|
elif self.authentication_type == "OAUTH_AUTHENTICATOR_TOKEN":
|
|
@@ -348,6 +362,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
348
362
|
warehouse=self.warehouse,
|
|
349
363
|
role=self.role,
|
|
350
364
|
application=_APPLICATION_NAME,
|
|
365
|
+
host=f"{self.account_id}.{self.snowflake_domain}",
|
|
351
366
|
**connect_args,
|
|
352
367
|
)
|
|
353
368
|
elif self.authentication_type == "OAUTH_AUTHENTICATOR":
|
|
@@ -363,6 +378,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
363
378
|
role=self.role,
|
|
364
379
|
authenticator=_VALID_AUTH_TYPES.get(self.authentication_type),
|
|
365
380
|
application=_APPLICATION_NAME,
|
|
381
|
+
host=f"{self.account_id}.{self.snowflake_domain}",
|
|
366
382
|
**connect_args,
|
|
367
383
|
)
|
|
368
384
|
else:
|
|
@@ -408,7 +424,7 @@ class SnowflakeConnection(Closeable):
|
|
|
408
424
|
# We often run multiple queries in parallel across multiple threads,
|
|
409
425
|
# so we need to number them to help with log readability.
|
|
410
426
|
query_num = self.get_query_no()
|
|
411
|
-
logger.info(f"Query #{query_num}: {query}", stacklevel=2)
|
|
427
|
+
logger.info(f"Query #{query_num}: {query.rstrip()}", stacklevel=2)
|
|
412
428
|
resp = self._connection.cursor(DictCursor).execute(query)
|
|
413
429
|
if resp is not None and resp.rowcount is not None:
|
|
414
430
|
logger.info(
|
|
@@ -2,7 +2,17 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from datetime import datetime
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import (
|
|
6
|
+
TYPE_CHECKING,
|
|
7
|
+
Any,
|
|
8
|
+
Collection,
|
|
9
|
+
Iterable,
|
|
10
|
+
List,
|
|
11
|
+
Optional,
|
|
12
|
+
Set,
|
|
13
|
+
Tuple,
|
|
14
|
+
Type,
|
|
15
|
+
)
|
|
6
16
|
|
|
7
17
|
from pydantic import BaseModel, Field, validator
|
|
8
18
|
|
|
@@ -44,6 +54,9 @@ from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
|
|
|
44
54
|
from datahub.utilities.perf_timer import PerfTimer
|
|
45
55
|
from datahub.utilities.time import ts_millis_to_datetime
|
|
46
56
|
|
|
57
|
+
if TYPE_CHECKING:
|
|
58
|
+
from pydantic.deprecated.class_validators import V1Validator
|
|
59
|
+
|
|
47
60
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
48
61
|
|
|
49
62
|
EXTERNAL_LINEAGE = "external_lineage"
|
|
@@ -51,7 +64,7 @@ TABLE_LINEAGE = "table_lineage"
|
|
|
51
64
|
VIEW_LINEAGE = "view_lineage"
|
|
52
65
|
|
|
53
66
|
|
|
54
|
-
def pydantic_parse_json(field: str) ->
|
|
67
|
+
def pydantic_parse_json(field: str) -> "V1Validator":
|
|
55
68
|
def _parse_from_json(cls: Type, v: Any) -> dict:
|
|
56
69
|
if isinstance(v, str):
|
|
57
70
|
return json.loads(v)
|
|
@@ -72,7 +85,7 @@ class ColumnUpstreamJob(BaseModel):
|
|
|
72
85
|
|
|
73
86
|
|
|
74
87
|
class ColumnUpstreamLineage(BaseModel):
|
|
75
|
-
column_name: Optional[str]
|
|
88
|
+
column_name: Optional[str] = None
|
|
76
89
|
upstreams: List[ColumnUpstreamJob] = Field(default_factory=list)
|
|
77
90
|
|
|
78
91
|
|
|
@@ -91,9 +104,9 @@ class Query(BaseModel):
|
|
|
91
104
|
class UpstreamLineageEdge(BaseModel):
|
|
92
105
|
DOWNSTREAM_TABLE_NAME: str
|
|
93
106
|
DOWNSTREAM_TABLE_DOMAIN: str
|
|
94
|
-
UPSTREAM_TABLES: Optional[List[UpstreamTableNode]]
|
|
95
|
-
UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]]
|
|
96
|
-
QUERIES: Optional[List[Query]]
|
|
107
|
+
UPSTREAM_TABLES: Optional[List[UpstreamTableNode]] = None
|
|
108
|
+
UPSTREAM_COLUMNS: Optional[List[ColumnUpstreamLineage]] = None
|
|
109
|
+
QUERIES: Optional[List[Query]] = None
|
|
97
110
|
|
|
98
111
|
_json_upstream_tables = pydantic_parse_json("UPSTREAM_TABLES")
|
|
99
112
|
_json_upstream_columns = pydantic_parse_json("UPSTREAM_COLUMNS")
|