acryl-datahub: 1.0.0rc18 (py3-none-any.whl) → 1.3.0.1rc9 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub has been flagged as potentially problematic. The original page links to more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -8,7 +8,7 @@ from datahub.ingestion.source.snowflake.snowflake_config import (
|
|
|
8
8
|
)
|
|
9
9
|
from datahub.utilities.prefix_batch_builder import PrefixGroup
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
SHOW_COMMAND_MAX_PAGE_SIZE = 10000
|
|
12
12
|
SHOW_STREAM_MAX_PAGE_SIZE = 10000
|
|
13
13
|
|
|
14
14
|
|
|
@@ -38,17 +38,21 @@ class SnowflakeQuery:
|
|
|
38
38
|
SnowflakeObjectDomain.MATERIALIZED_VIEW.capitalize(),
|
|
39
39
|
SnowflakeObjectDomain.ICEBERG_TABLE.capitalize(),
|
|
40
40
|
SnowflakeObjectDomain.STREAM.capitalize(),
|
|
41
|
+
SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
|
|
41
42
|
}
|
|
42
43
|
|
|
43
44
|
ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER = "({})".format(
|
|
44
45
|
",".join(f"'{domain}'" for domain in ACCESS_HISTORY_TABLE_VIEW_DOMAINS)
|
|
45
46
|
)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
47
|
+
|
|
48
|
+
# Domains that can be downstream tables in lineage
|
|
49
|
+
DOWNSTREAM_TABLE_DOMAINS = {
|
|
50
|
+
SnowflakeObjectDomain.TABLE.capitalize(),
|
|
51
|
+
SnowflakeObjectDomain.DYNAMIC_TABLE.capitalize(),
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
DOWNSTREAM_TABLE_DOMAINS_FILTER = "({})".format(
|
|
55
|
+
",".join(f"'{domain}'" for domain in DOWNSTREAM_TABLE_DOMAINS)
|
|
52
56
|
)
|
|
53
57
|
|
|
54
58
|
@staticmethod
|
|
@@ -71,14 +75,6 @@ class SnowflakeQuery:
|
|
|
71
75
|
def current_warehouse() -> str:
|
|
72
76
|
return "select CURRENT_WAREHOUSE()"
|
|
73
77
|
|
|
74
|
-
@staticmethod
|
|
75
|
-
def current_database() -> str:
|
|
76
|
-
return "select CURRENT_DATABASE()"
|
|
77
|
-
|
|
78
|
-
@staticmethod
|
|
79
|
-
def current_schema() -> str:
|
|
80
|
-
return "select CURRENT_SCHEMA()"
|
|
81
|
-
|
|
82
78
|
@staticmethod
|
|
83
79
|
def show_databases() -> str:
|
|
84
80
|
return "show databases"
|
|
@@ -107,8 +103,8 @@ class SnowflakeQuery:
|
|
|
107
103
|
order by database_name"""
|
|
108
104
|
|
|
109
105
|
@staticmethod
|
|
110
|
-
def schemas_for_database(db_name:
|
|
111
|
-
db_clause = f'"{db_name}".'
|
|
106
|
+
def schemas_for_database(db_name: str) -> str:
|
|
107
|
+
db_clause = f'"{db_name}".'
|
|
112
108
|
return f"""
|
|
113
109
|
SELECT schema_name AS "SCHEMA_NAME",
|
|
114
110
|
created AS "CREATED",
|
|
@@ -119,8 +115,8 @@ class SnowflakeQuery:
|
|
|
119
115
|
order by schema_name"""
|
|
120
116
|
|
|
121
117
|
@staticmethod
|
|
122
|
-
def tables_for_database(db_name:
|
|
123
|
-
db_clause = f'"{db_name}".'
|
|
118
|
+
def tables_for_database(db_name: str) -> str:
|
|
119
|
+
db_clause = f'"{db_name}".'
|
|
124
120
|
return f"""
|
|
125
121
|
SELECT table_catalog AS "TABLE_CATALOG",
|
|
126
122
|
table_schema AS "TABLE_SCHEMA",
|
|
@@ -142,8 +138,8 @@ class SnowflakeQuery:
|
|
|
142
138
|
order by table_schema, table_name"""
|
|
143
139
|
|
|
144
140
|
@staticmethod
|
|
145
|
-
def tables_for_schema(schema_name: str, db_name:
|
|
146
|
-
db_clause = f'"{db_name}".'
|
|
141
|
+
def tables_for_schema(schema_name: str, db_name: str) -> str:
|
|
142
|
+
db_clause = f'"{db_name}".'
|
|
147
143
|
return f"""
|
|
148
144
|
SELECT table_catalog AS "TABLE_CATALOG",
|
|
149
145
|
table_schema AS "TABLE_SCHEMA",
|
|
@@ -164,6 +160,23 @@ class SnowflakeQuery:
|
|
|
164
160
|
and table_type in ('BASE TABLE', 'EXTERNAL TABLE')
|
|
165
161
|
order by table_schema, table_name"""
|
|
166
162
|
|
|
163
|
+
@staticmethod
|
|
164
|
+
def procedures_for_database(db_name: str) -> str:
|
|
165
|
+
db_clause = f'"{db_name}".'
|
|
166
|
+
return f"""
|
|
167
|
+
SELECT procedure_catalog AS "PROCEDURE_CATALOG",
|
|
168
|
+
procedure_schema AS "PROCEDURE_SCHEMA",
|
|
169
|
+
procedure_name AS "PROCEDURE_NAME",
|
|
170
|
+
procedure_language AS "PROCEDURE_LANGUAGE",
|
|
171
|
+
argument_signature AS "ARGUMENT_SIGNATURE",
|
|
172
|
+
data_type AS "PROCEDURE_RETURN_TYPE",
|
|
173
|
+
procedure_definition AS "PROCEDURE_DEFINITION",
|
|
174
|
+
created AS "CREATED",
|
|
175
|
+
last_altered AS "LAST_ALTERED",
|
|
176
|
+
comment AS "COMMENT"
|
|
177
|
+
FROM {db_clause}information_schema.procedures
|
|
178
|
+
order by procedure_schema, procedure_name"""
|
|
179
|
+
|
|
167
180
|
@staticmethod
|
|
168
181
|
def get_all_tags():
|
|
169
182
|
return """
|
|
@@ -233,7 +246,7 @@ class SnowflakeQuery:
|
|
|
233
246
|
@staticmethod
|
|
234
247
|
def show_views_for_database(
|
|
235
248
|
db_name: str,
|
|
236
|
-
limit: int =
|
|
249
|
+
limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
|
|
237
250
|
view_pagination_marker: Optional[str] = None,
|
|
238
251
|
) -> str:
|
|
239
252
|
# While there is an information_schema.views view, that only shows the view definition if the role
|
|
@@ -242,7 +255,7 @@ class SnowflakeQuery:
|
|
|
242
255
|
|
|
243
256
|
# SHOW VIEWS can return a maximum of 10000 rows.
|
|
244
257
|
# https://docs.snowflake.com/en/sql-reference/sql/show-views#usage-notes
|
|
245
|
-
assert limit <=
|
|
258
|
+
assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
|
|
246
259
|
|
|
247
260
|
# To work around this, we paginate through the results using the FROM clause.
|
|
248
261
|
from_clause = (
|
|
@@ -253,6 +266,33 @@ SHOW VIEWS IN DATABASE "{db_name}"
|
|
|
253
266
|
LIMIT {limit} {from_clause};
|
|
254
267
|
"""
|
|
255
268
|
|
|
269
|
+
@staticmethod
|
|
270
|
+
def get_views_for_database(db_name: str) -> str:
|
|
271
|
+
# We've seen some issues with the `SHOW VIEWS` query,
|
|
272
|
+
# particularly when it requires pagination.
|
|
273
|
+
# This is an experimental alternative query that might be more reliable.
|
|
274
|
+
return f"""\
|
|
275
|
+
SELECT
|
|
276
|
+
TABLE_CATALOG as "VIEW_CATALOG",
|
|
277
|
+
TABLE_SCHEMA as "VIEW_SCHEMA",
|
|
278
|
+
TABLE_NAME as "VIEW_NAME",
|
|
279
|
+
COMMENT,
|
|
280
|
+
VIEW_DEFINITION,
|
|
281
|
+
CREATED,
|
|
282
|
+
LAST_ALTERED,
|
|
283
|
+
IS_SECURE
|
|
284
|
+
FROM "{db_name}".information_schema.views
|
|
285
|
+
WHERE TABLE_CATALOG = '{db_name}'
|
|
286
|
+
AND TABLE_SCHEMA != 'INFORMATION_SCHEMA'
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
@staticmethod
|
|
290
|
+
def get_views_for_schema(db_name: str, schema_name: str) -> str:
|
|
291
|
+
return f"""\
|
|
292
|
+
{SnowflakeQuery.get_views_for_database(db_name).rstrip()}
|
|
293
|
+
AND TABLE_SCHEMA = '{schema_name}'
|
|
294
|
+
"""
|
|
295
|
+
|
|
256
296
|
@staticmethod
|
|
257
297
|
def get_secure_view_definitions() -> str:
|
|
258
298
|
# https://docs.snowflake.com/en/sql-reference/account-usage/views
|
|
@@ -365,26 +405,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
365
405
|
ORDER BY query_start_time DESC
|
|
366
406
|
;"""
|
|
367
407
|
|
|
368
|
-
@staticmethod
|
|
369
|
-
def view_dependencies() -> str:
|
|
370
|
-
return """
|
|
371
|
-
SELECT
|
|
372
|
-
concat(
|
|
373
|
-
referenced_database, '.', referenced_schema,
|
|
374
|
-
'.', referenced_object_name
|
|
375
|
-
) AS "VIEW_UPSTREAM",
|
|
376
|
-
referenced_object_domain as "REFERENCED_OBJECT_DOMAIN",
|
|
377
|
-
concat(
|
|
378
|
-
referencing_database, '.', referencing_schema,
|
|
379
|
-
'.', referencing_object_name
|
|
380
|
-
) AS "DOWNSTREAM_VIEW",
|
|
381
|
-
referencing_object_domain AS "REFERENCING_OBJECT_DOMAIN"
|
|
382
|
-
FROM
|
|
383
|
-
snowflake.account_usage.object_dependencies
|
|
384
|
-
WHERE
|
|
385
|
-
referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
|
|
386
|
-
"""
|
|
387
|
-
|
|
388
408
|
# Note on use of `upstreams_deny_pattern` to ignore temporary tables:
|
|
389
409
|
# Snowflake access history may include temporary tables in DIRECT_OBJECTS_ACCESSED and
|
|
390
410
|
# OBJECTS_MODIFIED->columns->directSources. We do not need these temporary tables and filter these in the query.
|
|
@@ -408,32 +428,6 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
408
428
|
upstreams_deny_pattern,
|
|
409
429
|
)
|
|
410
430
|
|
|
411
|
-
@staticmethod
|
|
412
|
-
def view_dependencies_v2() -> str:
|
|
413
|
-
return """
|
|
414
|
-
SELECT
|
|
415
|
-
ARRAY_UNIQUE_AGG(
|
|
416
|
-
OBJECT_CONSTRUCT(
|
|
417
|
-
'upstream_object_name', concat(
|
|
418
|
-
referenced_database, '.', referenced_schema,
|
|
419
|
-
'.', referenced_object_name
|
|
420
|
-
),
|
|
421
|
-
'upstream_object_domain', referenced_object_domain
|
|
422
|
-
)
|
|
423
|
-
) as "UPSTREAM_TABLES",
|
|
424
|
-
concat(
|
|
425
|
-
referencing_database, '.', referencing_schema,
|
|
426
|
-
'.', referencing_object_name
|
|
427
|
-
) AS "DOWNSTREAM_TABLE_NAME",
|
|
428
|
-
ANY_VALUE(referencing_object_domain) AS "DOWNSTREAM_TABLE_DOMAIN"
|
|
429
|
-
FROM
|
|
430
|
-
snowflake.account_usage.object_dependencies
|
|
431
|
-
WHERE
|
|
432
|
-
referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
|
|
433
|
-
GROUP BY
|
|
434
|
-
DOWNSTREAM_TABLE_NAME
|
|
435
|
-
"""
|
|
436
|
-
|
|
437
431
|
@staticmethod
|
|
438
432
|
def show_external_tables() -> str:
|
|
439
433
|
return "show external tables in account"
|
|
@@ -730,7 +724,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
730
724
|
AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
|
|
731
725
|
AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
|
|
732
726
|
AND upstream_table_domain in {allowed_upstream_table_domains}
|
|
733
|
-
AND downstream_table_domain
|
|
727
|
+
AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
|
|
734
728
|
{("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
|
|
735
729
|
),
|
|
736
730
|
column_upstream_jobs AS (
|
|
@@ -887,7 +881,7 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
887
881
|
AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
|
|
888
882
|
AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
|
|
889
883
|
AND upstream_table_domain in {allowed_upstream_table_domains}
|
|
890
|
-
AND downstream_table_domain
|
|
884
|
+
AND downstream_table_domain in {SnowflakeQuery.DOWNSTREAM_TABLE_DOMAINS_FILTER}
|
|
891
885
|
{("AND " + upstream_sql_filter) if upstream_sql_filter else ""}
|
|
892
886
|
),
|
|
893
887
|
table_upstream_jobs_unique AS (
|
|
@@ -983,4 +977,38 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
|
|
|
983
977
|
from_clause = (
|
|
984
978
|
f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
|
|
985
979
|
)
|
|
986
|
-
return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
|
|
980
|
+
return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
|
|
981
|
+
|
|
982
|
+
@staticmethod
|
|
983
|
+
def show_dynamic_tables_for_database(
|
|
984
|
+
db_name: str,
|
|
985
|
+
limit: int = SHOW_COMMAND_MAX_PAGE_SIZE,
|
|
986
|
+
dynamic_table_pagination_marker: Optional[str] = None,
|
|
987
|
+
) -> str:
|
|
988
|
+
"""Get dynamic table definitions using SHOW DYNAMIC TABLES."""
|
|
989
|
+
assert limit <= SHOW_COMMAND_MAX_PAGE_SIZE
|
|
990
|
+
|
|
991
|
+
from_clause = (
|
|
992
|
+
f"""FROM '{dynamic_table_pagination_marker}'"""
|
|
993
|
+
if dynamic_table_pagination_marker
|
|
994
|
+
else ""
|
|
995
|
+
)
|
|
996
|
+
return f"""\
|
|
997
|
+
SHOW DYNAMIC TABLES IN DATABASE "{db_name}"
|
|
998
|
+
LIMIT {limit} {from_clause};
|
|
999
|
+
"""
|
|
1000
|
+
|
|
1001
|
+
@staticmethod
|
|
1002
|
+
def get_dynamic_table_graph_history(db_name: str) -> str:
|
|
1003
|
+
"""Get dynamic table dependency information from information schema."""
|
|
1004
|
+
return f"""
|
|
1005
|
+
SELECT
|
|
1006
|
+
name,
|
|
1007
|
+
inputs,
|
|
1008
|
+
target_lag_type,
|
|
1009
|
+
target_lag_sec,
|
|
1010
|
+
scheduling_state,
|
|
1011
|
+
alter_trigger
|
|
1012
|
+
FROM TABLE("{db_name}".INFORMATION_SCHEMA.DYNAMIC_TABLE_GRAPH_HISTORY())
|
|
1013
|
+
ORDER BY name
|
|
1014
|
+
"""
|
|
@@ -9,7 +9,6 @@ from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
|
9
9
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
10
10
|
StatefulIngestionReport,
|
|
11
11
|
)
|
|
12
|
-
from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
|
|
13
12
|
from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
|
|
14
13
|
from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport
|
|
15
14
|
from datahub.utilities.lossy_collections import LossyDict
|
|
@@ -96,7 +95,6 @@ class SnowflakeV2Report(
|
|
|
96
95
|
SnowflakeUsageReport,
|
|
97
96
|
StatefulIngestionReport,
|
|
98
97
|
ClassificationReportMixin,
|
|
99
|
-
IngestionStageReport,
|
|
100
98
|
):
|
|
101
99
|
account_locator: Optional[str] = None
|
|
102
100
|
region: Optional[str] = None
|
|
@@ -105,6 +103,7 @@ class SnowflakeV2Report(
|
|
|
105
103
|
databases_scanned: int = 0
|
|
106
104
|
tags_scanned: int = 0
|
|
107
105
|
streams_scanned: int = 0
|
|
106
|
+
procedures_scanned: int = 0
|
|
108
107
|
|
|
109
108
|
include_usage_stats: bool = False
|
|
110
109
|
include_operational_stats: bool = False
|
|
@@ -127,6 +126,7 @@ class SnowflakeV2Report(
|
|
|
127
126
|
# "Information schema query returned too much data. Please repeat query with more selective predicates.""
|
|
128
127
|
# This will result in overall increase in time complexity
|
|
129
128
|
num_get_tables_for_schema_queries: int = 0
|
|
129
|
+
num_get_views_for_schema_queries: int = 0
|
|
130
130
|
|
|
131
131
|
# these will be non-zero if the user choses to enable the extract_tags = "with_lineage" option, which requires
|
|
132
132
|
# individual queries per object (database, schema, table) and an extra query per table to get the tags on the columns.
|
|
@@ -163,6 +163,8 @@ class SnowflakeV2Report(
|
|
|
163
163
|
self.tags_scanned += 1
|
|
164
164
|
elif ent_type == "stream":
|
|
165
165
|
self.streams_scanned += 1
|
|
166
|
+
elif ent_type == "procedure":
|
|
167
|
+
self.procedures_scanned += 1
|
|
166
168
|
else:
|
|
167
169
|
raise KeyError(f"Unknown entity {ent_type}.")
|
|
168
170
|
|