acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import importlib.resources as pkg_resource
|
|
3
3
|
import logging
|
|
4
|
-
import os
|
|
5
4
|
from typing import Dict, List, Optional
|
|
6
5
|
|
|
7
6
|
import lark
|
|
8
7
|
from lark import Lark, Tree
|
|
9
8
|
|
|
10
9
|
import datahub.ingestion.source.powerbi.m_query.data_classes
|
|
10
|
+
from datahub.configuration.env_vars import get_powerbi_m_query_parse_timeout
|
|
11
11
|
from datahub.ingestion.api.common import PipelineContext
|
|
12
12
|
from datahub.ingestion.source.powerbi.config import (
|
|
13
13
|
PowerBiDashboardSourceConfig,
|
|
@@ -25,7 +25,7 @@ from datahub.utilities.threading_timeout import TimeoutException, threading_time
|
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
28
|
-
_M_QUERY_PARSE_TIMEOUT =
|
|
28
|
+
_M_QUERY_PARSE_TIMEOUT = get_powerbi_m_query_parse_timeout()
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
@functools.lru_cache(maxsize=1)
|
|
@@ -3,7 +3,9 @@ from abc import ABC, abstractmethod
|
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from typing import Dict, List, Optional, Tuple, Type, cast
|
|
5
5
|
|
|
6
|
+
import sqlglot
|
|
6
7
|
from lark import Tree
|
|
8
|
+
from sqlglot import ParseError, expressions as exp
|
|
7
9
|
|
|
8
10
|
from datahub.configuration.source_common import PlatformDetail
|
|
9
11
|
from datahub.emitter import mce_builder as builder
|
|
@@ -209,15 +211,34 @@ class AbstractLineage(ABC):
|
|
|
209
211
|
|
|
210
212
|
return None
|
|
211
213
|
|
|
214
|
+
@staticmethod
|
|
215
|
+
def is_sql_query(query: Optional[str]) -> bool:
|
|
216
|
+
if not query:
|
|
217
|
+
return False
|
|
218
|
+
query = native_sql_parser.remove_special_characters(query)
|
|
219
|
+
try:
|
|
220
|
+
expression = sqlglot.parse_one(query)
|
|
221
|
+
return isinstance(expression, exp.Select)
|
|
222
|
+
except (ParseError, Exception):
|
|
223
|
+
logger.debug(f"Failed to parse query as SQL: {query}")
|
|
224
|
+
return False
|
|
225
|
+
|
|
212
226
|
def parse_custom_sql(
|
|
213
|
-
self,
|
|
227
|
+
self,
|
|
228
|
+
query: str,
|
|
229
|
+
server: str,
|
|
230
|
+
database: Optional[str],
|
|
231
|
+
schema: Optional[str],
|
|
232
|
+
platform_pair: Optional[DataPlatformPair] = None,
|
|
214
233
|
) -> Lineage:
|
|
215
234
|
dataplatform_tables: List[DataPlatformTable] = []
|
|
235
|
+
if not platform_pair:
|
|
236
|
+
platform_pair = self.get_platform_pair()
|
|
216
237
|
|
|
217
238
|
platform_detail: PlatformDetail = (
|
|
218
239
|
self.platform_instance_resolver.get_platform_instance(
|
|
219
240
|
PowerBIPlatformDetail(
|
|
220
|
-
data_platform_pair=
|
|
241
|
+
data_platform_pair=platform_pair,
|
|
221
242
|
data_platform_server=server,
|
|
222
243
|
)
|
|
223
244
|
)
|
|
@@ -231,7 +252,7 @@ class AbstractLineage(ABC):
|
|
|
231
252
|
native_sql_parser.parse_custom_sql(
|
|
232
253
|
ctx=self.ctx,
|
|
233
254
|
query=query,
|
|
234
|
-
platform=
|
|
255
|
+
platform=platform_pair.datahub_data_platform_name,
|
|
235
256
|
platform_instance=platform_detail.platform_instance,
|
|
236
257
|
env=platform_detail.env,
|
|
237
258
|
database=database,
|
|
@@ -258,7 +279,7 @@ class AbstractLineage(ABC):
|
|
|
258
279
|
for urn in parsed_result.in_tables:
|
|
259
280
|
dataplatform_tables.append(
|
|
260
281
|
DataPlatformTable(
|
|
261
|
-
data_platform_pair=
|
|
282
|
+
data_platform_pair=platform_pair,
|
|
262
283
|
urn=urn,
|
|
263
284
|
)
|
|
264
285
|
)
|
|
@@ -956,7 +977,7 @@ class OdbcLineage(AbstractLineage):
|
|
|
956
977
|
f"data-access function detail {data_access_func_detail}"
|
|
957
978
|
)
|
|
958
979
|
|
|
959
|
-
connect_string,
|
|
980
|
+
connect_string, query = self.get_db_detail_from_argument(
|
|
960
981
|
data_access_func_detail.arg_list
|
|
961
982
|
)
|
|
962
983
|
|
|
@@ -972,12 +993,19 @@ class OdbcLineage(AbstractLineage):
|
|
|
972
993
|
data_platform, powerbi_platform = extract_platform(connect_string)
|
|
973
994
|
server_name = extract_server(connect_string)
|
|
974
995
|
|
|
996
|
+
dsn = extract_dsn(connect_string)
|
|
997
|
+
if not dsn:
|
|
998
|
+
self.reporter.warning(
|
|
999
|
+
title="Can not determine ODBC DSN",
|
|
1000
|
+
message="Can not extract DSN from ODBC connect string. Skipping Lineage creation.",
|
|
1001
|
+
context=f"table-name={self.table.full_name}, connect-string={connect_string}",
|
|
1002
|
+
)
|
|
1003
|
+
return Lineage.empty()
|
|
1004
|
+
logger.debug(f"Extracted DSN: {dsn}")
|
|
1005
|
+
|
|
975
1006
|
if not data_platform:
|
|
976
|
-
|
|
977
|
-
if
|
|
978
|
-
logger.debug(f"Extracted DSN: {dsn}")
|
|
979
|
-
server_name = dsn
|
|
980
|
-
if dsn and self.config.dsn_to_platform_name:
|
|
1007
|
+
server_name = dsn
|
|
1008
|
+
if self.config.dsn_to_platform_name:
|
|
981
1009
|
logger.debug(f"Attempting to map DSN {dsn} to platform")
|
|
982
1010
|
name = self.config.dsn_to_platform_name.get(dsn)
|
|
983
1011
|
if name:
|
|
@@ -1006,6 +1034,63 @@ class OdbcLineage(AbstractLineage):
|
|
|
1006
1034
|
elif not server_name:
|
|
1007
1035
|
server_name = "unknown"
|
|
1008
1036
|
|
|
1037
|
+
if self.is_sql_query(query):
|
|
1038
|
+
return self.query_lineage(query, platform_pair, server_name, dsn)
|
|
1039
|
+
else:
|
|
1040
|
+
return self.expression_lineage(
|
|
1041
|
+
data_access_func_detail, data_platform, platform_pair, server_name
|
|
1042
|
+
)
|
|
1043
|
+
|
|
1044
|
+
def query_lineage(
|
|
1045
|
+
self,
|
|
1046
|
+
query: Optional[str],
|
|
1047
|
+
platform_pair: DataPlatformPair,
|
|
1048
|
+
server_name: str,
|
|
1049
|
+
dsn: str,
|
|
1050
|
+
) -> Lineage:
|
|
1051
|
+
database = None
|
|
1052
|
+
schema = None
|
|
1053
|
+
|
|
1054
|
+
if not query:
|
|
1055
|
+
# query should never be None as it is checked before calling this function.
|
|
1056
|
+
# however, we need to check just in case.
|
|
1057
|
+
self.reporter.warning(
|
|
1058
|
+
title="ODBC Query is null",
|
|
1059
|
+
message="No SQL to parse. Skipping Lineage creation.",
|
|
1060
|
+
context=f"table-name={self.table.full_name}",
|
|
1061
|
+
)
|
|
1062
|
+
return Lineage.empty()
|
|
1063
|
+
|
|
1064
|
+
if self.config.dsn_to_database_schema:
|
|
1065
|
+
value = self.config.dsn_to_database_schema.get(dsn)
|
|
1066
|
+
if value:
|
|
1067
|
+
parts = value.split(".")
|
|
1068
|
+
if len(parts) == 1:
|
|
1069
|
+
database = parts[0]
|
|
1070
|
+
elif len(parts) == 2:
|
|
1071
|
+
database = parts[0]
|
|
1072
|
+
schema = parts[1]
|
|
1073
|
+
|
|
1074
|
+
logger.debug(
|
|
1075
|
+
f"ODBC query processing: dsn={dsn} mapped to database={database}, schema={schema}"
|
|
1076
|
+
)
|
|
1077
|
+
result = self.parse_custom_sql(
|
|
1078
|
+
query=query,
|
|
1079
|
+
server=server_name,
|
|
1080
|
+
database=database,
|
|
1081
|
+
schema=schema,
|
|
1082
|
+
platform_pair=platform_pair,
|
|
1083
|
+
)
|
|
1084
|
+
logger.debug(f"ODBC query lineage generated {len(result.upstreams)} upstreams")
|
|
1085
|
+
return result
|
|
1086
|
+
|
|
1087
|
+
def expression_lineage(
|
|
1088
|
+
self,
|
|
1089
|
+
data_access_func_detail: DataAccessFunctionDetail,
|
|
1090
|
+
data_platform: str,
|
|
1091
|
+
platform_pair: DataPlatformPair,
|
|
1092
|
+
server_name: str,
|
|
1093
|
+
) -> Lineage:
|
|
1009
1094
|
database_name = None
|
|
1010
1095
|
schema_name = None
|
|
1011
1096
|
table_name = None
|
|
@@ -1144,6 +1229,11 @@ class SupportedPattern(Enum):
|
|
|
1144
1229
|
FunctionName.ODBC_DATA_ACCESS,
|
|
1145
1230
|
)
|
|
1146
1231
|
|
|
1232
|
+
ODBC_QUERY = (
|
|
1233
|
+
OdbcLineage,
|
|
1234
|
+
FunctionName.ODBC_QUERY,
|
|
1235
|
+
)
|
|
1236
|
+
|
|
1147
1237
|
def handler(self) -> Type[AbstractLineage]:
|
|
1148
1238
|
return self.value[0]
|
|
1149
1239
|
|
|
@@ -40,6 +40,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
40
40
|
from datahub.ingestion.source.common.subtypes import (
|
|
41
41
|
BIAssetSubTypes,
|
|
42
42
|
BIContainerSubTypes,
|
|
43
|
+
SourceCapabilityModifier,
|
|
43
44
|
)
|
|
44
45
|
from datahub.ingestion.source.powerbi.config import (
|
|
45
46
|
Constant,
|
|
@@ -294,8 +295,6 @@ class Mapper:
|
|
|
294
295
|
logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")
|
|
295
296
|
|
|
296
297
|
mcp = MetadataChangeProposalWrapper(
|
|
297
|
-
entityType=Constant.DATASET,
|
|
298
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
299
298
|
entityUrn=ds_urn,
|
|
300
299
|
aspect=upstream_lineage_class,
|
|
301
300
|
)
|
|
@@ -538,9 +537,7 @@ class Mapper:
|
|
|
538
537
|
profile.columnCount = table.column_count
|
|
539
538
|
|
|
540
539
|
mcp = MetadataChangeProposalWrapper(
|
|
541
|
-
entityType="dataset",
|
|
542
540
|
entityUrn=ds_urn,
|
|
543
|
-
aspectName="datasetProfile",
|
|
544
541
|
aspect=profile,
|
|
545
542
|
)
|
|
546
543
|
dataset_mcps.append(mcp)
|
|
@@ -796,7 +793,6 @@ class Mapper:
|
|
|
796
793
|
guid=container_key.guid(),
|
|
797
794
|
)
|
|
798
795
|
mcp = MetadataChangeProposalWrapper(
|
|
799
|
-
changeType=ChangeTypeClass.UPSERT,
|
|
800
796
|
entityUrn=entity_urn,
|
|
801
797
|
aspect=ContainerClass(container=f"{container_urn}"),
|
|
802
798
|
)
|
|
@@ -1231,7 +1227,14 @@ class Mapper:
|
|
|
1231
1227
|
@platform_name("PowerBI")
|
|
1232
1228
|
@config_class(PowerBiDashboardSourceConfig)
|
|
1233
1229
|
@support_status(SupportStatus.CERTIFIED)
|
|
1234
|
-
@capability(
|
|
1230
|
+
@capability(
|
|
1231
|
+
SourceCapability.CONTAINERS,
|
|
1232
|
+
"Enabled by default",
|
|
1233
|
+
subtype_modifier=[
|
|
1234
|
+
SourceCapabilityModifier.POWERBI_WORKSPACE,
|
|
1235
|
+
SourceCapabilityModifier.POWERBI_DATASET,
|
|
1236
|
+
],
|
|
1237
|
+
)
|
|
1235
1238
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
1236
1239
|
@capability(SourceCapability.OWNERSHIP, "Enabled by default")
|
|
1237
1240
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
@@ -1253,6 +1256,7 @@ class Mapper:
|
|
|
1253
1256
|
SourceCapability.DATA_PROFILING,
|
|
1254
1257
|
"Optionally enabled via configuration profiling.enabled",
|
|
1255
1258
|
)
|
|
1259
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
1256
1260
|
class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
|
|
1257
1261
|
"""
|
|
1258
1262
|
This plugin extracts the following:
|
|
@@ -673,7 +673,6 @@ class PowerBiAPI:
|
|
|
673
673
|
fill_dashboard_tags()
|
|
674
674
|
self._fill_independent_datasets(workspace=workspace)
|
|
675
675
|
|
|
676
|
-
# flake8: noqa: C901
|
|
677
676
|
def fill_workspaces(
|
|
678
677
|
self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
|
|
679
678
|
) -> Iterable[Workspace]:
|
|
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
|
|
53
53
|
from datahub.metadata.schema_classes import (
|
|
54
54
|
BrowsePathsClass,
|
|
55
|
-
ChangeTypeClass,
|
|
56
55
|
CorpUserInfoClass,
|
|
57
56
|
CorpUserKeyClass,
|
|
58
57
|
DashboardInfoClass,
|
|
@@ -243,20 +242,14 @@ class Mapper:
|
|
|
243
242
|
|
|
244
243
|
@staticmethod
|
|
245
244
|
def new_mcp(
|
|
246
|
-
entity_type,
|
|
247
245
|
entity_urn,
|
|
248
|
-
aspect_name,
|
|
249
246
|
aspect,
|
|
250
|
-
change_type=ChangeTypeClass.UPSERT,
|
|
251
247
|
):
|
|
252
248
|
"""
|
|
253
249
|
Create MCP
|
|
254
250
|
"""
|
|
255
251
|
return MetadataChangeProposalWrapper(
|
|
256
|
-
entityType=entity_type,
|
|
257
|
-
changeType=change_type,
|
|
258
252
|
entityUrn=entity_urn,
|
|
259
|
-
aspectName=aspect_name,
|
|
260
253
|
aspect=aspect,
|
|
261
254
|
)
|
|
262
255
|
|
|
@@ -343,17 +336,13 @@ class Mapper:
|
|
|
343
336
|
)
|
|
344
337
|
|
|
345
338
|
info_mcp = self.new_mcp(
|
|
346
|
-
entity_type=Constant.DASHBOARD,
|
|
347
339
|
entity_urn=dashboard_urn,
|
|
348
|
-
aspect_name=Constant.DASHBOARD_INFO,
|
|
349
340
|
aspect=dashboard_info_cls,
|
|
350
341
|
)
|
|
351
342
|
|
|
352
343
|
# removed status mcp
|
|
353
344
|
removed_status_mcp = self.new_mcp(
|
|
354
|
-
entity_type=Constant.DASHBOARD,
|
|
355
345
|
entity_urn=dashboard_urn,
|
|
356
|
-
aspect_name=Constant.STATUS,
|
|
357
346
|
aspect=StatusClass(removed=False),
|
|
358
347
|
)
|
|
359
348
|
|
|
@@ -365,9 +354,7 @@ class Mapper:
|
|
|
365
354
|
|
|
366
355
|
# Dashboard key
|
|
367
356
|
dashboard_key_mcp = self.new_mcp(
|
|
368
|
-
entity_type=Constant.DASHBOARD,
|
|
369
357
|
entity_urn=dashboard_urn,
|
|
370
|
-
aspect_name=Constant.DASHBOARD_KEY,
|
|
371
358
|
aspect=dashboard_key_cls,
|
|
372
359
|
)
|
|
373
360
|
|
|
@@ -378,9 +365,7 @@ class Mapper:
|
|
|
378
365
|
ownership = OwnershipClass(owners=owners)
|
|
379
366
|
# Dashboard owner MCP
|
|
380
367
|
owner_mcp = self.new_mcp(
|
|
381
|
-
entity_type=Constant.DASHBOARD,
|
|
382
368
|
entity_urn=dashboard_urn,
|
|
383
|
-
aspect_name=Constant.OWNERSHIP,
|
|
384
369
|
aspect=ownership,
|
|
385
370
|
)
|
|
386
371
|
|
|
@@ -396,9 +381,7 @@ class Mapper:
|
|
|
396
381
|
]
|
|
397
382
|
)
|
|
398
383
|
browse_path_mcp = self.new_mcp(
|
|
399
|
-
entity_type=Constant.DASHBOARD,
|
|
400
384
|
entity_urn=dashboard_urn,
|
|
401
|
-
aspect_name=Constant.BROWSERPATH,
|
|
402
385
|
aspect=browse_path,
|
|
403
386
|
)
|
|
404
387
|
|
|
@@ -429,27 +412,21 @@ class Mapper:
|
|
|
429
412
|
)
|
|
430
413
|
|
|
431
414
|
info_mcp = self.new_mcp(
|
|
432
|
-
entity_type=Constant.CORP_USER,
|
|
433
415
|
entity_urn=user_urn,
|
|
434
|
-
aspect_name=Constant.CORP_USER_INFO,
|
|
435
416
|
aspect=user_info_instance,
|
|
436
417
|
)
|
|
437
418
|
user_mcps.append(info_mcp)
|
|
438
419
|
|
|
439
420
|
# removed status mcp
|
|
440
421
|
status_mcp = self.new_mcp(
|
|
441
|
-
entity_type=Constant.CORP_USER,
|
|
442
422
|
entity_urn=user_urn,
|
|
443
|
-
aspect_name=Constant.STATUS,
|
|
444
423
|
aspect=StatusClass(removed=False),
|
|
445
424
|
)
|
|
446
425
|
user_mcps.append(status_mcp)
|
|
447
426
|
user_key = CorpUserKeyClass(username=user.username)
|
|
448
427
|
|
|
449
428
|
user_key_mcp = self.new_mcp(
|
|
450
|
-
entity_type=Constant.CORP_USER,
|
|
451
429
|
entity_urn=user_urn,
|
|
452
|
-
aspect_name=Constant.CORP_USER_KEY,
|
|
453
430
|
aspect=user_key,
|
|
454
431
|
)
|
|
455
432
|
user_mcps.append(user_key_mcp)
|
|
@@ -27,10 +27,8 @@ class CatalogItem(BaseModel):
|
|
|
27
27
|
is_favorite: bool = Field(alias="IsFavorite")
|
|
28
28
|
user_info: Any = Field(None, alias="UserInfo")
|
|
29
29
|
display_name: Optional[str] = Field(None, alias="DisplayName")
|
|
30
|
-
has_data_sources: bool = Field(
|
|
31
|
-
data_sources: Optional[List["DataSource"]] = Field(
|
|
32
|
-
default_factory=list, alias="DataSources"
|
|
33
|
-
)
|
|
30
|
+
has_data_sources: bool = Field(False, alias="HasDataSources")
|
|
31
|
+
data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")
|
|
34
32
|
|
|
35
33
|
@validator("display_name", always=True)
|
|
36
34
|
def validate_diplay_name(cls, value, values):
|
|
@@ -2,7 +2,7 @@ import logging
|
|
|
2
2
|
from typing import Dict, Optional
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
|
-
from pydantic
|
|
5
|
+
from pydantic import root_validator, validator
|
|
6
6
|
from pydantic.fields import Field
|
|
7
7
|
|
|
8
8
|
from datahub.emitter.mce_builder import DEFAULT_ENV
|
|
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
|
|
|
69
69
|
|
|
70
70
|
@platform_name("Preset")
|
|
71
71
|
@config_class(PresetConfig)
|
|
72
|
-
@support_status(SupportStatus.
|
|
72
|
+
@support_status(SupportStatus.CERTIFIED)
|
|
73
73
|
@capability(
|
|
74
|
-
SourceCapability.DELETION_DETECTION, "
|
|
74
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
75
75
|
)
|
|
76
76
|
class PresetSource(SupersetSource):
|
|
77
77
|
"""
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
1
2
|
from datetime import datetime
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from typing import Dict, List, Optional, Type, Union
|
|
4
5
|
|
|
5
|
-
from pydantic import BaseModel, Field, root_validator
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field, root_validator
|
|
6
7
|
|
|
7
8
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
8
9
|
from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
|
|
@@ -78,7 +79,11 @@ PERSONAL_SPACE_DICT = {
|
|
|
78
79
|
}
|
|
79
80
|
|
|
80
81
|
|
|
81
|
-
class
|
|
82
|
+
class _QlikBaseModel(BaseModel):
|
|
83
|
+
model_config = ConfigDict(coerce_numbers_to_str=True)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Space(_QlikBaseModel):
|
|
82
87
|
id: str
|
|
83
88
|
name: str
|
|
84
89
|
description: str
|
|
@@ -89,6 +94,9 @@ class Space(BaseModel):
|
|
|
89
94
|
|
|
90
95
|
@root_validator(pre=True)
|
|
91
96
|
def update_values(cls, values: Dict) -> Dict:
|
|
97
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
98
|
+
values = deepcopy(values)
|
|
99
|
+
|
|
92
100
|
values[Constant.CREATEDAT] = datetime.strptime(
|
|
93
101
|
values[Constant.CREATEDAT], QLIK_DATETIME_FORMAT
|
|
94
102
|
)
|
|
@@ -98,7 +106,7 @@ class Space(BaseModel):
|
|
|
98
106
|
return values
|
|
99
107
|
|
|
100
108
|
|
|
101
|
-
class Item(
|
|
109
|
+
class Item(_QlikBaseModel):
|
|
102
110
|
id: str
|
|
103
111
|
description: str = ""
|
|
104
112
|
ownerId: str
|
|
@@ -107,7 +115,7 @@ class Item(BaseModel):
|
|
|
107
115
|
updatedAt: datetime
|
|
108
116
|
|
|
109
117
|
|
|
110
|
-
class SchemaField(
|
|
118
|
+
class SchemaField(_QlikBaseModel):
|
|
111
119
|
name: str
|
|
112
120
|
dataType: Optional[str] = None
|
|
113
121
|
primaryKey: Optional[bool] = None
|
|
@@ -115,6 +123,8 @@ class SchemaField(BaseModel):
|
|
|
115
123
|
|
|
116
124
|
@root_validator(pre=True)
|
|
117
125
|
def update_values(cls, values: Dict) -> Dict:
|
|
126
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
127
|
+
values = deepcopy(values)
|
|
118
128
|
values[Constant.DATATYPE] = values.get(Constant.DATATYPE, {}).get(Constant.TYPE)
|
|
119
129
|
return values
|
|
120
130
|
|
|
@@ -130,6 +140,8 @@ class QlikDataset(Item):
|
|
|
130
140
|
|
|
131
141
|
@root_validator(pre=True)
|
|
132
142
|
def update_values(cls, values: Dict) -> Dict:
|
|
143
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
144
|
+
values = deepcopy(values)
|
|
133
145
|
# Update str time to datetime
|
|
134
146
|
values[Constant.CREATEDAT] = datetime.strptime(
|
|
135
147
|
values[Constant.CREATEDTIME], QLIK_DATETIME_FORMAT
|
|
@@ -148,13 +160,13 @@ class QlikDataset(Item):
|
|
|
148
160
|
return values
|
|
149
161
|
|
|
150
162
|
|
|
151
|
-
class AxisProperty(
|
|
163
|
+
class AxisProperty(_QlikBaseModel):
|
|
152
164
|
Title: str = Field(alias="qFallbackTitle")
|
|
153
165
|
Min: str = Field(alias="qMin")
|
|
154
166
|
Max: str = Field(alias="qMax")
|
|
155
167
|
|
|
156
168
|
|
|
157
|
-
class Chart(
|
|
169
|
+
class Chart(_QlikBaseModel):
|
|
158
170
|
qId: str
|
|
159
171
|
visualization: str
|
|
160
172
|
title: str
|
|
@@ -164,13 +176,15 @@ class Chart(BaseModel):
|
|
|
164
176
|
|
|
165
177
|
@root_validator(pre=True)
|
|
166
178
|
def update_values(cls, values: Dict) -> Dict:
|
|
179
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
180
|
+
values = deepcopy(values)
|
|
167
181
|
values[Constant.QID] = values[Constant.QINFO][Constant.QID]
|
|
168
182
|
values["qDimension"] = values[Constant.HYPERCUBE]["qDimensionInfo"]
|
|
169
183
|
values["qMeasure"] = values[Constant.HYPERCUBE]["qMeasureInfo"]
|
|
170
184
|
return values
|
|
171
185
|
|
|
172
186
|
|
|
173
|
-
class Sheet(
|
|
187
|
+
class Sheet(_QlikBaseModel):
|
|
174
188
|
id: str
|
|
175
189
|
title: str
|
|
176
190
|
description: str
|
|
@@ -181,6 +195,8 @@ class Sheet(BaseModel):
|
|
|
181
195
|
|
|
182
196
|
@root_validator(pre=True)
|
|
183
197
|
def update_values(cls, values: Dict) -> Dict:
|
|
198
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
199
|
+
values = deepcopy(values)
|
|
184
200
|
values[Constant.CREATEDAT] = datetime.strptime(
|
|
185
201
|
values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
|
|
186
202
|
)
|
|
@@ -190,7 +206,7 @@ class Sheet(BaseModel):
|
|
|
190
206
|
return values
|
|
191
207
|
|
|
192
208
|
|
|
193
|
-
class QlikTable(
|
|
209
|
+
class QlikTable(_QlikBaseModel):
|
|
194
210
|
tableName: str
|
|
195
211
|
type: BoxType = Field(alias="boxType")
|
|
196
212
|
tableAlias: str
|
|
@@ -206,6 +222,8 @@ class QlikTable(BaseModel):
|
|
|
206
222
|
|
|
207
223
|
@root_validator(pre=True)
|
|
208
224
|
def update_values(cls, values: Dict) -> Dict:
|
|
225
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
226
|
+
values = deepcopy(values)
|
|
209
227
|
values[Constant.DATACONNECTORID] = values[Constant.CONNECTIONINFO][Constant.ID]
|
|
210
228
|
values[Constant.DATACONNECTORPLATFORM] = values[Constant.CONNECTIONINFO][
|
|
211
229
|
Constant.SOURCECONNECTORID
|
|
@@ -223,6 +241,8 @@ class App(Item):
|
|
|
223
241
|
|
|
224
242
|
@root_validator(pre=True)
|
|
225
243
|
def update_values(cls, values: Dict) -> Dict:
|
|
244
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
245
|
+
values = deepcopy(values)
|
|
226
246
|
values[Constant.CREATEDAT] = datetime.strptime(
|
|
227
247
|
values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
|
|
228
248
|
)
|
|
@@ -101,7 +101,7 @@ logger = logging.getLogger(__name__)
|
|
|
101
101
|
)
|
|
102
102
|
@capability(
|
|
103
103
|
SourceCapability.LINEAGE_FINE,
|
|
104
|
-
"Disabled by default.
|
|
104
|
+
"Disabled by default.",
|
|
105
105
|
)
|
|
106
106
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
107
107
|
@capability(
|
|
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
|
|
|
109
109
|
"Enabled by default, configured using `ingest_owner`",
|
|
110
110
|
)
|
|
111
111
|
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
|
112
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
112
113
|
class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
|
|
113
114
|
"""
|
|
114
115
|
This plugin extracts the following:
|
|
@@ -447,7 +447,7 @@ class RedashSource(StatefulIngestionSourceBase):
|
|
|
447
447
|
dataset_urns = sql_parser_in_tables.in_tables
|
|
448
448
|
if sql_parser_in_tables.debug_info.table_error:
|
|
449
449
|
self.report.queries_problem_parsing.add(str(query_id))
|
|
450
|
-
self.
|
|
450
|
+
self.warn(
|
|
451
451
|
logger,
|
|
452
452
|
"sql-parsing",
|
|
453
453
|
f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from copy import deepcopy
|
|
2
3
|
from enum import Enum
|
|
3
4
|
from typing import Any, Dict, List, Optional
|
|
4
5
|
|
|
@@ -6,9 +7,10 @@ from pydantic import root_validator
|
|
|
6
7
|
from pydantic.fields import Field
|
|
7
8
|
|
|
8
9
|
from datahub.configuration import ConfigModel
|
|
9
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
10
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
10
11
|
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
|
|
11
12
|
from datahub.configuration.validate_field_removal import pydantic_removed_field
|
|
13
|
+
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
12
14
|
from datahub.ingestion.api.incremental_lineage_helper import (
|
|
13
15
|
IncrementalLineageConfigMixin,
|
|
14
16
|
)
|
|
@@ -94,13 +96,18 @@ class RedshiftConfig(
|
|
|
94
96
|
# Because of this behavior, it uses dramatically fewer round trips for
|
|
95
97
|
# large Redshift warehouses. As an example, see this query for the columns:
|
|
96
98
|
# https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/blob/60b4db04c1d26071c291aeea52f1dcb5dd8b0eb0/sqlalchemy_redshift/dialect.py#L745.
|
|
97
|
-
scheme: str = Field(
|
|
99
|
+
scheme: HiddenFromDocs[str] = Field(
|
|
98
100
|
default="redshift+redshift_connector",
|
|
99
101
|
description="",
|
|
100
|
-
hidden_from_docs=True,
|
|
101
102
|
)
|
|
102
103
|
|
|
103
104
|
_database_alias_removed = pydantic_removed_field("database_alias")
|
|
105
|
+
_use_lineage_v2_removed = pydantic_removed_field("use_lineage_v2")
|
|
106
|
+
_rename_lineage_v2_generate_queries_to_lineage_generate_queries = (
|
|
107
|
+
pydantic_renamed_field(
|
|
108
|
+
"lineage_v2_generate_queries", "lineage_generate_queries"
|
|
109
|
+
)
|
|
110
|
+
)
|
|
104
111
|
|
|
105
112
|
default_schema: str = Field(
|
|
106
113
|
default="public",
|
|
@@ -112,13 +119,9 @@ class RedshiftConfig(
|
|
|
112
119
|
description="Whether target Redshift instance is serverless (alternative is provisioned cluster)",
|
|
113
120
|
)
|
|
114
121
|
|
|
115
|
-
|
|
116
|
-
default=True,
|
|
117
|
-
description="Whether to use the new SQL-based lineage collector.",
|
|
118
|
-
)
|
|
119
|
-
lineage_v2_generate_queries: bool = Field(
|
|
122
|
+
lineage_generate_queries: bool = Field(
|
|
120
123
|
default=True,
|
|
121
|
-
description="Whether to generate queries entities for the
|
|
124
|
+
description="Whether to generate queries entities for the SQL-based lineage collector.",
|
|
122
125
|
)
|
|
123
126
|
|
|
124
127
|
include_table_lineage: bool = Field(
|
|
@@ -213,6 +216,9 @@ class RedshiftConfig(
|
|
|
213
216
|
|
|
214
217
|
@root_validator(skip_on_failure=True)
|
|
215
218
|
def connection_config_compatibility_set(cls, values: Dict) -> Dict:
|
|
219
|
+
# Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
|
|
220
|
+
values = deepcopy(values)
|
|
221
|
+
|
|
216
222
|
if (
|
|
217
223
|
("options" in values and "connect_args" in values["options"])
|
|
218
224
|
and "extra_client_options" in values
|
|
@@ -26,7 +26,7 @@ from datahub.utilities.search_utils import LogicalOperator
|
|
|
26
26
|
|
|
27
27
|
class OutboundSharePlatformResource(BaseModel):
|
|
28
28
|
namespace: str
|
|
29
|
-
platform_instance: Optional[str]
|
|
29
|
+
platform_instance: Optional[str] = None
|
|
30
30
|
env: str
|
|
31
31
|
source_database: str
|
|
32
32
|
share_name: str
|