acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub may be problematic.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/powerbi/powerbi.py:

@@ -3,6 +3,7 @@
 # Meta Data Ingestion From the Power BI Source
 #
 #########################################################
+import functools
 import logging
 from datetime import datetime
 from typing import Iterable, List, Optional, Tuple, Union

@@ -24,6 +25,7 @@ from datahub.ingestion.api.decorators import (
     support_status,
 )
 from datahub.ingestion.api.incremental_lineage_helper import (
+    auto_incremental_lineage,
     convert_dashboard_info_to_patch,
 )
 from datahub.ingestion.api.source import (

@@ -38,6 +40,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import (
     BIAssetSubTypes,
     BIContainerSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.powerbi.config import (
     Constant,

@@ -92,7 +95,7 @@ from datahub.metadata.schema_classes import (
     UpstreamLineageClass,
     ViewPropertiesClass,
 )
-from datahub.metadata.urns import ChartUrn
+from datahub.metadata.urns import ChartUrn, DatasetUrn
 from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
 from datahub.utilities.dedup_list import deduplicate_list
 from datahub.utilities.urns.urn_iter import lowercase_dataset_urn

@@ -238,6 +241,10 @@ class Mapper:
         upstream: List[UpstreamClass] = []
         cll_lineage: List[FineGrainedLineage] = []

+        logger.debug(
+            f"Extracting lineage for table {table.full_name} in dataset {table.dataset.name if table.dataset else None}"
+        )
+
         upstream_lineage: List[
             datahub.ingestion.source.powerbi.m_query.data_classes.Lineage
         ] = parser.get_upstream_tables(

@@ -257,7 +264,7 @@ class Mapper:
         for upstream_dpt in lineage.upstreams:
             if (
                 upstream_dpt.data_platform_pair.powerbi_data_platform_name
-                not in self.__config.dataset_type_mapping
+                not in self.__config.dataset_type_mapping
             ):
                 logger.debug(
                     f"Skipping upstream table for {ds_urn}. The platform {upstream_dpt.data_platform_pair.powerbi_data_platform_name} is not part of dataset_type_mapping",

@@ -288,8 +295,6 @@ class Mapper:
         logger.debug(f"Dataset urn = {ds_urn} and its lineage = {upstream_lineage}")

         mcp = MetadataChangeProposalWrapper(
-            entityType=Constant.DATASET,
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=ds_urn,
             aspect=upstream_lineage_class,
         )

@@ -532,9 +537,7 @@ class Mapper:
         profile.columnCount = table.column_count

         mcp = MetadataChangeProposalWrapper(
-            entityType="dataset",
             entityUrn=ds_urn,
-            aspectName="datasetProfile",
             aspect=profile,
         )
         dataset_mcps.append(mcp)
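Note on the two hunks above: in current versions of the SDK, MetadataChangeProposalWrapper infers entityType and aspectName from the urn and the aspect object and defaults changeType to UPSERT, which is why the explicit arguments can be dropped. A minimal sketch (the urn value is made up for illustration):

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import StatusClass

# entityType, aspectName, and changeType (UPSERT) are derived automatically
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:powerbi,example.table,PROD)",
    aspect=StatusClass(removed=False),
)
```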
@@ -666,6 +669,7 @@ class Mapper:
         workspace: powerbi_data_classes.Workspace,
         chart_mcps: List[MetadataChangeProposalWrapper],
         user_mcps: List[MetadataChangeProposalWrapper],
+        dashboard_edges: List[EdgeClass],
     ) -> List[MetadataChangeProposalWrapper]:
         """
         Map PowerBi dashboard to Datahub dashboard

@@ -695,6 +699,7 @@ class Mapper:
             lastModified=ChangeAuditStamps(),
             dashboardUrl=dashboard.webUrl,
             customProperties={**chart_custom_properties(dashboard)},
+            dashboards=dashboard_edges,
         )

         info_mcp = self.new_mcp(

@@ -788,7 +793,6 @@ class Mapper:
             guid=container_key.guid(),
         )
         mcp = MetadataChangeProposalWrapper(
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=entity_urn,
             aspect=ContainerClass(container=f"{container_urn}"),
         )

@@ -933,7 +937,7 @@ class Mapper:
         dashboard: powerbi_data_classes.Dashboard,
         workspace: powerbi_data_classes.Workspace,
     ) -> List[EquableMetadataWorkUnit]:
-        mcps = []
+        mcps: List[MetadataChangeProposalWrapper] = []

         logger.info(
             f"Converting dashboard={dashboard.displayName} to datahub dashboard"

@@ -945,9 +949,30 @@ class Mapper:
         )
         # Convert tiles to charts
         ds_mcps, chart_mcps = self.to_datahub_chart(dashboard.tiles, workspace)
+
+        # collect all downstream reports (dashboards)
+        dashboard_edges = []
+        for t in dashboard.tiles:
+            if t.report:
+                dashboard_urn = builder.make_dashboard_urn(
+                    platform=self.__config.platform_name,
+                    platform_instance=self.__config.platform_instance,
+                    name=t.report.get_urn_part(),
+                )
+                edge = EdgeClass(
+                    destinationUrn=dashboard_urn,
+                )
+                dashboard_edges.append(edge)
+
         # Lets convert dashboard to datahub dashboard
         dashboard_mcps: List[MetadataChangeProposalWrapper] = (
-            self.to_datahub_dashboard_mcp(
+            self.to_datahub_dashboard_mcp(
+                dashboard=dashboard,
+                workspace=workspace,
+                chart_mcps=chart_mcps,
+                user_mcps=user_mcps,
+                dashboard_edges=dashboard_edges,
+            )
         )

         # Now add MCPs in sequence
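For context, the added block builds one Edge per tile that points at a report. A rough standalone sketch of the same idea, with a hypothetical report name:

```python
import datahub.emitter.mce_builder as builder
from datahub.metadata.schema_classes import EdgeClass

# hypothetical report identifier, for illustration only
report_names = ["reports.00000000-0000-0000-0000-000000000000"]
dashboard_edges = [
    EdgeClass(destinationUrn=builder.make_dashboard_urn(platform="powerbi", name=name))
    for name in report_names
]
```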
@@ -1054,7 +1079,7 @@ class Mapper:
         report: powerbi_data_classes.Report,
         chart_mcps: List[MetadataChangeProposalWrapper],
         user_mcps: List[MetadataChangeProposalWrapper],
-
+        dataset_edges: List[EdgeClass],
     ) -> List[MetadataChangeProposalWrapper]:
         """
         Map PowerBi report to Datahub dashboard

@@ -1076,7 +1101,7 @@ class Mapper:
             charts=chart_urn_list,
             lastModified=ChangeAuditStamps(),
             dashboardUrl=report.webUrl,
-
+            datasetEdges=dataset_edges,
         )

         info_mcp = self.new_mcp(

@@ -1170,27 +1195,23 @@ class Mapper:
         ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
         chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)

-        #
-
-
-
-
-
-
-
-
-                edge = EdgeClass(
-                    destinationUrn=dashboard_urn,
-                    sourceUrn=None,
-                    created=None,
-                    lastModified=None,
-                    properties=None,
-                )
-                downstream_dashboards_edges.append(edge)
+        # collect all upstream datasets; using a set to retain unique urns
+        dataset_urns = {
+            dataset.entityUrn
+            for dataset in ds_mcps
+            if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn
+        }
+        dataset_edges = [
+            EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns
+        ]

         # Let's convert report to datahub dashboard
         report_mcps = self.report_to_dashboard(
-            workspace,
+            workspace=workspace,
+            report=report,
+            chart_mcps=chart_mcps,
+            user_mcps=user_mcps,
+            dataset_edges=dataset_edges,
         )

         # Now add MCPs in sequence

@@ -1206,7 +1227,14 @@ class Mapper:
 @platform_name("PowerBI")
 @config_class(PowerBiDashboardSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.POWERBI_WORKSPACE,
+        SourceCapabilityModifier.POWERBI_DATASET,
+    ],
+)
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
 @capability(SourceCapability.OWNERSHIP, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")

@@ -1228,6 +1256,7 @@ class Mapper:
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:

@@ -1300,7 +1329,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):

         allowed_workspaces = []
         for workspace in all_workspaces:
-            if not self.source_config.workspace_id_pattern.allowed(
+            if not self.source_config.workspace_id_pattern.allowed(
+                workspace.id
+            ) or not self.source_config.workspace_name_pattern.allowed(workspace.name):
                 self.reporter.filtered_workspace_names.append(
                     f"{workspace.id} - {workspace.name}"
                 )
@@ -1326,7 +1357,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
             for data_platform in SupportedDataPlatform
         ]

-        for key in self.source_config.dataset_type_mapping
+        for key in self.source_config.dataset_type_mapping:
             if key not in powerbi_data_platforms:
                 raise ValueError(f"PowerBI DataPlatform {key} is not supported")

@@ -1516,6 +1547,9 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
         else:
             return [
                 *super().get_workunit_processors(),
+                functools.partial(
+                    auto_incremental_lineage, self.source_config.incremental_lineage
+                ),
                 self.stale_entity_removal_handler.workunit_processor,
             ]
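Here functools.partial pre-binds the incremental_lineage flag so the pipeline can later call the processor with only the workunit stream. A simplified stand-in (not the real helper's signature) to show the mechanics:

```python
import functools
from typing import Iterable, Iterator


def auto_incremental_lineage(incremental: bool, stream: Iterable) -> Iterator:
    # stand-in for the real helper: pass workunits through,
    # optionally rewriting lineage aspects when incremental is True
    for workunit in stream:
        yield workunit


processor = functools.partial(auto_incremental_lineage, True)
print(list(processor(["wu1", "wu2"])))  # ['wu1', 'wu2']
```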
datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py:

@@ -63,10 +63,10 @@ class SessionWithTimeout(requests.Session):
         super().__init__(*args, **kwargs)
         self.timeout = timeout

-    def request(self, method, url, **kwargs):
+    def request(self, method, url, *args, **kwargs):
         # Set the default timeout if none is provided
         kwargs.setdefault("timeout", self.timeout)
-        return super().request(method, url, **kwargs)
+        return super().request(method, url, *args, **kwargs)


 class DataResolverBase(ABC):
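The fix above matters because requests.Session.request accepts more than method and url; without *args, callers passing further positional arguments would fail instead of having them forwarded. A self-contained sketch of the same pattern:

```python
import requests


class SessionWithTimeout(requests.Session):
    """requests.Session that applies a default timeout to every request."""

    def __init__(self, timeout: float = 30, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.timeout = timeout

    def request(self, method, url, *args, **kwargs):
        # only set a timeout when the caller did not provide one
        kwargs.setdefault("timeout", self.timeout)
        return super().request(method, url, *args, **kwargs)
```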
datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py:

@@ -115,7 +115,7 @@ class PowerBiAPI:
         if scan_result is None:
             return results

-        for scanned_dashboard in scan_result.get(Constant.DASHBOARDS
+        for scanned_dashboard in scan_result.get(Constant.DASHBOARDS) or []:
             # Iterate through response and create a list of PowerBiAPI.Dashboard
             dashboard_id = scanned_dashboard.get("id")
             tags = self._parse_endorsement(

@@ -133,17 +133,17 @@ class PowerBiAPI:
         if scan_result is None:
             return results

-        reports: List[dict] = scan_result.get(Constant.REPORTS
+        reports: List[dict] = scan_result.get(Constant.REPORTS) or []

         for report in reports:
-            report_id = report.get(Constant.ID
+            report_id = report.get(Constant.ID)
             if report_id is None:
                 logger.warning(
                     f"Report id is none. Skipping endorsement tag for report instance {report}"
                 )
                 continue
             endorsements = self._parse_endorsement(
-                report.get(Constant.ENDORSEMENT_DETAIL
+                report.get(Constant.ENDORSEMENT_DETAIL)
             )
             results[report_id] = endorsements

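The repeated change in these hunks ends each lookup with .get(key) or [], which guards against the API returning an explicit null as well as a missing key:

```python
payload = {"reports": None}  # the service returned an explicit null

print(payload.get("reports", []))     # None -> iterating over it would fail
print(payload.get("reports") or [])   # []   -> safe to iterate
```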
@@ -339,7 +339,7 @@ class PowerBiAPI:
         if not endorsements:
             return []

-        endorsement = endorsements.get(Constant.ENDORSEMENT
+        endorsement = endorsements.get(Constant.ENDORSEMENT)
         if not endorsement:
             return []

@@ -396,7 +396,7 @@ class PowerBiAPI:

             if self.__config.extract_endorsements_to_tags:
                 dataset_instance.tags = self._parse_endorsement(
-                    dataset_dict.get(Constant.ENDORSEMENT_DETAIL
+                    dataset_dict.get(Constant.ENDORSEMENT_DETAIL)
                 )

             dataset_map[dataset_instance.id] = dataset_instance

@@ -407,7 +407,7 @@ class PowerBiAPI:
                 else dataset_instance.id
             )
             logger.debug(f"dataset_dict = {dataset_dict}")
-            for table in dataset_dict.get(Constant.TABLES
+            for table in dataset_dict.get(Constant.TABLES) or []:
                 expression: Optional[str] = (
                     table[Constant.SOURCE][0][Constant.EXPRESSION]
                     if table.get(Constant.SOURCE) is not None

@@ -430,10 +430,10 @@ class PowerBiAPI:
                             column["dataType"], FIELD_TYPE_MAPPING["Null"]
                         ),
                     )
-                    for column in table.get("columns"
+                    for column in table.get("columns") or []
                 ],
                 measures=[
-                    Measure(**measure) for measure in table.get("measures"
+                    Measure(**measure) for measure in table.get("measures") or []
                 ],
                 dataset=dataset_instance,
                 row_count=None,

@@ -480,7 +480,7 @@ class PowerBiAPI:
                 )
             )
             if app_id is None:  # In PowerBI one workspace can have one app
-                app_id = report
+                app_id = report[Constant.APP_ID]

         raw_app_dashboards: List[Dict] = []
         # Filter app dashboards

@@ -488,7 +488,7 @@ class PowerBiAPI:
             if dashboard.get(Constant.APP_ID):
                 raw_app_dashboards.append(dashboard)
             if app_id is None:  # In PowerBI, one workspace contains one app
-                app_id =
+                app_id = dashboard[Constant.APP_ID]

         # workspace doesn't have an App. Above two loops can be avoided
         # if app_id is available at root level in workspace_metadata

@@ -673,7 +673,6 @@ class PowerBiAPI:
         fill_dashboard_tags()
         self._fill_independent_datasets(workspace=workspace)

-    # flake8: noqa: C901
     def fill_workspaces(
         self, workspaces: List[Workspace], reporter: PowerBiDashboardSourceReport
     ) -> Iterable[Workspace]:

datahub/ingestion/source/powerbi_report_server/report_server.py:

@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
 from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
 from datahub.metadata.schema_classes import (
     BrowsePathsClass,
-    ChangeTypeClass,
     CorpUserInfoClass,
     CorpUserKeyClass,
     DashboardInfoClass,

@@ -243,20 +242,14 @@ class Mapper:

     @staticmethod
     def new_mcp(
-        entity_type,
         entity_urn,
-        aspect_name,
         aspect,
-        change_type=ChangeTypeClass.UPSERT,
     ):
         """
         Create MCP
         """
         return MetadataChangeProposalWrapper(
-            entityType=entity_type,
-            changeType=change_type,
             entityUrn=entity_urn,
-            aspectName=aspect_name,
             aspect=aspect,
         )

@@ -343,17 +336,13 @@ class Mapper:
         )

         info_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_INFO,
             aspect=dashboard_info_cls,
         )

         # removed status mcp
         removed_status_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )

@@ -365,9 +354,7 @@ class Mapper:

         # Dashboard key
         dashboard_key_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.DASHBOARD_KEY,
             aspect=dashboard_key_cls,
         )

@@ -378,9 +365,7 @@ class Mapper:
         ownership = OwnershipClass(owners=owners)
         # Dashboard owner MCP
         owner_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.OWNERSHIP,
             aspect=ownership,
         )

@@ -396,9 +381,7 @@ class Mapper:
             ]
         )
         browse_path_mcp = self.new_mcp(
-            entity_type=Constant.DASHBOARD,
             entity_urn=dashboard_urn,
-            aspect_name=Constant.BROWSERPATH,
             aspect=browse_path,
         )

@@ -429,27 +412,21 @@ class Mapper:
         )

         info_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_INFO,
             aspect=user_info_instance,
         )
         user_mcps.append(info_mcp)

         # removed status mcp
         status_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.STATUS,
             aspect=StatusClass(removed=False),
         )
         user_mcps.append(status_mcp)
         user_key = CorpUserKeyClass(username=user.username)

         user_key_mcp = self.new_mcp(
-            entity_type=Constant.CORP_USER,
             entity_urn=user_urn,
-            aspect_name=Constant.CORP_USER_KEY,
             aspect=user_key,
         )
         user_mcps.append(user_key_mcp)

datahub/ingestion/source/powerbi_report_server/report_server_domain.py:

@@ -27,10 +27,8 @@ class CatalogItem(BaseModel):
     is_favorite: bool = Field(alias="IsFavorite")
     user_info: Any = Field(None, alias="UserInfo")
     display_name: Optional[str] = Field(None, alias="DisplayName")
-    has_data_sources: bool = Field(
-    data_sources: Optional[List["DataSource"]] = Field(
-        default_factory=list, alias="DataSources"
-    )
+    has_data_sources: bool = Field(False, alias="HasDataSources")
+    data_sources: Optional[List["DataSource"]] = Field(None, alias="DataSources")

     @validator("display_name", always=True)
     def validate_diplay_name(cls, value, values):

datahub/ingestion/source/preset.py:

@@ -2,7 +2,7 @@ import logging
 from typing import Dict, Optional

 import requests
-from pydantic
+from pydantic import root_validator, validator
 from pydantic.fields import Field

 from datahub.emitter.mce_builder import DEFAULT_ENV

@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):

 @platform_name("Preset")
 @config_class(PresetConfig)
-@support_status(SupportStatus.
+@support_status(SupportStatus.CERTIFIED)
 @capability(
-    SourceCapability.DELETION_DETECTION, "
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
 )
 class PresetSource(SupersetSource):
     """

datahub/ingestion/source/qlik_sense/data_classes.py:

@@ -1,8 +1,9 @@
+from copy import deepcopy
 from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional, Type, Union

-from pydantic import BaseModel, Field, root_validator
+from pydantic import BaseModel, ConfigDict, Field, root_validator

 from datahub.emitter.mcp_builder import ContainerKey
 from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant

@@ -78,7 +79,11 @@ PERSONAL_SPACE_DICT = {
 }


-class
+class _QlikBaseModel(BaseModel):
+    model_config = ConfigDict(coerce_numbers_to_str=True)
+
+
+class Space(_QlikBaseModel):
     id: str
     name: str
     description: str
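The new _QlikBaseModel centralizes model configuration for the Qlik data classes; with pydantic v2, coerce_numbers_to_str lets numeric ids coming back from the API validate against str fields. A minimal sketch of the pattern:

```python
from pydantic import BaseModel, ConfigDict


class _QlikBaseModel(BaseModel):
    model_config = ConfigDict(coerce_numbers_to_str=True)


class Space(_QlikBaseModel):
    id: str
    name: str


space = Space(id=123, name="Sales")  # numeric id is coerced to "123"
print(space.id)
```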
@@ -89,6 +94,9 @@ class Space(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
+
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDAT], QLIK_DATETIME_FORMAT
         )
@@ -98,7 +106,7 @@ class Space(BaseModel):
         return values


-class Item(
+class Item(_QlikBaseModel):
     id: str
     description: str = ""
     ownerId: str

@@ -107,7 +115,7 @@ class Item(BaseModel):
     updatedAt: datetime


-class SchemaField(
+class SchemaField(_QlikBaseModel):
     name: str
     dataType: Optional[str] = None
     primaryKey: Optional[bool] = None

@@ -115,6 +123,8 @@ class SchemaField(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.DATATYPE] = values.get(Constant.DATATYPE, {}).get(Constant.TYPE)
         return values

@@ -130,6 +140,8 @@ class QlikDataset(Item):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         # Update str time to datetime
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDTIME], QLIK_DATETIME_FORMAT

@@ -148,13 +160,13 @@ class QlikDataset(Item):
         return values


-class AxisProperty(
+class AxisProperty(_QlikBaseModel):
     Title: str = Field(alias="qFallbackTitle")
     Min: str = Field(alias="qMin")
     Max: str = Field(alias="qMax")


-class Chart(
+class Chart(_QlikBaseModel):
     qId: str
     visualization: str
     title: str

@@ -164,13 +176,15 @@ class Chart(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.QID] = values[Constant.QINFO][Constant.QID]
         values["qDimension"] = values[Constant.HYPERCUBE]["qDimensionInfo"]
         values["qMeasure"] = values[Constant.HYPERCUBE]["qMeasureInfo"]
         return values


-class Sheet(
+class Sheet(_QlikBaseModel):
     id: str
     title: str
     description: str

@@ -181,6 +195,8 @@ class Sheet(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
         )

@@ -190,7 +206,7 @@ class Sheet(BaseModel):
         return values


-class QlikTable(
+class QlikTable(_QlikBaseModel):
     tableName: str
     type: BoxType = Field(alias="boxType")
     tableAlias: str

@@ -206,6 +222,8 @@ class QlikTable(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.DATACONNECTORID] = values[Constant.CONNECTIONINFO][Constant.ID]
         values[Constant.DATACONNECTORPLATFORM] = values[Constant.CONNECTIONINFO][
             Constant.SOURCECONNECTORID

@@ -223,6 +241,8 @@ class App(Item):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
         )

datahub/ingestion/source/qlik_sense/qlik_sense.py:

@@ -101,7 +101,7 @@ logger = logging.getLogger(__name__)
 )
 @capability(
     SourceCapability.LINEAGE_FINE,
-    "Disabled by default.
+    "Disabled by default.",
 )
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(

@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:

datahub/ingestion/source/redash.py:

@@ -447,7 +447,7 @@ class RedashSource(StatefulIngestionSourceBase):
             dataset_urns = sql_parser_in_tables.in_tables
             if sql_parser_in_tables.debug_info.table_error:
                 self.report.queries_problem_parsing.add(str(query_id))
-                self.
+                self.warn(
                     logger,
                     "sql-parsing",
                     f"exception {sql_parser_in_tables.debug_info.table_error} in parsing query-{query_id}-datasource-{data_source_id}",