acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +26 -23
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +3 -3
- datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +18 -10
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +125 -27
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +153 -229
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +37 -6
- datahub/cli/specific/datacontract_cli.py +54 -7
- datahub/cli/specific/dataproduct_cli.py +2 -15
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +172 -3
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/configuration/common.py +40 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +8 -4
- datahub/emitter/rest_emitter.py +103 -30
- datahub/entrypoints.py +6 -3
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +165 -58
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +330 -25
- datahub/ingestion/graph/config.py +3 -2
- datahub/ingestion/graph/filters.py +30 -11
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +81 -11
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +13 -5
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +6 -8
- datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
- datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/common/subtypes.py +53 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
- datahub/ingestion/source/datahub/config.py +12 -9
- datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
- datahub/ingestion/source/datahub/datahub_source.py +10 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
- datahub/ingestion/source/dbt/dbt_common.py +224 -9
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +132 -98
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +66 -7
- datahub/ingestion/source/fivetran/fivetran.py +227 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
- datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gcs/gcs_source.py +32 -4
- datahub/ingestion/source/ge_data_profiler.py +108 -31
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/api.py +28 -1
- datahub/ingestion/source/hex/hex.py +16 -5
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +123 -59
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
- datahub/ingestion/source/looker/looker_common.py +148 -79
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +503 -547
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +96 -117
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +9 -9
- datahub/ingestion/source/mlflow.py +12 -2
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +26 -5
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +47 -21
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
- datahub/ingestion/source/powerbi/powerbi.py +10 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +6 -5
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +449 -248
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +10 -16
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
- datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
- datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +217 -25
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +24 -8
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +19 -20
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +336 -57
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +5 -5
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +11 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/superset.py +314 -67
- datahub/ingestion/source/tableau/tableau.py +135 -59
- datahub/ingestion/source/tableau/tableau_common.py +9 -2
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +160 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
- datahub/ingestion/source/usage/usage_common.py +4 -3
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +6806 -4871
- datahub/metadata/_urns/urn_defs.py +1767 -1539
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18395 -16979
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +4 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +249 -5
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +56 -2
- datahub/sdk/entity_client.py +111 -9
- datahub/sdk/lineage_client.py +663 -82
- datahub/sdk/main_client.py +50 -16
- datahub/sdk/mlmodel.py +120 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +7 -3
- datahub/sdk/search_filters.py +304 -36
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +39 -59
- datahub/sql_parsing/split_statements.py +13 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
- datahub/sql_parsing/sqlglot_lineage.py +196 -42
- datahub/sql_parsing/sqlglot_utils.py +12 -4
- datahub/sql_parsing/tool_meta_extractor.py +1 -3
- datahub/telemetry/telemetry.py +28 -14
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +73 -17
- datahub/utilities/file_backed_collections.py +8 -9
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +22 -6
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +10 -1
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -466
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/ge_data_profiler.py:

```diff
@@ -120,7 +120,6 @@ SNOWFLAKE = "snowflake"
 BIGQUERY = "bigquery"
 REDSHIFT = "redshift"
 DATABRICKS = "databricks"
-TRINO = "trino"
 
 # Type names for Databricks, to match Title Case types in sqlalchemy
 ProfilerTypeMapping.INT_TYPE_NAMES.append("Integer")
```
```diff
@@ -206,6 +205,25 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> int:
             )
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
+    elif (
+        self.engine.dialect.name.lower() == GXSqlDialect.AWSATHENA
+        or self.engine.dialect.name.lower() == GXSqlDialect.TRINO
+    ):
+        return convert_to_json_serializable(
+            self.engine.execute(
+                sa.select(sa.func.approx_distinct(sa.column(column))).select_from(
+                    self._table
+                )
+            ).scalar()
+        )
+    elif self.engine.dialect.name.lower() == DATABRICKS:
+        return convert_to_json_serializable(
+            self.engine.execute(
+                sa.select(sa.func.approx_count_distinct(sa.column(column))).select_from(
+                    self._table
+                )
+            ).scalar()
+        )
     return convert_to_json_serializable(
         self.engine.execute(
             sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
```
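The patched `get_column_unique_count_dh_patch` now selects an approximate distinct-count aggregate by dialect: `approx_distinct` on Trino and Athena, `approx_count_distinct` on Databricks, and the exact `count(distinct ...)` fallback otherwise. A minimal sketch of the two approximate query shapes, using hypothetical table and column names and SQLAlchemy's default dialect for compilation:

```python
# Sketch only: "orders" and "customer_id" are hypothetical names, and the
# compiled SQL is rendered with SQLAlchemy's generic dialect.
import sqlalchemy as sa

table = sa.table("orders")
col = sa.column("customer_id")

# Trino/Athena spell the aggregate approx_distinct(); Databricks uses
# approx_count_distinct().
trino_athena_query = sa.select(sa.func.approx_distinct(col)).select_from(table)
databricks_query = sa.select(sa.func.approx_count_distinct(col)).select_from(table)

print(trino_athena_query)  # SELECT approx_distinct(customer_id) ... FROM orders
print(databricks_query)    # SELECT approx_count_distinct(customer_id) ... FROM orders
```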
```diff
@@ -289,7 +307,6 @@ def _is_single_row_query_method(query: Any) -> bool:
     "get_column_max",
     "get_column_mean",
     "get_column_stdev",
-    "get_column_nonnull_count",
     "get_column_unique_count",
 }
 CONSTANT_ROW_QUERY_METHODS = {
```
```diff
@@ -313,6 +330,7 @@ def _is_single_row_query_method(query: Any) -> bool:
 
 FIRST_PARTY_SINGLE_ROW_QUERY_METHODS = {
     "get_column_unique_count_dh_patch",
+    "_get_column_cardinality",
 }
 
 # We'll do this the inefficient way since the arrays are pretty small.
```
```diff
@@ -479,7 +497,20 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
         self, column_spec: _SingleColumnSpec, column: str
     ) -> None:
         try:
-            nonnull_count = self.dataset.get_column_nonnull_count(column)
+            # Don't use Great Expectations get_column_nonnull_count because it
+            # generates this SQL:
+            #
+            # sum(CASE WHEN (mycolumn IN (NULL) OR mycolumn IS NULL) THEN 1 ELSE 0 END)
+            #
+            # which fails for complex types (such as Databricks maps) that don't
+            # support the IN operator.
+            nonnull_count = convert_to_json_serializable(
+                self.dataset.engine.execute(
+                    sa.select(sa.func.count(sa.column(column))).select_from(
+                        self.dataset._table
+                    )
+                ).scalar()
+            )
             column_spec.nonnull_count = nonnull_count
         except Exception as e:
             logger.debug(
```
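The replacement query works because SQL's `COUNT(column)` already excludes NULLs, so no `CASE WHEN ... IN (NULL)` expression (which breaks on complex types) is needed. A quick self-contained check against SQLite, with a hypothetical table `t`:

```python
# COUNT(c) counts only non-NULL values, which is exactly the nonnull count
# the profiler needs.
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
with engine.connect() as conn:
    conn.execute(sa.text("CREATE TABLE t (c INTEGER)"))
    conn.execute(sa.text("INSERT INTO t VALUES (1), (NULL), (3)"))
    nonnull = conn.execute(sa.text("SELECT count(c) FROM t")).scalar()
    print(nonnull)  # 2 -- the NULL row is not counted
```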
```diff
@@ -734,11 +765,41 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
     def _get_dataset_column_distinct_value_frequencies(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_distinct_value_frequencies:
+        if not self.config.include_field_distinct_value_frequencies:
+            return
+        try:
+            results = self.dataset.engine.execute(
+                sa.select(
+                    [
+                        sa.column(column),
+                        sa.func.count(sa.column(column)),
+                    ]
+                )
+                .select_from(self.dataset._table)
+                .where(sa.column(column).is_not(None))
+                .group_by(sa.column(column))
+            ).fetchall()
+
             column_profile.distinctValueFrequencies = [
-                ValueFrequencyClass(value=str(value), frequency=count)
-                for value, count in …
+                ValueFrequencyClass(value=str(value), frequency=int(count))
+                for value, count in results
             ]
+            # sort so output is deterministic. don't do it in SQL because not all column
+            # types are sortable in SQL (such as JSON data types on Athena/Trino).
+            column_profile.distinctValueFrequencies = sorted(
+                column_profile.distinctValueFrequencies, key=lambda x: x.value
+            )
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get distinct value frequencies for column {column}. {e}"
+            )
+
+            self.report.report_warning(
+                title="Profiling: Unable to Calculate Distinct Value Frequencies",
+                message="Distinct value frequencies for the column will not be accessible",
+                context=f"{self.dataset_name}.{column}",
+                exc=e,
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_histogram(
```
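The rewritten method groups in SQL but sorts in Python, since some column types (JSON on Athena/Trino, for example) are not orderable in SQL. A compact sketch of the same query shape against SQLite, with hypothetical names:

```python
# Group in SQL, sort client-side for deterministic output.
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
with engine.connect() as conn:
    conn.execute(sa.text("CREATE TABLE t (c TEXT)"))
    conn.execute(sa.text("INSERT INTO t VALUES ('b'), ('a'), ('b'), (NULL)"))
    rows = conn.execute(
        sa.select(sa.column("c"), sa.func.count(sa.column("c")))
        .select_from(sa.table("t"))
        .where(sa.column("c").is_not(None))
        .group_by(sa.column("c"))
    ).fetchall()
    freqs = sorted((str(value), int(count)) for value, count in rows)
    print(freqs)  # [('a', 1), ('b', 2)]
```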
```diff
@@ -1173,26 +1234,34 @@ class DatahubGEProfiler:
            f"Will profile {len(requests)} table(s) with {max_workers} worker(s) - this may take a while"
        )
 
-        with PerfTimer() as timer, unittest.mock.patch(
-            "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_unique_count",
-            get_column_unique_count_dh_patch,
-        ), unittest.mock.patch(
-            "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
-            _get_column_quantiles_bigquery_patch,
-        ), unittest.mock.patch(
-            "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_awsathena",
-            _get_column_quantiles_awsathena_patch,
-        ), unittest.mock.patch(
-            "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_median",
-            _get_column_median_patch,
-        ), concurrent.futures.ThreadPoolExecutor(
-            max_workers=max_workers
-        ) as async_executor, SQLAlchemyQueryCombiner(
-            enabled=self.config.query_combiner_enabled,
-            catch_exceptions=self.config.catch_exceptions,
-            is_single_row_query_method=_is_single_row_query_method,
-            serial_execution_fallback_enabled=True,
-        ).activate() as query_combiner:
+        with (
+            PerfTimer() as timer,
+            unittest.mock.patch(
+                "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_unique_count",
+                get_column_unique_count_dh_patch,
+            ),
+            unittest.mock.patch(
+                "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
+                _get_column_quantiles_bigquery_patch,
+            ),
+            unittest.mock.patch(
+                "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_awsathena",
+                _get_column_quantiles_awsathena_patch,
+            ),
+            unittest.mock.patch(
+                "great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset.get_column_median",
+                _get_column_median_patch,
+            ),
+            concurrent.futures.ThreadPoolExecutor(
+                max_workers=max_workers
+            ) as async_executor,
+            SQLAlchemyQueryCombiner(
+                enabled=self.config.query_combiner_enabled,
+                catch_exceptions=self.config.catch_exceptions,
+                is_single_row_query_method=_is_single_row_query_method,
+                serial_execution_fallback_enabled=True,
+            ).activate() as query_combiner,
+        ):
             # Submit the profiling requests to the thread pool executor.
             async_profiles = collections.deque(
                 async_executor.submit(
```
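The refactor replaces one long chained `with` statement with the parenthesized multi-manager form, which became an official part of the grammar in Python 3.10 (CPython 3.9's PEG parser already accepted it). A minimal sketch of the syntax:

```python
# Parenthesized context managers: each manager sits on its own line, with a
# trailing comma, inside one pair of parentheses.
from contextlib import nullcontext

with (
    nullcontext("first") as a,
    nullcontext("second") as b,
):
    print(a, b)  # first second
```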
```diff
@@ -1395,12 +1464,12 @@ class DatahubGEProfiler:
             )
             return None
         finally:
-            if batch is not None and self.base_engine.engine.name.…
-                …
-                …
+            if batch is not None and self.base_engine.engine.name.lower() in [
+                GXSqlDialect.TRINO,
+                GXSqlDialect.AWSATHENA,
             ]:
                 if (
-                    self.base_engine.engine.name.…
+                    self.base_engine.engine.name.lower() == GXSqlDialect.TRINO
                     or temp_view is not None
                 ):
                     self._drop_temp_table(batch)
```
```diff
@@ -1449,9 +1518,17 @@ class DatahubGEProfiler:
             logger.error(
                 f"Unexpected {pretty_name} while profiling. Should have 3 parts but has {len(name_parts)} parts."
             )
+        if platform == DATABRICKS:
+            # TODO: Review logic for BigQuery as well, probably project.dataset.table should be quoted there as well
+            quoted_name = ".".join(
+                batch.engine.dialect.identifier_preparer.quote(part)
+                for part in name_parts
+            )
+            batch._table = sa.text(quoted_name)
+            logger.debug(f"Setting quoted table name to be {batch._table}")
         # If we only have two parts that means the project_id is missing from the table name and we add it
         # Temp tables has 3 parts while normal tables only has 2 parts
-        if len(str(batch._table).split(".")) == 2:
+        elif len(str(batch._table).split(".")) == 2:
             batch._table = sa.text(f"{name_parts[0]}.{str(batch._table)}")
             logger.debug(f"Setting table name to be {batch._table}")
 
```
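The new Databricks branch quotes each part of the three-part table name through the engine dialect's identifier preparer. A sketch using SQLAlchemy's SQLite dialect purely for illustration, with hypothetical name parts; only parts that need quoting get wrapped:

```python
# identifier_preparer.quote() adds quoting only where the identifier requires
# it (spaces, reserved words, case sensitivity).
import sqlalchemy as sa

preparer = sa.create_engine("sqlite://").dialect.identifier_preparer
name_parts = ["main_catalog", "sales schema", "orders"]  # hypothetical
print(".".join(preparer.quote(part) for part in name_parts))
# main_catalog."sales schema".orders
```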
datahub/ingestion/source/ge_profiling_config.py:

```diff
@@ -1,12 +1,12 @@
 import datetime
 import logging
 import os
-from typing import Any, Dict, List, Optional
+from typing import Annotated, Any, Dict, List, Optional
 
 import pydantic
 from pydantic.fields import Field
 
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel, SupportedSources
 from datahub.ingestion.source_config.operation_config import OperationConfig
 
 _PROFILING_FLAGS_TO_REPORT = {
```
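`Annotated` is importable from `typing` only on Python 3.9 and newer; older interpreters need the `typing_extensions` backport. A guarded-import sketch (the fallback is an assumption, not something this diff does):

```python
# Version-guarded import of Annotated; acryl-datahub itself imports it
# directly from typing.
import sys

if sys.version_info >= (3, 9):
    from typing import Annotated
else:
    from typing_extensions import Annotated
```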
```diff
@@ -120,28 +120,37 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         "number of columns to profile goes up.",
     )
 
-    profile_if_updated_since_days: Optional[pydantic.PositiveFloat] = Field(
+    profile_if_updated_since_days: Annotated[
+        Optional[pydantic.PositiveFloat], SupportedSources(["snowflake", "bigquery"])
+    ] = Field(
         default=None,
         description="Profile table only if it has been updated since these many number of days. "
         "If set to `null`, no constraint of last modified time for tables to profile. "
         "Supported only in `snowflake` and `BigQuery`.",
     )
 
-    profile_table_size_limit: Optional[int] = Field(
+    profile_table_size_limit: Annotated[
+        Optional[int],
+        SupportedSources(["snowflake", "bigquery", "unity-catalog", "oracle"]),
+    ] = Field(
         default=5,
         description="Profile tables only if their size is less than specified GBs. If set to `null`, "
         "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
         "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
     )
 
-    profile_table_row_limit: Optional[int] = Field(
+    profile_table_row_limit: Annotated[
+        Optional[int], SupportedSources(["snowflake", "bigquery", "oracle"])
+    ] = Field(
         default=5000000,
         description="Profile tables only if their row count is less than specified count. "
         "If set to `null`, no limit on the row count of tables to profile. Supported only in "
         "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
     )
 
-    profile_table_row_count_estimate_only: bool = Field(
+    profile_table_row_count_estimate_only: Annotated[
+        bool, SupportedSources(["postgres", "mysql"])
+    ] = Field(
         default=False,
         description="Use an approximate query for row count. This will be much faster but slightly "
         "less accurate. Only supported for Postgres and MySQL. ",
```
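The `Annotated[..., SupportedSources([...])]` pattern attaches per-source support metadata to each config field without changing its runtime type. The real `SupportedSources` lives in `datahub.configuration.common` (also modified in this diff); the stand-in class below only illustrates how metadata of this shape can be read back with `typing.get_type_hints`:

```python
# Stand-in SupportedSources for illustration; the real class is in
# datahub.configuration.common.
from dataclasses import dataclass
from typing import Annotated, get_type_hints


@dataclass
class SupportedSources:
    sources: list


class Cfg:
    sample_size: Annotated[int, SupportedSources(["bigquery", "snowflake"])] = 10000


hints = get_type_hints(Cfg, include_extras=True)
metadata = hints["sample_size"].__metadata__  # tuple of Annotated extras
print(metadata[0].sources)  # ['bigquery', 'snowflake']
```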
```diff
@@ -157,29 +166,35 @@ class GEProfilingConfig(GEProfilingBaseConfig):
     # Hidden option - used for debugging purposes.
     catch_exceptions: bool = Field(default=True, description="")
 
-    partition_profiling_enabled: bool = Field(
+    partition_profiling_enabled: Annotated[
+        bool, SupportedSources(["athena", "bigquery"])
+    ] = Field(
         default=True,
         description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
         "If enabled, latest partition data is used for profiling.",
     )
-    partition_datetime: Optional[datetime.datetime] = Field(
+    partition_datetime: Annotated[
+        Optional[datetime.datetime], SupportedSources(["bigquery"])
+    ] = Field(
         default=None,
         description="If specified, profile only the partition which matches this datetime. "
         "If not specified, profile the latest partition. Only Bigquery supports this.",
     )
-    use_sampling: bool = Field(
+    use_sampling: Annotated[bool, SupportedSources(["bigquery", "snowflake"])] = Field(
         default=True,
         description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
         "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
     )
 
-    sample_size: int = Field(
+    sample_size: Annotated[int, SupportedSources(["bigquery", "snowflake"])] = Field(
         default=10000,
         description="Number of rows to be sampled from table for column level profiling."
         "Applicable only if `use_sampling` is set to True.",
     )
 
-    profile_external_tables: bool = Field(
+    profile_external_tables: Annotated[
+        bool, SupportedSources(["redshift", "snowflake"])
+    ] = Field(
         default=False,
         description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
     )
```
datahub/ingestion/source/grafana/entity_mcp_builder.py (new file):

```diff
@@ -0,0 +1,272 @@
+from typing import Dict, List, Optional, Tuple
+
+from datahub.emitter.mce_builder import (
+    make_chart_urn,
+    make_dashboard_urn,
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+    make_dataset_urn_with_platform_instance,
+    make_tag_urn,
+    make_user_urn,
+)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.source.grafana.models import Dashboard, Panel
+from datahub.ingestion.source.grafana.types import CHART_TYPE_MAPPINGS
+from datahub.metadata.schema_classes import (
+    ChangeAuditStampsClass,
+    ChartInfoClass,
+    DashboardInfoClass,
+    DataPlatformInstanceClass,
+    GlobalTagsClass,
+    OwnerClass,
+    OwnershipClass,
+    OwnershipTypeClass,
+    StatusClass,
+    TagAssociationClass,
+)
+
+
+def build_chart_mcps(
+    panel: Panel,
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    env: str,
+    base_url: str,
+    ingest_tags: bool,
+) -> Tuple[Optional[str], str, List[MetadataChangeProposalWrapper]]:
+    """Build chart metadata change proposals"""
+    ds_urn = None
+    mcps = []
+
+    chart_urn = make_chart_urn(
+        platform,
+        f"{dashboard.uid}.{panel.id}",
+        platform_instance,
+    )
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    # Get input datasets
+    input_datasets = []
+    if panel.datasource_ref:
+        ds_type = panel.datasource_ref.type or "unknown"
+        ds_uid = panel.datasource_ref.uid or "unknown"
+
+        # Add Grafana dataset
+        dataset_name = f"{ds_type}.{ds_uid}.{panel.id}"
+        ds_urn = make_dataset_urn_with_platform_instance(
+            platform=platform,
+            name=dataset_name,
+            platform_instance=platform_instance,
+            env=env,
+        )
+        input_datasets.append(ds_urn)
+
+    # Chart info aspect
+    title = panel.title or f"Panel {panel.id}"
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=ChartInfoClass(
+                type=CHART_TYPE_MAPPINGS.get(panel.type) if panel.type else None,
+                description=panel.description,
+                title=title,
+                lastModified=ChangeAuditStampsClass(),
+                chartUrl=f"{base_url}/d/{dashboard.uid}?viewPanel={panel.id}",
+                customProperties=_build_custom_properties(panel),
+                inputs=input_datasets,
+            ),
+        )
+    )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = []
+        for tag in dashboard.tags:
+            if ":" in tag:
+                key, value = tag.split(":", 1)
+                tag_urn = make_tag_urn(f"{key}.{value}")
+            else:
+                tag_urn = make_tag_urn(tag)
+            tags.append(TagAssociationClass(tag=tag_urn))
+
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=chart_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    return ds_urn, chart_urn, mcps
+
+
+def build_dashboard_mcps(
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    chart_urns: List[str],
+    base_url: str,
+    ingest_owners: bool,
+    ingest_tags: bool,
+) -> Tuple[str, List[MetadataChangeProposalWrapper]]:
+    """Build dashboard metadata change proposals"""
+    mcps = []
+    dashboard_urn = make_dashboard_urn(platform, dashboard.uid, platform_instance)
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Dashboard info aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DashboardInfoClass(
+                description=dashboard.description,
+                title=dashboard.title,
+                charts=chart_urns,
+                lastModified=ChangeAuditStampsClass(),
+                dashboardUrl=f"{base_url}/d/{dashboard.uid}",
+                customProperties=_build_dashboard_properties(dashboard),
+            ),
+        )
+    )
+
+    # Ownership aspect
+    if dashboard.uid and ingest_owners:
+        owner = _build_ownership(dashboard)
+        if owner:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=owner,
+                )
+            )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = [TagAssociationClass(tag=make_tag_urn(tag)) for tag in dashboard.tags]
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    return dashboard_urn, mcps
+
+
+def _build_custom_properties(panel: Panel) -> Dict[str, str]:
+    """Build custom properties for chart"""
+    props = {}
+
+    if panel.type:
+        props["type"] = panel.type
+
+    if panel.datasource_ref:
+        props["datasourceType"] = panel.datasource_ref.type or ""
+        props["datasourceUid"] = panel.datasource_ref.uid or ""
+
+    for key in [
+        "description",
+        "format",
+        "pluginVersion",
+        "repeatDirection",
+        "maxDataPoints",
+    ]:
+        value = getattr(panel, key, None)
+        if value:
+            props[key] = str(value)
+
+    if panel.query_targets:
+        props["targetsCount"] = str(len(panel.query_targets))
+
+    return props
+
+
+def _build_dashboard_properties(dashboard: Dashboard) -> Dict[str, str]:
+    """Build custom properties for dashboard"""
+    props = {}
+
+    if dashboard.timezone:
+        props["timezone"] = dashboard.timezone
+
+    if dashboard.schema_version:
+        props["schema_version"] = dashboard.schema_version
+
+    if dashboard.version:
+        props["version"] = dashboard.version
+
+    if dashboard.refresh:
+        props["refresh"] = dashboard.refresh
+
+    return props
+
+
+def _build_ownership(dashboard: Dashboard) -> Optional[OwnershipClass]:
+    """Build ownership information"""
+    owners = []
+
+    if dashboard.uid:
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(dashboard.uid),
+                type=OwnershipTypeClass.TECHNICAL_OWNER,
+            )
+        )
+
+    if dashboard.created_by:
+        owner_id = dashboard.created_by.split("@")[0]
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(owner_id),
+                type=OwnershipTypeClass.DATAOWNER,
+            )
+        )
+
+    return OwnershipClass(owners=owners) if owners else None
```
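Each builder returns plain `MetadataChangeProposalWrapper` objects, one per aspect, which callers emit through the usual DataHub sink machinery. A minimal sketch of emitting one such MCP over REST; the chart URN and server address are placeholders:

```python
# Sketch: emit a single Status aspect for a chart. Requires a reachable
# DataHub GMS at the given address; the URN below is hypothetical.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.schema_classes import StatusClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:chart:(grafana,dashboard-uid.1)",
    aspect=StatusClass(removed=False),
)

emitter = DatahubRestEmitter("http://localhost:8080")  # placeholder GMS URL
emitter.emit_mcp(mcp)
```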