acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from datahub.emitter.mce_builder import (
|
|
5
|
+
make_dataset_urn_with_platform_instance,
|
|
6
|
+
make_schema_field_urn,
|
|
7
|
+
)
|
|
8
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
10
|
+
from datahub.ingestion.source.grafana.grafana_config import PlatformConnectionConfig
|
|
11
|
+
from datahub.ingestion.source.grafana.models import (
|
|
12
|
+
DatasourceRef,
|
|
13
|
+
GrafanaQueryTarget,
|
|
14
|
+
Panel,
|
|
15
|
+
)
|
|
16
|
+
from datahub.ingestion.source.grafana.report import GrafanaSourceReport
|
|
17
|
+
from datahub.metadata.schema_classes import (
|
|
18
|
+
DatasetLineageTypeClass,
|
|
19
|
+
FineGrainedLineageClass,
|
|
20
|
+
FineGrainedLineageDownstreamTypeClass,
|
|
21
|
+
FineGrainedLineageUpstreamTypeClass,
|
|
22
|
+
UpstreamClass,
|
|
23
|
+
UpstreamLineageClass,
|
|
24
|
+
)
|
|
25
|
+
from datahub.sql_parsing.sqlglot_lineage import (
|
|
26
|
+
SqlParsingResult,
|
|
27
|
+
create_lineage_sql_parsed_result,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class LineageExtractor:
|
|
34
|
+
"""Handles extraction of lineage information from Grafana panels"""
|
|
35
|
+
|
|
36
|
+
def __init__(
|
|
37
|
+
self,
|
|
38
|
+
platform: str,
|
|
39
|
+
platform_instance: Optional[str],
|
|
40
|
+
env: str,
|
|
41
|
+
connection_to_platform_map: Dict[str, PlatformConnectionConfig],
|
|
42
|
+
report: GrafanaSourceReport,
|
|
43
|
+
graph: Optional[DataHubGraph] = None,
|
|
44
|
+
include_column_lineage: bool = True,
|
|
45
|
+
):
|
|
46
|
+
self.platform = platform
|
|
47
|
+
self.platform_instance = platform_instance
|
|
48
|
+
self.env = env
|
|
49
|
+
self.connection_map = connection_to_platform_map
|
|
50
|
+
self.graph = graph
|
|
51
|
+
self.report = report
|
|
52
|
+
self.include_column_lineage = include_column_lineage
|
|
53
|
+
|
|
54
|
+
def extract_panel_lineage(
    self, panel: Panel
) -> Optional[MetadataChangeProposalWrapper]:
    """Build the lineage proposal for one panel, if any can be derived.

    Returns ``None`` when the panel has no datasource reference or when its
    datasource UID has no entry in the connection map. Otherwise the result
    of ``_create_column_lineage`` on the parsed panel SQL is preferred, with
    ``_create_basic_lineage`` as the fallback.
    """
    if not panel.datasource_ref:
        return None

    ds_type, ds_uid = self._extract_datasource_info(panel.datasource_ref)
    raw_sql = self._extract_raw_sql(panel.query_targets)
    ds_urn = self._build_dataset_urn(ds_type, ds_uid, panel.id)

    # Only datasources with a configured platform connection get lineage.
    if ds_uid not in self.connection_map:
        return None
    platform_config = self.connection_map[ds_uid]

    # Prefer SQL-derived lineage when every step yields a result.
    parsed_sql = self._parse_sql(raw_sql, platform_config) if raw_sql else None
    if parsed_sql:
        column_lineage = self._create_column_lineage(ds_urn, parsed_sql)
        if column_lineage:
            return column_lineage

    # Fall back to basic lineage if SQL parsing fails or no column lineage created
    return self._create_basic_lineage(ds_uid, platform_config, ds_urn)
|
|
80
|
+
|
|
81
|
+
def _extract_datasource_info(
|
|
82
|
+
self, datasource_ref: "DatasourceRef"
|
|
83
|
+
) -> Tuple[str, str]:
|
|
84
|
+
"""Extract datasource type and UID."""
|
|
85
|
+
return datasource_ref.type or "unknown", datasource_ref.uid or "unknown"
|
|
86
|
+
|
|
87
|
+
def _extract_raw_sql(
|
|
88
|
+
self, query_targets: List["GrafanaQueryTarget"]
|
|
89
|
+
) -> Optional[str]:
|
|
90
|
+
"""Extract raw SQL from panel query targets."""
|
|
91
|
+
for target in query_targets:
|
|
92
|
+
if target.get("rawSql"):
|
|
93
|
+
return target["rawSql"]
|
|
94
|
+
return None
|
|
95
|
+
|
|
96
|
+
def _build_dataset_urn(self, ds_type: str, ds_uid: str, panel_id: str) -> str:
    """Build the URN of the dataset that stands for a panel's datasource.

    The dataset name is ``<ds_type>.<ds_uid>.<panel_id>``; platform,
    platform instance and environment are taken from this extractor.
    """
    urn_name = f"{ds_type}.{ds_uid}.{panel_id}"
    urn = make_dataset_urn_with_platform_instance(
        platform=self.platform,
        name=urn_name,
        platform_instance=self.platform_instance,
        env=self.env,
    )
    return urn
|
|
105
|
+
|
|
106
|
+
def _create_basic_lineage(
    self, ds_uid: str, platform_config: PlatformConnectionConfig, ds_urn: str
) -> MetadataChangeProposalWrapper:
    """Create dataset-level upstream lineage for ``ds_urn``.

    The single upstream dataset is named after the datasource UID, prefixed
    with ``<database>.`` when the platform connection configures a database,
    and lives on the platform/instance/env from ``platform_config``.
    """
    if platform_config.database:
        name = f"{platform_config.database}.{ds_uid}"
    else:
        name = ds_uid

    upstream_urn = make_dataset_urn_with_platform_instance(
        platform=platform_config.platform,
        name=name,
        platform_instance=platform_config.platform_instance,
        env=platform_config.env,
    )
    logger.info(f"Generated upstream URN: {upstream_urn}")

    upstream = UpstreamClass(
        dataset=upstream_urn,
        type=DatasetLineageTypeClass.TRANSFORMED,
    )
    lineage_aspect = UpstreamLineageClass(upstreams=[upstream])
    return MetadataChangeProposalWrapper(entityUrn=ds_urn, aspect=lineage_aspect)
|
|
136
|
+
|
|
137
|
+
def _parse_sql(
    self, sql: str, platform_config: PlatformConnectionConfig
) -> Optional[SqlParsingResult]:
    """Parse ``sql`` into a lineage result using the connection's defaults.

    Returns ``None`` (after logging) when no DataHub graph is available or
    when the parser raises; parsing errors are never propagated.
    """
    if not self.graph:
        logger.warning("No DataHub graph specified for SQL parsing.")
        return None

    parsed: Optional[SqlParsingResult] = None
    try:
        parsed = create_lineage_sql_parsed_result(
            query=sql,
            platform=platform_config.platform,
            platform_instance=platform_config.platform_instance,
            env=platform_config.env,
            default_db=platform_config.database,
            default_schema=platform_config.database_schema,
            graph=self.graph,
        )
    except ValueError as e:
        logger.error(f"SQL parsing error for query: {sql}", exc_info=e)
    except Exception as e:
        logger.exception(f"Unexpected error during SQL parsing: {sql}", exc_info=e)

    # Still None here if either handler above ran.
    return parsed
|
|
161
|
+
|
|
162
|
+
def _create_column_lineage(
    self,
    dataset_urn: str,
    parsed_sql: SqlParsingResult,
) -> Optional[MetadataChangeProposalWrapper]:
    """Create column-level (fine-grained) lineage for ``dataset_urn``.

    Returns ``None`` when the parsed SQL carries no column lineage or when
    column lineage is disabled on this extractor. Otherwise the proposal
    lists every input table as a dataset-level upstream and adds one
    fine-grained entry per downstream column.
    """
    if not parsed_sql.column_lineage or not self.include_column_lineage:
        return None

    fine_grained_lineages = [
        FineGrainedLineageClass(
            downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
            downstreams=[
                make_schema_field_urn(dataset_urn, col_lineage.downstream.column)
            ],
            upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
            # Each upstream column reference names the table it belongs to.
            # Pairing every column with every input table (the previous
            # behaviour) produced field URNs for columns that do not exist
            # on those tables whenever the query read more than one table.
            upstreams=[
                make_schema_field_urn(col.table, col.column)
                for col in col_lineage.upstreams
            ],
        )
        for col_lineage in parsed_sql.column_lineage
    ]

    return MetadataChangeProposalWrapper(
        entityUrn=dataset_urn,
        aspect=UpstreamLineageClass(
            upstreams=[
                UpstreamClass(
                    dataset=table,
                    type=DatasetLineageTypeClass.TRANSFORMED,
                )
                for table in parsed_sql.in_tables
            ],
            fineGrainedLineages=fine_grained_lineages,
        ),
    )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Grafana data models for DataHub ingestion.
|
|
2
|
+
|
|
3
|
+
References:
|
|
4
|
+
- Grafana HTTP API: https://grafana.com/docs/grafana/latest/developers/http_api/
|
|
5
|
+
- Dashboard API: https://grafana.com/docs/grafana/latest/developers/http_api/dashboard/
|
|
6
|
+
- Folder API: https://grafana.com/docs/grafana/latest/developers/http_api/folder/
|
|
7
|
+
- Search API: https://grafana.com/docs/grafana/latest/developers/http_api/other/#search-api
|
|
8
|
+
- Dashboard JSON structure: https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/view-dashboard-json-model/
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any, Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
15
|
+
|
|
16
|
+
from datahub.emitter.mcp_builder import ContainerKey
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Grafana-specific aliases. All three are plain string-keyed dicts; the
# separate names only document which piece of panel JSON a value represents.

# A single query target: refId, expr/query, datasource, hide, etc.
GrafanaQueryTarget = Dict[str, Any]

# Panel field configuration: defaults, overrides, display settings.
GrafanaFieldConfig = Dict[str, Any]

# A single data transformation: id, options.
GrafanaTransformation = Dict[str, Any]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class _GrafanaBaseModel(BaseModel):
    """Shared pydantic base for the Grafana models in this module."""

    # Accept numeric JSON values (e.g. integer ids) for fields declared as
    # ``str`` by coercing them to strings during validation.
    model_config = ConfigDict(coerce_numbers_to_str=True)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DatasourceRef(_GrafanaBaseModel):
    """Reference to a Grafana datasource; every attribute is optional."""

    # Datasource type (prometheus, mysql, postgres, etc.)
    type: Optional[str] = None
    # Datasource unique identifier
    uid: Optional[str] = None
    # Datasource display name
    name: Optional[str] = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Panel(_GrafanaBaseModel):
    """A single panel of a Grafana dashboard.

    Several attributes are populated from differently named keys of the
    panel JSON via pydantic aliases (``targets``, ``datasource``,
    ``fieldConfig``).
    """

    id: str
    title: str
    description: str = ""
    type: Optional[str] = None

    # Query targets (JSON key "targets"); each holds refId (A, B, C...),
    # query/expr, datasource ref, etc.
    query_targets: List[GrafanaQueryTarget] = Field(
        default_factory=list,
        alias="targets",
    )

    # Datasource reference (JSON key "datasource"): type, uid, name.
    datasource_ref: Optional[DatasourceRef] = Field(
        default=None,
        alias="datasource",
    )

    # Field configuration (JSON key "fieldConfig"): display settings,
    # defaults, overrides.
    field_config: GrafanaFieldConfig = Field(
        default_factory=dict,
        alias="fieldConfig",
    )

    # Data transformations; each holds an id and transformation-specific
    # options.
    transformations: List[GrafanaTransformation] = Field(default_factory=list)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Dashboard(_GrafanaBaseModel):
    """Represents a Grafana dashboard.

    Instances are normally built with :meth:`parse_obj` from a payload of
    the form ``{"dashboard": {...}, "meta": {...}}``; that method flattens
    row panels and lifts ``meta.folderId`` into ``folder_id``.
    """

    uid: str
    title: str
    description: str = ""
    version: Optional[str] = None
    panels: List[Panel]
    tags: List[str]
    timezone: Optional[str] = None
    refresh: Optional[str] = None
    schema_version: Optional[str] = Field(default=None, alias="schemaVersion")
    # NOTE: the alias is a literal key, not a nested path; parse_obj supplies
    # the value under exactly this key.
    folder_id: Optional[str] = Field(default=None, alias="meta.folderId")
    created_by: Optional[str] = None

    @staticmethod
    def extract_panels(panels_data: List[Dict[str, Any]]) -> List[Panel]:
        """Extract panels, including those nested one level inside rows.

        Entries of type ``"row"`` are never returned themselves; when a row
        carries its own ``"panels"`` list, its non-row children are returned
        in its place. Raises whatever ``Panel.parse_obj`` raises for an
        invalid panel.
        """
        panels: List[Panel] = []
        for panel_data in panels_data:
            if panel_data.get("type") == "row" and "panels" in panel_data:
                panels.extend(
                    Panel.parse_obj(p)
                    for p in panel_data["panels"]
                    if p.get("type") != "row"
                )
            elif panel_data.get("type") != "row":
                panels.append(Panel.parse_obj(panel_data))
        return panels

    @classmethod
    def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard":
        """Custom parsing to handle nested panel extraction and folder id.

        ``data`` is expected to hold the dashboard body under ``"dashboard"``.
        Panel extraction is best-effort: on any error a warning is logged and
        the dashboard is built with no panels.
        """
        dashboard_data = data.get("dashboard", {})
        _panel_data = dashboard_data.get("panels", [])
        panels = []
        try:
            panels = cls.extract_panels(_panel_data)
        except Exception as e:
            logger.warning(
                f"Error extracting panels from dashboard for dashboard panels {_panel_data} : {e}"
            )

        # Locate the metadata block. A "meta" nested in the dashboard body
        # wins; otherwise fall back to a "meta" that is a sibling of
        # "dashboard", which is where the Grafana dashboard API puts it.
        meta = dashboard_data.get("meta") or data.get("meta")
        if not isinstance(meta, dict):
            # Covers a missing block as well as an explicit null.
            meta = {}
        folder_id = meta.get("folderId")

        # Copy the dashboard body without "meta" to avoid conflicts.
        dashboard_dict = {
            key: value for key, value in dashboard_data.items() if key != "meta"
        }
        dashboard_dict["panels"] = panels
        # The folder id must be supplied under the field's alias: population
        # by field name is not enabled on this model, so a "folder_id" key
        # would be ignored and the attribute would always stay None.
        dashboard_dict["meta.folderId"] = folder_id

        # Handle refresh field type mismatch - convert boolean to string
        if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
            dashboard_dict["refresh"] = str(dashboard_dict["refresh"])

        return super().parse_obj(dashboard_dict)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class Folder(_GrafanaBaseModel):
    """A Grafana folder: identifier, title and optional description."""

    id: str
    title: str
    # May be given as None explicitly; defaults to an empty string.
    description: Optional[str] = ""
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class FolderKey(ContainerKey):
    """Container key that identifies a Grafana folder by its id."""

    folder_id: str
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class DashboardContainerKey(ContainerKey):
    """Container key that identifies a Grafana dashboard."""

    dashboard_id: str
    # Reference to the parent folder, when there is one.
    folder_id: Optional[str] = None
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
4
|
+
StaleEntityRemovalSourceReport,
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class GrafanaSourceReport(StaleEntityRemovalSourceReport):
    """Counters collected during a Grafana ingestion run.

    Every counter starts at zero and has one ``report_*`` method that
    increments it by one.
    """

    # --- Entity counters ---
    dashboards_scanned: int = 0
    charts_scanned: int = 0
    folders_scanned: int = 0
    datasets_scanned: int = 0

    # --- Lineage counters ---
    panels_with_lineage: int = 0
    panels_without_lineage: int = 0
    lineage_extraction_failures: int = 0
    sql_parsing_attempts: int = 0
    sql_parsing_successes: int = 0
    sql_parsing_failures: int = 0

    # --- Schema extraction counters ---
    panels_with_schema_fields: int = 0
    panels_without_schema_fields: int = 0

    # --- Warning counters ---
    permission_warnings: int = 0
    datasource_warnings: int = 0
    panel_parsing_warnings: int = 0

    # --- Entity reporting methods ---
    def report_dashboard_scanned(self) -> None:
        """Count one scanned dashboard."""
        self.dashboards_scanned += 1

    def report_chart_scanned(self) -> None:
        """Count one scanned chart."""
        self.charts_scanned += 1

    def report_folder_scanned(self) -> None:
        """Count one scanned folder."""
        self.folders_scanned += 1

    def report_dataset_scanned(self) -> None:
        """Count one scanned dataset."""
        self.datasets_scanned += 1

    # --- Lineage reporting methods ---
    def report_lineage_extracted(self) -> None:
        """Count one panel for which lineage was extracted."""
        self.panels_with_lineage += 1

    def report_no_lineage(self) -> None:
        """Count one panel for which no lineage was found."""
        self.panels_without_lineage += 1

    def report_lineage_extraction_failure(self) -> None:
        """Count one panel whose lineage extraction failed."""
        self.lineage_extraction_failures += 1

    def report_sql_parsing_attempt(self) -> None:
        """Count one attempt to parse SQL."""
        self.sql_parsing_attempts += 1

    def report_sql_parsing_success(self) -> None:
        """Count one successful SQL parse."""
        self.sql_parsing_successes += 1

    def report_sql_parsing_failure(self) -> None:
        """Count one failed SQL parse."""
        self.sql_parsing_failures += 1

    # --- Schema field reporting methods ---
    def report_schema_fields_extracted(self) -> None:
        """Count one panel for which schema fields were extracted."""
        self.panels_with_schema_fields += 1

    def report_no_schema_fields(self) -> None:
        """Count one panel for which no schema fields were found."""
        self.panels_without_schema_fields += 1

    # --- Warning reporting methods ---
    def report_permission_warning(self) -> None:
        """Count one permission-related warning."""
        self.permission_warnings += 1

    def report_datasource_warning(self) -> None:
        """Count one datasource-related warning."""
        self.datasource_warnings += 1

    def report_panel_parsing_warning(self) -> None:
        """Count one panel parsing warning."""
        self.panel_parsing_warnings += 1
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from datahub.metadata.schema_classes import (
|
|
2
|
+
ChartTypeClass,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
# Lookup table from a chart-type name to a DataHub ChartTypeClass value.
# NOTE(review): the keys look like Grafana panel "type" identifiers - confirm
# against the code that reads this table.
CHART_TYPE_MAPPINGS = {
    # Line-style visualisations
    "graph": ChartTypeClass.LINE,
    "timeseries": ChartTypeClass.LINE,
    # Tabular
    "table": ChartTypeClass.TABLE,
    # Single-value displays are mapped to TEXT
    "stat": ChartTypeClass.TEXT,
    "gauge": ChartTypeClass.TEXT,
    "bargauge": ChartTypeClass.TEXT,
    # Bar and pie
    "bar": ChartTypeClass.BAR,
    "pie": ChartTypeClass.PIE,
    # Mapped to the closest available chart type
    "heatmap": ChartTypeClass.TABLE,
    "histogram": ChartTypeClass.BAR,
}
|
|
File without changes
|