acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +18 -3
- datahub/api/entities/datajob/datajob.py +24 -4
- datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
- datahub/api/entities/dataproduct/dataproduct.py +32 -3
- datahub/api/entities/dataset/dataset.py +47 -72
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +724 -0
- datahub/api/entities/external/external_tag.py +147 -0
- datahub/api/entities/external/lake_formation_external_entites.py +162 -0
- datahub/api/entities/external/restricted_text.py +172 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
- datahub/api/entities/forms/forms.py +37 -37
- datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/base.py +8 -6
- datahub/api/graphql/operation.py +14 -10
- datahub/cli/check_cli.py +91 -9
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/config_utils.py +20 -12
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +133 -34
- datahub/cli/docker_check.py +110 -14
- datahub/cli/docker_cli.py +155 -231
- datahub/cli/exists_cli.py +2 -3
- datahub/cli/get_cli.py +2 -3
- datahub/cli/graphql_cli.py +1422 -0
- datahub/cli/iceberg_cli.py +11 -5
- datahub/cli/ingest_cli.py +25 -26
- datahub/cli/migrate.py +12 -9
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +4 -6
- datahub/cli/quickstart_versioning.py +53 -10
- datahub/cli/specific/assertions_cli.py +39 -7
- datahub/cli/specific/datacontract_cli.py +57 -9
- datahub/cli/specific/dataproduct_cli.py +12 -24
- datahub/cli/specific/dataset_cli.py +31 -21
- datahub/cli/specific/forms_cli.py +2 -5
- datahub/cli/specific/group_cli.py +2 -3
- datahub/cli/specific/structuredproperties_cli.py +5 -7
- datahub/cli/specific/user_cli.py +174 -4
- datahub/cli/state_cli.py +2 -3
- datahub/cli/timeline_cli.py +2 -3
- datahub/configuration/common.py +46 -2
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/env_vars.py +331 -0
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/kafka.py +21 -1
- datahub/configuration/pydantic_migration_helpers.py +6 -13
- datahub/configuration/source_common.py +4 -3
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +8 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/emitter/mce_builder.py +12 -8
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/mcp_builder.py +12 -0
- datahub/emitter/request_helper.py +138 -15
- datahub/emitter/response_helper.py +111 -19
- datahub/emitter/rest_emitter.py +399 -163
- datahub/entrypoints.py +10 -5
- datahub/errors.py +12 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
- datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
- datahub/ingestion/api/common.py +9 -0
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +381 -3
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/api/source.py +174 -62
- datahub/ingestion/api/source_helpers.py +41 -3
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3652 -0
- datahub/ingestion/autogenerated/lineage.json +402 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +31 -5
- datahub/ingestion/glossary/classification_mixin.py +9 -2
- datahub/ingestion/graph/client.py +492 -55
- datahub/ingestion/graph/config.py +18 -2
- datahub/ingestion/graph/filters.py +96 -32
- datahub/ingestion/graph/links.py +55 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
- datahub/ingestion/run/pipeline.py +90 -23
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +31 -23
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/abs/config.py +1 -1
- datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
- datahub/ingestion/source/abs/source.py +15 -30
- datahub/ingestion/source/apply/datahub_apply.py +6 -5
- datahub/ingestion/source/aws/aws_common.py +185 -13
- datahub/ingestion/source/aws/glue.py +517 -244
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
- datahub/ingestion/source/aws/tag_entities.py +270 -0
- datahub/ingestion/source/azure/azure_common.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
- datahub/ingestion/source/cassandra/cassandra.py +7 -18
- datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
- datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
- datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
- datahub/ingestion/source/common/subtypes.py +73 -1
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
- datahub/ingestion/source/data_lake_common/object_store.py +732 -0
- datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
- datahub/ingestion/source/datahub/config.py +19 -5
- datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
- datahub/ingestion/source/datahub/datahub_source.py +11 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
- datahub/ingestion/source/dbt/dbt_common.py +270 -26
- datahub/ingestion/source/dbt/dbt_core.py +88 -47
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/delta_lake/config.py +9 -5
- datahub/ingestion/source/delta_lake/source.py +8 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
- datahub/ingestion/source/dremio/dremio_config.py +5 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
- datahub/ingestion/source/dremio/dremio_source.py +228 -215
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
- datahub/ingestion/source/excel/__init__.py +0 -0
- datahub/ingestion/source/excel/config.py +92 -0
- datahub/ingestion/source/excel/excel_file.py +539 -0
- datahub/ingestion/source/excel/profiling.py +308 -0
- datahub/ingestion/source/excel/report.py +49 -0
- datahub/ingestion/source/excel/source.py +662 -0
- datahub/ingestion/source/excel/util.py +18 -0
- datahub/ingestion/source/feast.py +12 -14
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/config.py +67 -8
- datahub/ingestion/source/fivetran/fivetran.py +228 -43
- datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
- datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/gcs/gcs_source.py +53 -10
- datahub/ingestion/source/gcs/gcs_utils.py +36 -9
- datahub/ingestion/source/ge_data_profiler.py +146 -33
- datahub/ingestion/source/ge_profiling_config.py +26 -11
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +137 -0
- datahub/ingestion/source/grafana/report.py +90 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +402 -0
- datahub/ingestion/source/hex/constants.py +8 -0
- datahub/ingestion/source/hex/hex.py +311 -0
- datahub/ingestion/source/hex/mapper.py +412 -0
- datahub/ingestion/source/hex/model.py +78 -0
- datahub/ingestion/source/hex/query_fetcher.py +307 -0
- datahub/ingestion/source/iceberg/iceberg.py +385 -164
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +28 -71
- datahub/ingestion/source/kafka/kafka_config.py +78 -0
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
- datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +216 -86
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
- datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
- datahub/ingestion/source/looker/looker_source.py +539 -555
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +31 -3
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +103 -118
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/ingestion/source/metabase.py +32 -6
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +11 -10
- datahub/ingestion/source/mlflow.py +254 -23
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
- datahub/ingestion/source/mode.py +359 -181
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
- datahub/ingestion/source/nifi.py +5 -5
- datahub/ingestion/source/openapi.py +85 -38
- datahub/ingestion/source/openapi_parser.py +59 -40
- datahub/ingestion/source/powerbi/config.py +92 -27
- datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +66 -32
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
- datahub/ingestion/source/preset.py +3 -3
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +15 -9
- datahub/ingestion/source/redshift/datashares.py +1 -1
- datahub/ingestion/source/redshift/lineage.py +386 -687
- datahub/ingestion/source/redshift/profile.py +2 -2
- datahub/ingestion/source/redshift/query.py +24 -20
- datahub/ingestion/source/redshift/redshift.py +52 -111
- datahub/ingestion/source/redshift/redshift_schema.py +17 -12
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/redshift/usage.py +13 -11
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +515 -244
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -13
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/schema_inference/object.py +22 -6
- datahub/ingestion/source/sigma/config.py +75 -8
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/sigma/sigma.py +36 -7
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +403 -140
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/constants.py +4 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
- datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
- datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
- datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
- datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
- datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
- datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +219 -26
- datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
- datahub/ingestion/source/sql/clickhouse.py +29 -9
- datahub/ingestion/source/sql/cockroachdb.py +5 -4
- datahub/ingestion/source/sql/druid.py +9 -4
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive.py +28 -8
- datahub/ingestion/source/sql/hive_metastore.py +24 -25
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/job_models.py +18 -2
- datahub/ingestion/source/sql/mssql/source.py +376 -62
- datahub/ingestion/source/sql/mysql.py +154 -4
- datahub/ingestion/source/sql/oracle.py +62 -11
- datahub/ingestion/source/sql/postgres.py +142 -6
- datahub/ingestion/source/sql/presto.py +20 -2
- datahub/ingestion/source/sql/sql_common.py +281 -49
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/sql_types.py +27 -2
- datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
- datahub/ingestion/source/sql/teradata.py +1028 -245
- datahub/ingestion/source/sql/trino.py +43 -10
- datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
- datahub/ingestion/source/sql/vertica.py +14 -7
- datahub/ingestion/source/sql_queries.py +219 -121
- datahub/ingestion/source/state/checkpoint.py +8 -29
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
- datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/superset.py +810 -126
- datahub/ingestion/source/tableau/tableau.py +172 -69
- datahub/ingestion/source/tableau/tableau_common.py +11 -4
- datahub/ingestion/source/tableau/tableau_constant.py +1 -4
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +161 -40
- datahub/ingestion/source/unity/connection.py +61 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +794 -51
- datahub/ingestion/source/unity/proxy_patch.py +321 -0
- datahub/ingestion/source/unity/proxy_types.py +36 -2
- datahub/ingestion/source/unity/report.py +15 -3
- datahub/ingestion/source/unity/source.py +465 -131
- datahub/ingestion/source/unity/tag_entities.py +197 -0
- datahub/ingestion/source/unity/usage.py +46 -4
- datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
- datahub/ingestion/source/usage/usage_common.py +4 -68
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1367 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/source_report/ingestion_stage.py +50 -11
- datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
- datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
- datahub/ingestion/transformer/base_transformer.py +8 -5
- datahub/ingestion/transformer/dataset_domain.py +1 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/integrations/assertion/common.py +3 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/lite/lite_util.py +2 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
- datahub/metadata/_urns/urn_defs.py +1866 -1582
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- datahub/metadata/schema.avsc +18404 -16617
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/AssetSettings.avsc +63 -0
- datahub/metadata/schemas/ChartInfo.avsc +2 -1
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +3 -1
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/IncidentInfo.avsc +3 -3
- datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
- datahub/metadata/schemas/LogicalParent.avsc +145 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
- datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/Operation.avsc +21 -2
- datahub/metadata/schemas/Ownership.avsc +69 -0
- datahub/metadata/schemas/QueryProperties.avsc +24 -2
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/metadata/schemas/StructuredProperties.avsc +69 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- datahub/metadata/schemas/SystemMetadata.avsc +147 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +7 -0
- datahub/sdk/_all_entities.py +15 -0
- datahub/sdk/_shared.py +393 -10
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/chart.py +386 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +453 -0
- datahub/sdk/dataflow.py +309 -0
- datahub/sdk/datajob.py +367 -0
- datahub/sdk/dataset.py +180 -4
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +154 -12
- datahub/sdk/lineage_client.py +943 -0
- datahub/sdk/main_client.py +83 -8
- datahub/sdk/mlmodel.py +383 -0
- datahub/sdk/mlmodelgroup.py +240 -0
- datahub/sdk/search_client.py +85 -8
- datahub/sdk/search_filters.py +393 -68
- datahub/secret/datahub_secret_store.py +5 -1
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
- datahub/specific/aspect_helpers/siblings.py +73 -0
- datahub/specific/aspect_helpers/structured_properties.py +27 -0
- datahub/specific/chart.py +1 -1
- datahub/specific/datajob.py +15 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/specific/dataset.py +51 -59
- datahub/sql_parsing/_sqlglot_patch.py +1 -2
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/split_statements.py +30 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
- datahub/sql_parsing/sqlglot_lineage.py +517 -44
- datahub/sql_parsing/sqlglot_utils.py +30 -18
- datahub/sql_parsing/tool_meta_extractor.py +25 -2
- datahub/telemetry/telemetry.py +30 -16
- datahub/testing/check_imports.py +1 -1
- datahub/testing/docker_utils.py +8 -2
- datahub/testing/mce_helpers.py +421 -0
- datahub/testing/mcp_diff.py +17 -21
- datahub/testing/sdk_v2_helpers.py +18 -0
- datahub/upgrade/upgrade.py +86 -30
- datahub/utilities/file_backed_collections.py +14 -15
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/ingest_utils.py +2 -2
- datahub/utilities/is_pytest.py +3 -2
- datahub/utilities/logging_manager.py +30 -7
- datahub/utilities/mapping.py +29 -2
- datahub/utilities/sample_data.py +5 -4
- datahub/utilities/server_config_util.py +298 -10
- datahub/utilities/sqlalchemy_query_combiner.py +6 -4
- datahub/utilities/stats_collections.py +4 -0
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/utilities/urn_encoder.py +1 -1
- datahub/utilities/urns/urn.py +41 -2
- datahub/emitter/sql_parsing_builder.py +0 -306
- datahub/ingestion/source/redshift/lineage_v2.py +0 -458
- datahub/ingestion/source/vertexai.py +0 -697
- datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
datahub/emitter/mcp.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import json
|
|
3
|
-
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
|
4
5
|
|
|
5
6
|
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
|
6
7
|
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
|
8
|
+
from datahub.errors import DataHubDeprecationWarning
|
|
7
9
|
from datahub.metadata.schema_classes import (
|
|
8
10
|
ChangeTypeClass,
|
|
9
11
|
DictWrapper,
|
|
@@ -69,18 +71,28 @@ class MetadataChangeProposalWrapper:
|
|
|
69
71
|
aspectName: Union[None, str] = None
|
|
70
72
|
aspect: Union[None, _Aspect] = None
|
|
71
73
|
systemMetadata: Union[None, SystemMetadataClass] = None
|
|
74
|
+
headers: Union[None, Dict[str, str]] = None
|
|
72
75
|
|
|
73
76
|
def __post_init__(self) -> None:
|
|
74
77
|
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
|
|
75
78
|
self.entityType = guess_entity_type(self.entityUrn)
|
|
76
79
|
elif self.entityUrn and self.entityType:
|
|
77
|
-
guessed_entity_type = guess_entity_type(self.entityUrn)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if self.entityType.lower() != guessed_entity_type:
|
|
80
|
+
guessed_entity_type = guess_entity_type(self.entityUrn)
|
|
81
|
+
if self.entityType.lower() != guessed_entity_type.lower():
|
|
82
|
+
# If they aren't a case-ignored match, raise an error.
|
|
81
83
|
raise ValueError(
|
|
82
84
|
f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
|
|
83
85
|
)
|
|
86
|
+
elif self.entityType != guessed_entity_type:
|
|
87
|
+
# If they only differ in case, normalize and print a warning.
|
|
88
|
+
self.entityType = guessed_entity_type
|
|
89
|
+
warnings.warn(
|
|
90
|
+
f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
|
|
91
|
+
"This will be automatically corrected for now, but will become an error in a future release. "
|
|
92
|
+
"Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
|
|
93
|
+
DataHubDeprecationWarning,
|
|
94
|
+
stacklevel=3,
|
|
95
|
+
)
|
|
84
96
|
elif self.entityType == _ENTITY_TYPE_UNSET:
|
|
85
97
|
raise ValueError("entityType must be set if entityUrn is not set")
|
|
86
98
|
|
|
@@ -112,6 +124,7 @@ class MetadataChangeProposalWrapper:
|
|
|
112
124
|
auditHeader=self.auditHeader,
|
|
113
125
|
aspectName=self.aspectName,
|
|
114
126
|
systemMetadata=self.systemMetadata,
|
|
127
|
+
headers=self.headers,
|
|
115
128
|
)
|
|
116
129
|
|
|
117
130
|
def make_mcp(self) -> MetadataChangeProposalClass:
|
|
@@ -211,6 +224,7 @@ class MetadataChangeProposalWrapper:
|
|
|
211
224
|
aspectName=mcpc.aspectName,
|
|
212
225
|
aspect=aspect,
|
|
213
226
|
systemMetadata=mcpc.systemMetadata,
|
|
227
|
+
headers=mcpc.headers,
|
|
214
228
|
)
|
|
215
229
|
else:
|
|
216
230
|
return None
|
|
@@ -228,6 +242,7 @@ class MetadataChangeProposalWrapper:
|
|
|
228
242
|
changeType=mcl.changeType,
|
|
229
243
|
auditHeader=mcl.auditHeader,
|
|
230
244
|
systemMetadata=mcl.systemMetadata,
|
|
245
|
+
headers=mcl.headers,
|
|
231
246
|
)
|
|
232
247
|
return cls.try_from_mcpc(mcpc) or mcpc
|
|
233
248
|
|
datahub/emitter/mcp_builder.py
CHANGED
|
@@ -117,6 +117,14 @@ class ContainerKey(DatahubKey):
|
|
|
117
117
|
PlatformKey = ContainerKey
|
|
118
118
|
|
|
119
119
|
|
|
120
|
+
class NamespaceKey(ContainerKey):
|
|
121
|
+
"""
|
|
122
|
+
For Iceberg namespaces (databases/schemas)
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
namespace: str
|
|
126
|
+
|
|
127
|
+
|
|
120
128
|
class DatabaseKey(ContainerKey):
|
|
121
129
|
database: str
|
|
122
130
|
|
|
@@ -129,6 +137,10 @@ class ProjectIdKey(ContainerKey):
|
|
|
129
137
|
project_id: str
|
|
130
138
|
|
|
131
139
|
|
|
140
|
+
class ExperimentKey(ContainerKey):
    """Container key that adds an ``id`` field to the base ``ContainerKey``.

    NOTE(review): presumably identifies an experiment container (e.g. for ML
    sources) - confirm against the sources that construct this key.
    """

    # Identifier of the experiment; its exact format is not visible here.
    id: str
|
|
142
|
+
|
|
143
|
+
|
|
132
144
|
class MetastoreKey(ContainerKey):
|
|
133
145
|
metastore: str
|
|
134
146
|
|
|
@@ -1,28 +1,151 @@
|
|
|
1
|
-
import
|
|
1
|
+
import json
|
|
2
2
|
import shlex
|
|
3
|
-
from
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
5
|
|
|
5
6
|
import requests
|
|
7
|
+
from requests.auth import HTTPBasicAuth
|
|
8
|
+
|
|
9
|
+
from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
|
|
10
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
11
|
+
from datahub.emitter.serialization_helper import pre_json_transform
|
|
12
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
13
|
+
MetadataChangeProposal,
|
|
14
|
+
)
|
|
15
|
+
from datahub.metadata.schema_classes import ChangeTypeClass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _decode_bytes(value: Union[str, bytes]) -> str:
    """Return ``value`` as text, decoding it first when it is ``bytes``.

    The result is only used to render a human-readable curl command, so
    bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising ``UnicodeDecodeError`` and aborting the caller.

    Args:
        value: A header value or username, either already text or raw bytes.

    Returns:
        The text form of ``value``.
    """
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return value
|
|
6
23
|
|
|
7
24
|
|
|
8
25
|
def _format_header(name: str, value: Union[str, bytes]) -> str:
    """Render one HTTP header as a ``Name: value`` string for a curl command.

    The value of the ``Authorization`` header is replaced with ``<redacted>``
    so that credentials never end up in logged commands. HTTP header names
    are case-insensitive, so the check ignores case; the name itself is
    echoed back exactly as given.

    Args:
        name: Header name.
        value: Header value, as text or raw bytes.

    Returns:
        The formatted header line.
    """
    if name.lower() == "authorization":
        return f"{name!s}: <redacted>"
    return f"{name!s}: {_decode_bytes(value)}"
|
|
12
29
|
|
|
13
30
|
|
|
14
31
|
def make_curl_command(
    session: requests.Session, method: str, url: str, payload: Optional[str] = None
) -> str:
    """Build a shell-quoted ``curl`` command equivalent to a session request.

    The command contains, in order: the HTTP method, one ``-H`` per session
    header (formatted by ``_format_header``), the session auth (if any), the
    request body (if any) and finally the URL.

    Args:
        session: Session whose headers and auth are mirrored in the command.
        method: HTTP method passed to ``curl -X``.
        url: Target URL, appended as the last argument.
        payload: Optional request body passed via ``--data``; omitted when
            ``None`` or empty.

    Returns:
        The full command line, quoted with ``shlex.join``.
    """
    header_args: List[str] = []
    for key, val in session.headers.items():
        header_args += ["-H", _format_header(key, val)]

    auth_args: List[str] = []
    if session.auth:
        if isinstance(session.auth, HTTPBasicAuth):
            # Only the username is shown; the password is always redacted.
            auth_args = ["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
        else:
            # Any other auth mechanism is expected to travel in headers, so
            # only a placeholder is emitted for it.
            auth_args = ["-H", "<unknown auth type>"]

    data_args: List[str] = ["--data", payload] if payload else []

    return shlex.join(
        ["curl", "-X", method, *header_args, *auth_args, *data_args, url]
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class OpenApiRequest:
    """Represents an OpenAPI request for entity operations."""

    # HTTP verb in lower case: "post", "patch" or "delete".
    method: str
    # Fully-qualified OpenAPI v3 entity endpoint.
    url: str
    # Request body; empty for deletes, otherwise a single-element list.
    payload: List[Dict[str, Any]]

    @classmethod
    def from_mcp(
        cls,
        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
        gms_server: str,
        async_flag: bool = False,
        search_sync_flag: bool = False,
    ) -> Optional["OpenApiRequest"]:
        """Factory method to create an OpenApiRequest from a MetadataChangeProposal.

        Args:
            mcp: The change proposal to translate.
            gms_server: Base URL of the server, used as the URL prefix.
            async_flag: Value of the ``async`` query parameter on writes.
            search_sync_flag: When set and ``async_flag`` is false, adds the
                ``X-DataHub-Sync-Index-Update`` header to the payload entry.

        Returns:
            The request, or None when the proposal has no aspect name, or is
            not a delete and carries no aspect.

        Raises:
            NotImplementedError: If a PATCH proposal does not carry a
                non-empty JSON-patch value, or a non-PATCH raw proposal
                carries JSON-patch content.
        """
        if not mcp.aspectName:
            return None

        entity_url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}"

        # Deletes address the entity by URN and carry no body.
        if mcp.changeType == ChangeTypeClass.DELETE:
            return cls(
                method="delete", url=f"{entity_url}/{mcp.entityUrn}", payload=[]
            )

        # Every other change type needs an aspect to send.
        if not mcp.aspect:
            return None

        mcp_headers: Dict[str, str] = {}
        if not async_flag and search_sync_flag:
            mcp_headers["X-DataHub-Sync-Index-Update"] = "true"

        value: Any
        if mcp.changeType == ChangeTypeClass.PATCH:
            method = "patch"
            obj = mcp.aspect.to_obj()
            content_type = obj.get("contentType")
            if not obj.get("value") or content_type != JSON_PATCH_CONTENT_TYPE:
                raise NotImplementedError(
                    f"ChangeType {mcp.changeType} only supports content type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
                )
            # Undo double serialization.
            value = json.loads(obj["value"])
            # A bare list of patch operations is wrapped under a "patch" key.
            if isinstance(value, list):
                value = {"patch": value}
        else:
            method = "post"
            if isinstance(mcp, MetadataChangeProposalWrapper):
                value = pre_json_transform(mcp.to_obj(simplified_structure=True))[
                    "aspect"
                ]["json"]
            else:
                obj = mcp.aspect.to_obj()
                content_type = obj.get("contentType")
                if obj.get("value") and content_type == JSON_CONTENT_TYPE:
                    # Undo double serialization.
                    obj = json.loads(obj["value"])
                elif content_type == JSON_PATCH_CONTENT_TYPE:
                    raise NotImplementedError(
                        f"ChangeType {mcp.changeType} does not support patch."
                    )
                value = pre_json_transform(obj)

        payload: List[Dict[str, Any]] = [
            {
                "urn": mcp.entityUrn,
                mcp.aspectName: {
                    "value": value,
                    "systemMetadata": mcp.systemMetadata.to_obj()
                    if mcp.systemMetadata
                    else None,
                    "headers": mcp_headers,
                },
            }
        ]

        return cls(
            method=method,
            url=f"{entity_url}?async={'true' if async_flag else 'false'}",
            payload=payload,
        )
|
|
@@ -1,38 +1,124 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
+
import re
|
|
4
|
+
import warnings
|
|
3
5
|
from dataclasses import dataclass
|
|
6
|
+
from datetime import datetime, timezone
|
|
4
7
|
from typing import Dict, List, Optional, Sequence, Union
|
|
5
8
|
|
|
6
9
|
from requests import Response
|
|
7
10
|
|
|
8
11
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
12
|
+
from datahub.errors import APITracingWarning
|
|
9
13
|
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
10
14
|
MetadataChangeProposal,
|
|
11
15
|
)
|
|
12
16
|
|
|
13
17
|
logger = logging.getLogger(__name__)
|
|
14
18
|
|
|
19
|
+
_TRACE_HEADER_NAME = "traceparent"
|
|
20
|
+
|
|
15
21
|
|
|
16
22
|
@dataclass
class TraceData:
    """A trace ID together with the data recorded for that trace.

    On construction ``trace_id`` is normalized with ``extract_trace_id``, so
    a full ``traceparent`` header value may be passed in place of a bare ID.
    """

    trace_id: str
    # NOTE(review): presumably maps entity URN -> aspect names; confirm
    # against the code that builds TraceData.
    data: Dict[str, List[str]]

    @staticmethod
    def extract_trace_id(input_str: Optional[str]) -> Optional[str]:
        """
        Extract the trace ID from various input formats.

        Args:
            input_str (Optional[str]): Input string potentially containing a trace ID

        Returns:
            Optional[str]: Extracted trace ID or None if no valid trace ID found
        """
        # Handle None or empty input
        if input_str is None:
            return None
        cleaned = str(input_str).strip()
        if not cleaned:
            return None

        # Special case for test scenarios
        if cleaned == "test-trace-id":
            return cleaned

        # Case 1: Full traceparent header (version-traceid-parentid-flags).
        # The trace ID is the second field; an empty field (e.g. "abc-") is
        # not a valid trace ID, so report it as missing instead of "".
        if "-" in cleaned:
            return cleaned.split("-")[1] or None

        # Case 2 / fallback: a bare trace ID (typically 32 hex characters) or
        # any other hyphen-free token is returned unchanged.
        return cleaned

    def __post_init__(self) -> None:
        """
        Validate and potentially process the trace_id during initialization.

        Raises:
            ValueError: If trace_id is empty or no trace ID can be extracted
            TypeError: If data is not a dictionary
        """
        # Explicitly check for None or empty string
        if self.trace_id is None or self.trace_id == "":
            raise ValueError("trace_id cannot be empty")

        # Allow extracting trace ID from various input formats
        extracted_id = self.extract_trace_id(self.trace_id)
        if extracted_id is None:
            raise ValueError("Invalid trace_id format")

        # Update trace_id with the extracted version
        self.trace_id = extracted_id

        # Validate data
        if not isinstance(self.data, dict):
            raise TypeError("data must be a dictionary")

    def extract_timestamp(self) -> datetime:
        """
        Extract the timestamp from a trace ID generated by the TraceIdGenerator.

        Returns:
            datetime: The timestamp in UTC

        Raises:
            ValueError: If the trace ID is invalid
        """
        # Special case for test trace ID
        if self.trace_id == "test-trace-id":
            return datetime.fromtimestamp(0, tz=timezone.utc)

        # The first 16 hex characters encode the timestamp in microseconds
        timestamp_micros_hex = self.trace_id[:16]
        if len(timestamp_micros_hex) < 16 or not re.fullmatch(
            r"[0-9a-fA-F]+", timestamp_micros_hex
        ):
            raise ValueError("Invalid trace ID format")

        # Convert hex to integer
        timestamp_micros = int(timestamp_micros_hex, 16)

        # Convert microseconds to milliseconds (sub-millisecond part dropped)
        timestamp_millis = timestamp_micros // 1000

        # Convert to datetime in UTC. Out-of-range values surface as
        # OverflowError/OSError on some platforms; normalize them to the
        # documented ValueError.
        try:
            return datetime.fromtimestamp(timestamp_millis / 1000, tz=timezone.utc)
        except (OverflowError, OSError) as e:
            raise ValueError("Invalid trace ID format") from e
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _extract_trace_id(response: Response) -> Optional[str]:
|
|
31
118
|
"""
|
|
32
119
|
Extract trace ID from response headers.
|
|
33
120
|
Args:
|
|
34
121
|
response: HTTP response object
|
|
35
|
-
trace_header: Name of the trace header to use
|
|
36
122
|
Returns:
|
|
37
123
|
Trace ID if found and response is valid, None otherwise
|
|
38
124
|
"""
|
|
@@ -40,9 +126,17 @@ def _extract_trace_id(
|
|
|
40
126
|
logger.debug(f"Invalid status code: {response.status_code}")
|
|
41
127
|
return None
|
|
42
128
|
|
|
43
|
-
trace_id = response.headers.get(
|
|
129
|
+
trace_id = response.headers.get(_TRACE_HEADER_NAME)
|
|
44
130
|
if not trace_id:
|
|
45
|
-
|
|
131
|
+
# This will only be printed if
|
|
132
|
+
# 1. we're in async mode (checked by the caller)
|
|
133
|
+
# 2. the server did not return a trace ID
|
|
134
|
+
logger.debug(f"Missing trace header: {_TRACE_HEADER_NAME}")
|
|
135
|
+
warnings.warn(
|
|
136
|
+
"No trace ID found in response headers. API tracing is not active - likely due to an outdated server version.",
|
|
137
|
+
APITracingWarning,
|
|
138
|
+
stacklevel=3,
|
|
139
|
+
)
|
|
46
140
|
return None
|
|
47
141
|
|
|
48
142
|
return trace_id
|
|
@@ -51,20 +145,19 @@ def _extract_trace_id(
|
|
|
51
145
|
def extract_trace_data(
|
|
52
146
|
response: Response,
|
|
53
147
|
aspects_to_trace: Optional[List[str]] = None,
|
|
54
|
-
trace_header: str = "traceparent",
|
|
55
148
|
) -> Optional[TraceData]:
|
|
56
|
-
"""
|
|
57
|
-
|
|
149
|
+
"""Extract trace data from a response object.
|
|
150
|
+
|
|
151
|
+
If we run into a JSONDecodeError, we'll log an error and return None.
|
|
152
|
+
|
|
58
153
|
Args:
|
|
59
154
|
response: HTTP response object
|
|
60
155
|
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
|
61
|
-
|
|
156
|
+
|
|
62
157
|
Returns:
|
|
63
158
|
TraceData object if successful, None otherwise
|
|
64
|
-
Raises:
|
|
65
|
-
JSONDecodeError: If response body cannot be decoded as JSON
|
|
66
159
|
"""
|
|
67
|
-
trace_id = _extract_trace_id(response
|
|
160
|
+
trace_id = _extract_trace_id(response)
|
|
68
161
|
if not trace_id:
|
|
69
162
|
return None
|
|
70
163
|
|
|
@@ -104,19 +197,18 @@ def extract_trace_data_from_mcps(
|
|
|
104
197
|
response: Response,
|
|
105
198
|
mcps: Sequence[Union[MetadataChangeProposal, MetadataChangeProposalWrapper]],
|
|
106
199
|
aspects_to_trace: Optional[List[str]] = None,
|
|
107
|
-
trace_header: str = "traceparent",
|
|
108
200
|
) -> Optional[TraceData]:
|
|
109
|
-
"""
|
|
110
|
-
|
|
201
|
+
"""Extract trace data from a response object and populate data from provided MCPs.
|
|
202
|
+
|
|
111
203
|
Args:
|
|
112
204
|
response: HTTP response object used only for trace_id extraction
|
|
113
205
|
mcps: List of MCP URN and aspect data
|
|
114
206
|
aspects_to_trace: Optional list of aspect names to extract. If None, extracts all aspects.
|
|
115
|
-
|
|
207
|
+
|
|
116
208
|
Returns:
|
|
117
209
|
TraceData object if successful, None otherwise
|
|
118
210
|
"""
|
|
119
|
-
trace_id = _extract_trace_id(response
|
|
211
|
+
trace_id = _extract_trace_id(response)
|
|
120
212
|
if not trace_id:
|
|
121
213
|
return None
|
|
122
214
|
|