ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import copy
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from typing import Annotated, List, Literal, Optional
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
from ibm_watsonx_gov.entities.agentic_app import Node
|
|
16
|
+
|
|
17
|
+
from ibm_watsonx_gov.entities.evaluation_result import AgentMetricResult, AggregateAgentMetricResult, MessageData, NodeData, MetricsMappingData
|
|
18
|
+
|
|
19
|
+
# Component kinds a metric result can apply to. This list is used as the default
# `applies_to` filter of the AgenticEvaluationResult getter methods.
AGENTIC_RESULT_COMPONENTS = ["conversation", "message", "node"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _with_json_dump_defaults(dump_kwargs: dict) -> dict:
    """Return a copy of ``dump_kwargs`` where ``exclude_unset`` and ``exclude_none`` default to True.

    A flag that is missing or explicitly ``None`` is set to ``True``; any other
    caller supplied value is kept.
    """
    resolved = dict(dump_kwargs)
    for flag in ("exclude_unset", "exclude_none"):
        if resolved.get(flag) is None:
            resolved[flag] = True
    return resolved


def _exclude_field(exclude, field_name: str):
    """Return ``exclude`` extended so that ``field_name`` is excluded as well.

    Accepts the forms a caller may pass as ``exclude`` to ``model_dump``:
    ``None``, a dict, or an iterable of field names.
    """
    if exclude is None:
        return {field_name}
    if isinstance(exclude, dict):
        return {**exclude, field_name: True}
    return set(exclude) | {field_name}


class AgenticEvaluationResult(BaseModel):
    """
    Result of an agentic evaluation.

    Holds the individual metric results, the aggregated metric results and the
    messages, nodes, edges and mapping data attached to them, and provides
    helpers to filter the results and export them as json or as a dataframe.
    """

    metrics_results: Annotated[List[AgentMetricResult],
                               Field(title="Metrics result",
                                     description="The list of metrics result.")]
    aggregated_metrics_results: Annotated[List[AggregateAgentMetricResult],
                                          Field(title="Aggregated metrics result",
                                                description="The list of aggregated metrics result. The metrics are aggregated for each node in the agent.")]
    messages_data: Annotated[List[MessageData],
                             Field(title="Messages",
                                   description="The list of agent messages data.",
                                   default=[])]
    nodes_data: Annotated[List[NodeData],
                          Field(title="Node messages",
                                description="The list of nodes data.",
                                default=[])]
    metrics_mapping_data: Annotated[List[MetricsMappingData],
                                    Field(title="Metrics mapping data",
                                          description="The mapping data used to compute the metric.",
                                          default=[])]
    nodes: Annotated[list[Node],
                     Field(title="Nodes",
                           description="The list of nodes details",
                           default=[])]
    # Title/description previously duplicated the ones of `nodes`.
    edges: Annotated[list[dict],
                     Field(title="Edges",
                           description="The list of edges details",
                           default=[])]

    def get_aggregated_metrics_results(self,
                                       applies_to: list[str] = AGENTIC_RESULT_COMPONENTS,
                                       node_name: Optional[str] = None,
                                       include_individual_results: bool = True,
                                       format: Literal["json",
                                                       "object"] = "json",
                                       **kwargs) -> list[AggregateAgentMetricResult] | list[dict]:
        """
        Get the aggregated agentic metrics results based on the specified arguments.

        Args:
            applies_to (AGENTIC_RESULT_COMPONENTS, optional): The type of component the metric result applies to. Defaults to ["conversation", "message", "node"].
            node_name (str, optional): The name of the node to get the aggregated results for. Defaults to None.
            include_individual_results (bool, optional): Whether to return the individual metrics results. Only applied when format is "json". Defaults to True.
            format (Literal["json", "object"], optional): The format of the output. Defaults to "json".
            **kwargs: Extra arguments passed to ``model_dump`` when format is "json".
                ``exclude_unset`` and ``exclude_none`` default to True.
        Return:
            returns: list[AggregateAgentMetricResult] | list [dict]
        """

        selected = [
            amr for amr in self.aggregated_metrics_results
            if amr.applies_to in applies_to and (not node_name or amr.node_name == node_name)
        ]

        if format != "json":
            # Hand out copies so callers cannot modify the stored results.
            return [copy.deepcopy(amr) for amr in selected]

        dump_kwargs = _with_json_dump_defaults(kwargs)
        if not include_individual_results:
            # Merge with a caller supplied `exclude` instead of passing the
            # keyword twice, which raised a TypeError.
            dump_kwargs["exclude"] = _exclude_field(
                dump_kwargs.get("exclude"), "individual_results")

        return [amr.model_dump(mode="json", **dump_kwargs) for amr in selected]

    def get_metrics_results(self,
                            applies_to: list[str] = AGENTIC_RESULT_COMPONENTS,
                            node_name: Optional[str] = None,
                            format: Literal["json", "object"] = "json",
                            **kwargs) -> list[AgentMetricResult] | list[dict]:
        """
        Get the agentic metrics results based on the specified arguments.

        Args:
            applies_to (AGENTIC_RESULT_COMPONENTS, optional): The type of component the metrics results applies to. Defaults to ["conversation", "message", "node"].
            node_name (str, optional): The name of the node to get the metrics results for. Defaults to None.
            format (Literal["json", "object"], optional): The format of the output. Defaults to "json".
            **kwargs: Extra arguments passed to ``model_dump`` when format is "json".
                ``exclude_unset`` and ``exclude_none`` default to True.
        Return:
            returns: list[AgentMetricResult] | list [dict]
        """

        selected = [
            amr for amr in self.metrics_results
            if amr.applies_to in applies_to and (not node_name or amr.node_name == node_name)
        ]

        if format != "json":
            # Hand out copies so callers cannot modify the stored results.
            return [copy.deepcopy(amr) for amr in selected]

        dump_kwargs = _with_json_dump_defaults(kwargs)
        return [amr.model_dump(mode="json", **dump_kwargs) for amr in selected]

    def to_json(self, **kwargs) -> dict:
        """
        Get the AgenticEvaluationResult as json

        Args:
            **kwargs: Extra arguments passed to ``model_dump``.
                ``exclude_unset`` and ``exclude_none`` default to True.

        Returns:
            dict: The AgenticEvaluationResult
        """

        return self.model_dump(mode="json", **_with_json_dump_defaults(kwargs))

    def to_df(self, input_data: Optional[pd.DataFrame] = None,
              message_id_field: str = "message_id", wide_format: bool = True) -> pd.DataFrame:
        """
        Get individual metrics dataframe.

        If the input dataframe is provided, it will be merged with the metrics dataframe.

        Args:
            input_data (Optional[pd.DataFrame], optional): Input data to merge with metrics dataframe. Defaults to None.
            message_id_field (str, optional): Field to use for merging input data and metrics dataframe. Defaults to "message_id".
            wide_format (bool): Determines whether to display the results in a pivot table format. Defaults to True

        Returns:
            pd.DataFrame: Metrics dataframe.
        """

        records = [
            metric.model_dump(exclude={"provider"}, exclude_none=True)
            for metric in self.metrics_results
        ]
        metrics_df = pd.DataFrame(records)
        if input_data is not None:
            metrics_df = input_data.merge(metrics_df, on=message_id_field)

        # Long format: one row per metric result.
        if not wide_format:
            return metrics_df

        # Wide format: one column per "<node name>.<metric>" / "message.<metric>".
        def column_label(row):
            if row["applies_to"] == "node":
                return f"{row['node_name']}.{row['name']}"
            if row["applies_to"] == "message":
                return f"message.{row['name']}"
            # TODO support other types
            return None

        metrics_df["idx"] = metrics_df.apply(column_label, axis=1)

        # NOTE(review): the pivot index is the literal "message_id" column and not
        # `message_id_field` - confirm whether other field names must be supported.
        # Rows without a label are presumably dropped by pivot_table - confirm.
        metrics_df_wide = metrics_df.pivot_table(
            index="message_id",
            columns="idx",
            values="value"
        ).reset_index().rename_axis("", axis=1)

        # Attach the input columns to the pivoted metrics when input data is given.
        if input_data is not None:
            metrics_df_wide = input_data.merge(
                metrics_df_wide, on=message_id_field)
        return metrics_df_wide
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
from typing import Annotated, Dict, List
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from ibm_watsonx_gov.entities.ai_experiment import Node
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class EvaluationAsset(BaseModel):
    # Describes one AI Experiment run referenced by an AI Evaluation.
    # Every field is optional; string fields default to "".

    id: Annotated[
        str,
        Field(
            description="The id of the AI Experiment asset",
            examples=["asset-001"],
            default="",
        ),
    ]
    container_id: Annotated[
        str,
        Field(
            description="The project id or space id.", examples=["proj-01"], default=""
        ),
    ]
    container_type: Annotated[
        str,
        Field(
            description="The container type of AI Experiment",
            examples=["project", "space"],
            default="",
        ),
    ]
    name: Annotated[
        str,
        Field(
            description="The name of the AI Experiment.",
            examples=["AI_experiment_1"],
            default="",
        ),
    ]
    run_id: Annotated[
        str,
        Field(
            description="The experiment run id of the AI Experiment.",
            examples=["run-01"],
            default="",
        ),
    ]
    run_name: Annotated[
        str,
        Field(
            description="The experiment run name of the AI Experiment.",
            examples=["Test run 1"],
            default="",
        ),
    ]
    attachment_id: Annotated[
        str,
        Field(
            description="the attachment id for the evaluation result for that experiment run.",
            examples=["att-01"],
            default="",
        ),
    ]
    test_data: Annotated[
        Dict,
        Field(
            description="The test data of that experiment run.", examples=[], default={}
        ),
    ]
    # The default must be a list: pydantic does not validate defaults unless
    # asked to, so a "" default would leak a str into a List[Node] field.
    # Each entry of `examples` is one complete example value, i.e. a list of nodes.
    nodes: Annotated[
        List[Node],
        Field(
            description="List of the node for that experiment run.",
            examples=[[{"id": "node-001", "name": "Node_1", "type": "tool"}]],
            default_factory=list,
        ),
    ]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class EvaluationConfig(BaseModel):
    # Configuration of an AI Evaluation: the monitors to apply and the
    # experiment runs (evaluation assets) they are applied to.

    # Free-form mapping; the example shows a monitor name mapped to its parameters.
    monitors: Annotated[
        Dict,
        Field(
            description="The monitors configuration for that AI Evaluation.",
            examples=[
                {"agentic_ai_quality": {"parameters": {"metrics_configuration": {}}}}
            ],
            default={},
        ),
    ]
    # Each entry of `examples` is one complete example value, i.e. a list of assets.
    evaluation_assets: Annotated[
        List[EvaluationAsset],
        Field(
            description="The evaluation asset details.",
            examples=[
                [
                    {
                        "id": "d4d6ac43-0bec-47f9-8924-0b74ea1b8ec3",
                        "container_id": "b76d2ebb-4e05-496e-b377-557d409e8c45",
                        "container_type": "project",
                        "name": "AI_Experiment asset",
                        "run_id": "fa7629e9-e1bb-4779-9198-9a6343dab1ad",
                        "run_name": "Experiment run 1",
                        "attachment_id": "ab914f9b-9475-4c10-88d8-480b6c9f4963",
                        "test_data": {"total_rows": 0},
                        "nodes": [],
                    }
                ]
            ],
            default=[],
        ),
    ]
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class AIEvaluationAsset(BaseModel):
    """
    The class for AIEvaluationAsset.

    Examples
    --------
    Create AIEvaluationAsset instance:

    .. code-block:: python

        # Define evaluation configuration
        evaluation_config = EvaluationConfig(
            monitors={
                "agentic_ai_quality": {
                    "parameters": {
                        "metrics_configuration": {}
                    }
                }
            }
        )

        # Create the evaluation asset
        ai_evaluation_asset = AIEvaluationAsset(
            name="AI Evaluation for agent",
            evaluation_configuration=evaluation_config
        )

        # Compare two or more AI experiments using the evaluation asset
        response = ai_experiment_client.compare_ai_experiments(
            ai_experiment_ids=["experiment_id_1", "experiment_id_2"],
            ai_evaluation_asset=ai_evaluation_asset
        )
    """

    container_id: Annotated[
        str,
        Field(
            description="The project or space id for the AI Evaluation.",
            examples=["proj--1"],
            default="",
        ),
    ]
    container_type: Annotated[
        str,
        Field(
            description="The container type for the AI Evaluation.",
            examples=["project", "space"],
            default="",
        ),
    ]
    container_name: Annotated[
        str,
        Field(
            description="The name of the project or the space.",
            examples=["Project_1"],
            default="",
        ),
    ]
    name: Annotated[
        str,
        Field(
            description="The name of the AI Evaluation asset.",
            examples=["AI agents evaluation"],
            default="",
        ),
    ]
    description: Annotated[
        str,
        Field(
            description="The description of the AI Evaluation asset.",
            examples=["AI agents evaluation"],
            default="",
        ),
    ]
    asset_type: Annotated[
        str,
        Field(
            description="The asset type of the AI Evaluation.",
            examples=["ai_evaluation"],
            default="ai_evaluation",
        ),
    ]
    created_at: Annotated[
        str,
        Field(
            description="The timestamp of creation of AI Evaluation asset.",
            examples=["2025-04-01T12:00:00Z"],
            default="",
        ),
    ]
    owner_id: Annotated[
        str,
        Field(
            description="The owner of the AI Evaluation.",
            examples=["user-123"],
            default="",
        ),
    ]
    asset_id: Annotated[
        str,
        Field(
            description="The asset id of the AI Evaluation.",
            examples=["43676d70-1ecc-412e-832f-8762aa899247"],
            default="",
        ),
    ]
    creator_id: Annotated[
        str,
        Field(
            description="The creator id of the AI Evaluation.",
            examples=["user-123"],
            default="",
        ),
    ]
    asset_details: Annotated[
        Dict,
        Field(
            description="The asset details of the AI Evaluation asset.",
            examples=[
                {
                    "task_ids": [],
                    "label_column": "",
                    "operational_space_id": "development",
                    "input_data_type": "unstructured_text",
                    "job_id": "",
                    "service_instance_id": "",
                    "evaluation_asset_type": "ai_experiment|prompt",
                }
            ],
            default={},
        ),
    ]
    # The default has to be an EvaluationConfig: pydantic does not validate
    # defaults unless asked to, so a [] default would leave a list in a field
    # whose declared type (and whose users) expect a model instance.
    evaluation_configuration: Annotated[
        EvaluationConfig,
        Field(
            description="The evaluation configuration of the AI Evaluation.",
            examples=[
                {
                    "monitors": {
                        "agentic_ai_quality": {
                            "parameters": {"metrics_configuration": {}}
                        }
                    },
                    "evaluation_assets": [
                        {
                            "id": "d4d6ac43-0bec-47f9-8924-0b74ea1b8ec3",
                            "container_id": "b76d2ebb-4e05-496e-b377-557d409e8c45",
                            "container_type": "project",
                            "name": "AI_Experiment asset for Agent governance",
                            "run_id": "fa7629e9-e1bb-4779-9198-9a6343dab1ad",
                            "run_name": "Experiment run 1",
                            "attachment_id": "ab914f9b-9475-4c10-88d8-480b6c9f4963",
                            "test_data": {"total_rows": 0},
                            "nodes": [],
                        }
                    ],
                }
            ],
            default_factory=EvaluationConfig,
        ),
    ]
    href: Annotated[
        str, Field(description="The link of the AI Evaluation Asset", default="")
    ]

    def to_json(self) -> dict:
        """
        Transform the AIEvaluationAsset instance to json

        Returns:
            dict: The model dumped in JSON mode, with every field included.
        """
        return self.model_dump(mode="json")
|