ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any, Dict, List, Optional, Type, TypeVar, Union
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from ibm_watsonx_ai import Credentials
|
|
6
|
+
from ibm_watsonx_ai.foundation_models import ModelInference
|
|
7
|
+
except ImportError as e:
|
|
8
|
+
raise ImportError(
|
|
9
|
+
"Please install the ibm-watsonx-ai package: pip install 'llmevalkit[ibm_watsonx_ai]'"
|
|
10
|
+
) from e
|
|
11
|
+
|
|
12
|
+
from llmevalkit.llm.base import Hook, LLMClient, register_llm
|
|
13
|
+
from llmevalkit.llm.output_parser import ValidatingLLMClient
|
|
14
|
+
from llmevalkit.llm.types import GenerationMode, LLMResponse, ParameterMapper
|
|
15
|
+
from pydantic import BaseModel
|
|
16
|
+
|
|
17
|
+
from ..consts import WX_API_KEY, WX_PROJECT_ID, WX_SPACE_ID, WX_URL
|
|
18
|
+
|
|
19
|
+
T = TypeVar("T", bound="WatsonxLLMClient")
|
|
20
|
+
SchemaType = Union[Dict[str, Any], Type["BaseModel"], Type]
|
|
21
|
+
|
|
22
|
+
# -------------------------------------------------------------------
|
|
23
|
+
# 1. Non-validating Watsonx wrapper
|
|
24
|
+
# -------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@register_llm("watsonx")
|
|
28
|
+
class WatsonxLLMClient(LLMClient):
|
|
29
|
+
"""
|
|
30
|
+
Adapter for IBM watsonx.ai Foundation Model (via ibm_watsonx_ai.foundation_models.ModelInference).
|
|
31
|
+
|
|
32
|
+
Supports:
|
|
33
|
+
- text: sync generation (ModelInference.generate)
|
|
34
|
+
- chat: sync chat (ModelInference.chat)
|
|
35
|
+
- text_async: async generation (ModelInference.agenerate)
|
|
36
|
+
- chat_async: async chat (ModelInference.achat)
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(
    self,
    model_name: Optional[str] = None,
    api_key: Optional[str] = None,
    project_id: Optional[str] = None,
    space_id: Optional[str] = None,
    deployment_id: Optional[str] = None,
    url: Optional[str] = None,
    hooks: Optional[List[Hook]] = None,
    model_id: Optional[str] = None,
    **model_kwargs: Any,
) -> None:
    """
    Initialize the Watsonx client.

    Args:
        model_name: Identifier of the watsonx model (e.g., "meta-llama/llama-3-3-70b-instruct").
        api_key: (Optional) Your IBM Cloud API Key for watsonx.ai.
        project_id: (Optional) watsonx project ID.
        space_id: (Optional) watsonx space ID.
        deployment_id: (Optional) watsonx deployment ID.
        url: (Optional) Base URL for the watsonx endpoint (e.g., "https://us-south.ml.cloud.ibm.com").
        hooks: Optional observability hooks.
        model_id: (Optional) Alias for model_name; takes precedence when both are given.
        model_kwargs: Additional keyword args passed to ModelInference constructor.

    Raises:
        EnvironmentError: If the URL, API key, or a project/space ID cannot be
            resolved from arguments or environment variables.
        ValueError: If neither model_name nor model_id is provided.
    """
    self._model_kwargs = model_kwargs

    url = url or os.getenv(WX_URL)
    if not url:
        raise EnvironmentError(
            f"Missing API URL; please set the '{WX_URL}' environment variable."
        )

    api_key = api_key or os.getenv(WX_API_KEY)
    if not api_key:
        raise EnvironmentError(
            f"Missing API key; please set the '{WX_API_KEY}' environment variable."
        )

    # Resolve a scoping container: prefer project_id, fall back to space_id.
    # Bug fix: previously the error was raised even when WX_SPACE_ID resolved
    # from the environment; raise only when *both* identifiers are unavailable,
    # matching the error message's "project or space" wording.
    if not project_id:
        project_id = os.getenv(WX_PROJECT_ID)
    if not project_id and not space_id:
        space_id = os.getenv(WX_SPACE_ID)
        if not space_id:
            raise EnvironmentError(
                f"Missing project ID; please set the '{WX_PROJECT_ID}' or '{WX_SPACE_ID}' environment variable."
            )

    creds = Credentials(api_key=api_key, url=url)

    if model_id:
        # If model_id is provided, use it as the model_name
        model_name = model_id

    self.model_name = model_name
    self.model_id = model_name

    if not model_name:
        raise ValueError("model_name or model_id must be provided")

    # Assemble provider_kwargs for LLMClient base class
    provider_kwargs: Dict[str, Any] = {
        "model_id": model_name,
        "credentials": creds,
    }
    if project_id:
        provider_kwargs["project_id"] = project_id
    elif space_id:
        provider_kwargs["space_id"] = space_id

    if deployment_id:
        provider_kwargs["deployment_id"] = deployment_id

    # Pass through any additional ModelInference args (params, verify, validate, etc.)
    provider_kwargs.update(model_kwargs)

    # Initialize underlying ModelInference instance via LLMClient logic
    super().__init__(
        client=None, client_needs_init=True, hooks=hooks, **provider_kwargs
    )
|
|
122
|
+
|
|
123
|
+
@classmethod
def provider_class(cls) -> Type:
    """Return the underlying watsonx.ai SDK client class, ModelInference."""
    return ModelInference
|
|
129
|
+
|
|
130
|
+
def _register_methods(self) -> None:
    """
    Map generation modes onto the corresponding watsonx SDK entry points:

    - 'text'       -> ModelInference.generate
    - 'text_async' -> ModelInference.agenerate
    - 'chat'       -> ModelInference.chat
    - 'chat_async' -> ModelInference.achat
    """
    # (mode, SDK method name, keyword carrying the primary input)
    method_table = (
        (GenerationMode.TEXT.value, "generate", "prompt"),
        (GenerationMode.TEXT_ASYNC.value, "agenerate", "prompt"),
        (GenerationMode.CHAT.value, "chat", "messages"),
        (GenerationMode.CHAT_ASYNC.value, "achat", "messages"),
    )
    for mode, method_name, input_keyword in method_table:
        self.set_method_config(mode, method_name, input_keyword)
|
|
145
|
+
|
|
146
|
+
def _setup_parameter_mapper(self) -> None:
    """Configure generic-to-provider parameter name translation for IBM WatsonX."""
    self._parameter_mapper = ParameterMapper()

    # Text generation parameters (based on TextGenParameters).
    text_mappings = (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("top_k", "top_k"),
        ("max_tokens", "max_new_tokens"),
        ("min_tokens", "min_new_tokens"),
        ("repetition_penalty", "repetition_penalty"),
        ("seed", "random_seed"),
        ("stop_sequences", "stop_sequences"),
        ("timeout", "time_limit"),
        ("decoding_method", "decoding_method"),
    )
    for generic_name, provider_name in text_mappings:
        self._parameter_mapper.set_text_mapping(generic_name, provider_name)

    # Chat parameters (based on TextChatParameters).
    chat_mappings = (
        ("temperature", "temperature"),
        ("top_p", "top_p"),
        ("max_tokens", "max_tokens"),
        ("frequency_penalty", "frequency_penalty"),
        ("presence_penalty", "presence_penalty"),
        ("seed", "seed"),
        ("stop_sequences", "stop"),
        ("timeout", "time_limit"),
        ("logprobs", "logprobs"),
        ("top_logprobs", "top_logprobs"),
    )
    for generic_name, provider_name in chat_mappings:
        self._parameter_mapper.set_chat_mapping(generic_name, provider_name)

    # Custom transform for 'echo': only text modes can echo input back.
    def transform_echo_text_mode(value, mode):
        if mode in ["text", "text_async"]:
            # NOTE(review): keying off the substring "stop" in the stringified
            # value looks suspicious but is preserved verbatim — confirm intent.
            if "stop" in str(value).lower():
                return {"include_stop_sequence": value}
            return {}
        return {}

    self._parameter_mapper.set_custom_transform("echo", transform_echo_text_mode)
|
|
194
|
+
|
|
195
|
+
def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
|
|
196
|
+
"""
|
|
197
|
+
Extract the generated text and tool calls from a watsonx response.
|
|
198
|
+
|
|
199
|
+
- For text generation: raw['results'][0]['generated_text']
|
|
200
|
+
- For chat: raw['choices'][0]['message']['content']
|
|
201
|
+
"""
|
|
202
|
+
content = ""
|
|
203
|
+
tool_calls = []
|
|
204
|
+
|
|
205
|
+
# Text‐generation style
|
|
206
|
+
if isinstance(raw, dict) and "results" in raw:
|
|
207
|
+
results = raw["results"]
|
|
208
|
+
if isinstance(results, list) and results:
|
|
209
|
+
first = results[0]
|
|
210
|
+
content = first.get("generated_text", "")
|
|
211
|
+
|
|
212
|
+
# Chat style
|
|
213
|
+
elif isinstance(raw, dict) and "choices" in raw:
|
|
214
|
+
choices = raw["choices"]
|
|
215
|
+
if isinstance(choices, list) and choices:
|
|
216
|
+
first = choices[0]
|
|
217
|
+
msg = first.get("message")
|
|
218
|
+
if isinstance(msg, dict):
|
|
219
|
+
content = msg.get("content", "")
|
|
220
|
+
# Extract tool calls if present
|
|
221
|
+
if "tool_calls" in msg and msg["tool_calls"]:
|
|
222
|
+
tool_calls = []
|
|
223
|
+
for tool_call in msg["tool_calls"]:
|
|
224
|
+
tool_call_dict = {
|
|
225
|
+
"id": tool_call.get("id"),
|
|
226
|
+
"type": tool_call.get("type", "function"),
|
|
227
|
+
"function": {
|
|
228
|
+
"name": tool_call.get("function", {}).get("name"),
|
|
229
|
+
"arguments": tool_call.get("function", {}).get(
|
|
230
|
+
"arguments"
|
|
231
|
+
),
|
|
232
|
+
},
|
|
233
|
+
}
|
|
234
|
+
tool_calls.append(tool_call_dict)
|
|
235
|
+
elif "text" in first:
|
|
236
|
+
content = first["text"]
|
|
237
|
+
|
|
238
|
+
if not content and not tool_calls:
|
|
239
|
+
raise ValueError(f"Unexpected watsonx response format: {raw!r}")
|
|
240
|
+
|
|
241
|
+
# Return LLMResponse if tool calls exist, otherwise just content
|
|
242
|
+
if tool_calls:
|
|
243
|
+
return LLMResponse(content=content, tool_calls=tool_calls)
|
|
244
|
+
return content
|
|
245
|
+
|
|
246
|
+
def generate(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    mode: Union[str, GenerationMode] = GenerationMode.CHAT,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """
    Synchronous generation override for WatsonX.

    - If mode is 'chat' and prompt is str, wrap into messages list.
    - If mode is 'text', prompt must be str or list of strings.
    - Handle WatsonX-specific params structure.

    Args:
        prompt: A prompt string, or a list of chat message dicts.
        mode: 'text' or 'chat' (string or GenerationMode).
        generation_args: Optional GenerationArgs mapped to provider params.
        **kwargs: Extra provider kwargs; a 'params' dict is merged with
            the mapped generation arguments.

    Raises:
        KeyError: If ``mode`` is not 'text' or 'chat'.
        TypeError: If a chat-mode prompt is neither str nor list.
    """
    mode_str = (mode.value if isinstance(mode, GenerationMode) else mode).lower()

    if mode_str not in ("text", "chat"):
        raise KeyError(
            f"Unsupported mode '{mode_str}' for WatsonxLLMClient.generate"
        )

    # Normalize prompt format based on mode.
    if mode_str == GenerationMode.CHAT.value:
        # Chat mode expects a list of messages.
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]
        elif not isinstance(prompt, list):
            raise TypeError(
                "For chat mode, prompt must be a string or List[Dict[str,str]]"
            )
    elif mode_str == GenerationMode.TEXT.value:
        # Text mode expects a string prompt; flatten message contents.
        if isinstance(prompt, list):
            prompt = "\n".join(
                msg.get("content", "") for msg in prompt if msg.get("content")
            )

    # Copy so a caller-supplied 'params' dict is never mutated in place;
    # also tolerates an explicit params=None.
    existing_params = dict(kwargs.pop("params", None) or {})

    # Map generation_args to WatsonX parameters if provided.
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            existing_params.update(
                self._parameter_mapper.map_args(generation_args, mode_str)
            )

    # Assemble WatsonX-style kwargs: mapped params first, then any
    # remaining kwargs that are not generation parameters.
    watsonx_kwargs: Dict[str, Any] = {}
    if existing_params:
        watsonx_kwargs["params"] = existing_params
    watsonx_kwargs.update(kwargs)

    return super().generate(prompt=prompt, mode=mode_str, **watsonx_kwargs)
async def generate_async(
    self,
    prompt: Union[str, List[Dict[str, Any]]],
    mode: Union[str, GenerationMode] = GenerationMode.CHAT_ASYNC,
    generation_args: Optional[Any] = None,
    **kwargs: Any,
) -> str:
    """
    Asynchronous generation override for WatsonX.

    - If mode is 'chat_async', wrap prompt into messages.
    - If mode is 'text_async', prompt must be str or list of strings.
    - Handle WatsonX-specific params structure.

    Args:
        prompt: A prompt string, or a list of chat message dicts.
        mode: 'text_async' or 'chat_async' (string or GenerationMode).
        generation_args: Optional GenerationArgs mapped to provider params.
        **kwargs: Extra provider kwargs; a 'params' dict is merged with
            the mapped generation arguments.

    Raises:
        KeyError: If ``mode`` is not 'text_async' or 'chat_async'.
        TypeError: If a chat-mode prompt is neither str nor list.
    """
    mode_str = (mode.value if isinstance(mode, GenerationMode) else mode).lower()

    if mode_str not in ("text_async", "chat_async"):
        raise KeyError(
            f"Unsupported mode '{mode_str}' for WatsonxLLMClient.generate_async"
        )

    if mode_str == GenerationMode.CHAT_ASYNC.value:
        # Chat mode expects a list of messages.
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]
        elif not isinstance(prompt, list):
            raise TypeError(
                "For chat_async mode, prompt must be a string or List[Dict[str,str]]"
            )
    elif mode_str == GenerationMode.TEXT_ASYNC.value:
        # Text mode expects a string prompt; flatten message contents.
        if isinstance(prompt, list):
            prompt = "\n".join(
                msg.get("content", "") for msg in prompt if msg.get("content")
            )

    # Copy so a caller-supplied 'params' dict is never mutated in place;
    # also tolerates an explicit params=None.
    existing_params = dict(kwargs.pop("params", None) or {})

    # Map generation_args to WatsonX parameters if provided.
    if generation_args and self._parameter_mapper:
        from llmevalkit.llm.types import GenerationArgs

        if isinstance(generation_args, GenerationArgs):
            existing_params.update(
                self._parameter_mapper.map_args(generation_args, mode_str)
            )

    # Assemble WatsonX-style kwargs: mapped params first, then any
    # remaining kwargs that are not generation parameters.
    watsonx_kwargs: Dict[str, Any] = {}
    if existing_params:
        watsonx_kwargs["params"] = existing_params
    watsonx_kwargs.update(kwargs)

    return await super().generate_async(
        prompt=prompt, mode=mode_str, **watsonx_kwargs
    )
# -------------------------------------------------------------------
# 2. Validating Watsonx wrapper
# -------------------------------------------------------------------
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@register_llm("watsonx.output_val")
class WatsonxLLMClientOutputVal(ValidatingLLMClient):
    """
    Validating adapter for IBM watsonx.ai Foundation Model.

    Extends ValidatingLLMClient to enforce output structure (via JSON Schema,
    Pydantic models, or simple Python types) on all generate calls,
    with retries and batch support (sync & async).
    """

    def __init__(
        self,
        model_id: str,
        api_key: Optional[str] = None,
        project_id: Optional[str] = None,
        space_id: Optional[str] = None,
        username: Optional[str] = None,
        version: Optional[str] = None,
        instance_id: Optional[str] = None,
        password: Optional[str] = None,
        url: Optional[str] = "https://us-south.ml.cloud.ibm.com",
        hooks: Optional[List[Hook]] = None,
        **model_kwargs: Any,
    ) -> None:
        """
        Initialize a Watsonx client with output validation.

        Args:
            model_id: Identifier of the watsonx model.
            api_key: Your IBM Cloud API Key.
            project_id: (Optional) watsonx project ID.
            url: (Optional) Base URL for the watsonx endpoint.
            hooks: Optional observability hooks.
            model_kwargs: Additional arguments passed to the ModelInference constructor.
        """
        self.model_id = model_id
        self._model_kwargs = model_kwargs

        creds_args: Dict[str, Any] = {"url": url}
        if api_key is not None:
            creds_args["api_key"] = api_key
        if version is not None:
            # On-prem (CPD-style) authentication: version, instance_id and
            # username are supplied together.
            # NOTE(review): this grouping is reconstructed from a flattened
            # source — confirm instance_id/username belong under the
            # version branch.
            creds_args["version"] = version
            creds_args["instance_id"] = instance_id
            creds_args["username"] = username
        if api_key is None and password is not None:
            creds_args["password"] = password

        creds = Credentials(**creds_args)
        provider_kwargs: Dict[str, Any] = {
            "model_id": model_id,
            "credentials": creds,
        }
        if project_id is not None:
            provider_kwargs["project_id"] = project_id
        if space_id is not None:
            provider_kwargs["space_id"] = space_id

        provider_kwargs.update(model_kwargs)

        super().__init__(
            client=None, client_needs_init=True, hooks=hooks, **provider_kwargs
        )

    @classmethod
    def provider_class(cls) -> Type:
        """
        Underlying SDK client class: ModelInference.
        """
        return ModelInference

    def _register_methods(self) -> None:
        """
        Register how to call watsonx methods for validation:

        - 'text' -> ModelInference.generate
        - 'text_async' -> ModelInference.agenerate
        - 'chat' -> ModelInference.chat
        - 'chat_async' -> ModelInference.achat
        """
        self.set_method_config(GenerationMode.TEXT.value, "generate", "prompt")
        self.set_method_config(
            GenerationMode.TEXT_ASYNC.value, "agenerate", "prompt")
        self.set_method_config(GenerationMode.CHAT.value, "chat", "messages")
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "achat", "messages")

    def _setup_parameter_mapper(self) -> None:
        """Setup parameter mapping for IBM WatsonX provider (same as regular WatsonX)."""
        # NOTE(review): unlike WatsonxLLMClient, there is no
        # 'decoding_method' text mapping here despite the "same as"
        # docstring — confirm this omission is intentional.
        self._parameter_mapper = ParameterMapper()

        # Text generation parameters (based on TextGenParameters)
        self._parameter_mapper.set_text_mapping("temperature", "temperature")
        self._parameter_mapper.set_text_mapping("top_p", "top_p")
        self._parameter_mapper.set_text_mapping("top_k", "top_k")
        self._parameter_mapper.set_text_mapping("max_tokens", "max_new_tokens")
        self._parameter_mapper.set_text_mapping("min_tokens", "min_new_tokens")
        self._parameter_mapper.set_text_mapping(
            "repetition_penalty", "repetition_penalty"
        )
        self._parameter_mapper.set_text_mapping("seed", "random_seed")
        self._parameter_mapper.set_text_mapping(
            "stop_sequences", "stop_sequences")
        self._parameter_mapper.set_text_mapping("timeout", "time_limit")

        # Chat parameters (based on TextChatParameters)
        self._parameter_mapper.set_chat_mapping("temperature", "temperature")
        self._parameter_mapper.set_chat_mapping("top_p", "top_p")
        self._parameter_mapper.set_chat_mapping("max_tokens", "max_tokens")
        self._parameter_mapper.set_chat_mapping(
            "frequency_penalty", "frequency_penalty"
        )
        self._parameter_mapper.set_chat_mapping(
            "presence_penalty", "presence_penalty")
        self._parameter_mapper.set_chat_mapping("seed", "seed")
        self._parameter_mapper.set_chat_mapping("stop_sequences", "stop")
        self._parameter_mapper.set_chat_mapping("timeout", "time_limit")
        self._parameter_mapper.set_chat_mapping("logprobs", "logprobs")
        self._parameter_mapper.set_chat_mapping("top_logprobs", "top_logprobs")

        def transform_echo_text_mode(value, mode):
            # Text modes may echo the input; map 'echo' onto
            # include_stop_sequence only when the value mentions 'stop'.
            if mode in ["text", "text_async"]:
                return (
                    {"include_stop_sequence": value}
                    if "stop" in str(value).lower()
                    else {}
                )
            return {}

        self._parameter_mapper.set_custom_transform(
            "echo", transform_echo_text_mode)

    def _parse_llm_response(self, raw: Any) -> str:
        """
        Extract the assistant-generated text from a watsonx response.

        Same logic as non-validating client.

        Raises:
            ValueError: If ``raw`` matches neither known response shape.
        """
        if isinstance(raw, dict) and "results" in raw:
            results = raw["results"]
            if isinstance(results, list) and results:
                return results[0].get("generated_text", "")
        if isinstance(raw, dict) and "choices" in raw:
            choices = raw["choices"]
            if isinstance(choices, list) and choices:
                first = choices[0]
                msg = first.get("message")
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if "text" in first:
                    return first["text"]
        raise ValueError(f"Unexpected watsonx response format: {raw!r}")

    def _build_watsonx_kwargs(
        self,
        generation_args: Optional[Any],
        mode: str,
        kwargs: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Fold generation_args and caller kwargs into WatsonX call kwargs.

        Shared by generate / generate_async so the mapping logic lives in
        one place. Pops 'params' out of ``kwargs`` (copying it so the
        caller's dict is never mutated), merges in parameters mapped from
        ``generation_args``, and forwards all remaining kwargs untouched.
        """
        existing_params = dict(kwargs.pop("params", None) or {})

        if generation_args and self._parameter_mapper:
            from llmevalkit.llm.types import GenerationArgs

            if isinstance(generation_args, GenerationArgs):
                existing_params.update(
                    self._parameter_mapper.map_args(generation_args, mode)
                )

        watsonx_kwargs: Dict[str, Any] = {}
        if existing_params:
            watsonx_kwargs["params"] = existing_params
        # Any other kwargs that aren't generation parameters.
        watsonx_kwargs.update(kwargs)
        return watsonx_kwargs

    def generate(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        *,
        schema: SchemaType,
        retries: int = 3,
        generation_args: Optional[Any] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Synchronous chat generation with validation + retries.

        Args:
            prompt: Either a string or a list of chat messages.
            schema: JSON Schema dict, Pydantic model class, or built-in Python type.
            retries: Maximum attempts (including the first).
            generation_args: GenerationArgs to map to provider parameters.
            **kwargs: Passed to the underlying ModelInference call (e.g., temperature).
        """
        mode = "chat"

        # Normalize prompt to chat-messages.
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        watsonx_kwargs = self._build_watsonx_kwargs(generation_args, mode, kwargs)

        # Merge via a dict so later sources silently override earlier ones
        # (model kwargs, then call kwargs), preserving the original
        # precedence instead of raising on duplicate keywords.
        call_kwargs: Dict[str, Any] = {
            "prompt": prompt,
            "schema": schema,
            "retries": retries,
            "mode": mode,
            **self._model_kwargs,
            **watsonx_kwargs,
        }
        return super().generate(**call_kwargs)

    async def generate_async(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        *,
        schema: SchemaType,
        retries: int = 3,
        generation_args: Optional[Any] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Asynchronous chat generation with validation + retries.

        Args:
            prompt: Either a string or a list of chat messages.
            schema: JSON Schema dict, Pydantic model class, or built-in Python type.
            retries: Maximum attempts.
            generation_args: GenerationArgs to map to provider parameters.
            **kwargs: Passed to the underlying ModelInference call.
        """
        mode = "chat_async"

        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        watsonx_kwargs = self._build_watsonx_kwargs(generation_args, mode, kwargs)

        # Same precedence-preserving merge as the sync path.
        call_kwargs: Dict[str, Any] = {
            "prompt": prompt,
            "schema": schema,
            "retries": retries,
            "mode": mode,
            **self._model_kwargs,
            **watsonx_kwargs,
        }
        return await super().generate_async(**call_kwargs)
|
|
File without changes
|