ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
import os
|
|
10
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
11
|
+
|
|
12
|
+
from llmevalkit.llm.base import Hook, LLMClient, register_llm
|
|
13
|
+
from llmevalkit.llm.output_parser import ValidatingLLMClient
|
|
14
|
+
from llmevalkit.llm.types import GenerationMode, LLMResponse, ParameterMapper
|
|
15
|
+
|
|
16
|
+
from .wxo_ai_gateway_inference import WxoAIGatewayInference
|
|
17
|
+
|
|
18
|
+
# Schema forms accepted by the validating generate calls in this module:
# a JSON Schema dict, a model class (named via the forward-reference string
# "BaseModel", which is not imported in the visible imports), or a plain type.
SchemaType = Union[Dict[str, Any], Type["BaseModel"], Type]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_llm("wxo_ai_gateway.output_val")
class WxoAIGatewayClientOutputVal(ValidatingLLMClient):
    """
    Output-validating LLM client backed by WxoAIGatewayInference.

    Registered under the name "wxo_ai_gateway.output_val". Only chat-style
    generation is configured: the sync provider method ``chat`` and the
    async provider method ``achat``.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        hooks: Optional[List[Hook]] = None,
        **kwargs,
    ):
        """
        Initialise the client without a pre-built provider instance.

        Args:
            api_key: Forwarded to the base class as the ``api_key`` provider argument.
            url: Forwarded to the base class as the ``url`` provider argument.
            hooks: Optional hooks forwarded to the base class.
            **kwargs: Accepted for signature compatibility; not forwarded.
        """
        provider_kwargs = {"api_key": api_key, "url": url}
        # client=None + client_needs_init=True: the base class is asked to build
        # the provider itself (presumably from provider_class() -- confirm in base).
        super().__init__(
            client=None, client_needs_init=True, hooks=hooks, **provider_kwargs
        )

    @classmethod
    def provider_class(cls) -> Type:
        """
        Underlying client class: WxoAIGatewayInference.
        """
        return WxoAIGatewayInference

    def _register_methods(self) -> None:
        """
        Register how to call wxo ai gateway methods for validation:

            - GenerationMode.CHAT       -> WxoAIGatewayInference.chat
            - GenerationMode.CHAT_ASYNC -> WxoAIGatewayInference.achat

        Both are registered with "messages" as the third argument of
        set_method_config (presumably the prompt parameter name -- confirm
        against the base class).
        """
        self.set_method_config(GenerationMode.CHAT.value, "chat", "messages")
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "achat", "messages")

    def _parse_llm_response(self, raw: Any) -> str:
        """
        Extract the assistant-generated text from a wxo ai gateway response.

        Recognised shapes, checked in this order:
            - {"results": [{"generated_text": ...}, ...]}
            - {"choices": [{"message": {"content": ...}}, ...]}
            - {"choices": [{"text": ...}, ...]}

        Args:
            raw: The decoded response returned by the provider.

        Returns:
            The text of the first result/choice.

        Raises:
            ValueError: If ``raw`` is not a dict or no text can be located.
                When ``raw`` carries a "message" key, that value is used as
                the error message.
        """
        # A non-dict payload (None, str, list, ...) can never match; fail with
        # the documented ValueError rather than an AttributeError on .get().
        if not isinstance(raw, dict):
            raise ValueError("Invalid response format")

        results = raw.get("results")
        if isinstance(results, list) and results:
            first = results[0]
            if isinstance(first, dict):
                return first.get("generated_text", "")

        choices = raw.get("choices")
        if isinstance(choices, list) and choices:
            first = choices[0]
            if isinstance(first, dict):
                msg = first.get("message")
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                if "text" in first:
                    return first["text"]

        raise ValueError(raw.get("message", "Invalid response format"))

    def generate(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        *,
        schema: SchemaType,
        retries: int = 3,
        generation_args: Optional[Any] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Synchronous chat generation with validation + retries.

        Args:
            prompt: Either a string or a list of chat messages. A string is
                wrapped as a single "user" message.
            schema: JSON Schema dict, Pydantic model class, or built-in Python type.
            retries: Maximum attempts (including the first).
            generation_args: Accepted for interface compatibility; currently
                not forwarded to the base implementation.
            **kwargs: Accepted for interface compatibility; currently not
                forwarded to the base implementation.

        Returns:
            Whatever ValidatingLLMClient.generate returns for mode "chat".
        """
        mode = "chat"

        # Normalize prompt to chat-messages
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        return super().generate(
            prompt=prompt,
            schema=schema,
            retries=retries,
            mode=mode,
        )

    async def generate_async(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        *,
        schema: SchemaType,
        retries: int = 3,
        generation_args: Optional[Any] = None,
        **kwargs: Any,
    ) -> Any:
        """
        Asynchronous chat generation with validation + retries.

        Args:
            prompt: Either a string or a list of chat messages. A string is
                wrapped as a single "user" message.
            schema: JSON Schema dict, Pydantic model class, or built-in Python type.
            retries: Maximum attempts.
            generation_args: Accepted for interface compatibility; currently
                not forwarded to the base implementation.
            **kwargs: Accepted for interface compatibility; currently not
                forwarded to the base implementation.

        Returns:
            Whatever ValidatingLLMClient.generate_async returns for mode
            "chat_async".
        """
        mode = "chat_async"

        # Normalize prompt to chat-messages
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        return await super().generate_async(
            prompt=prompt,
            schema=schema,
            retries=retries,
            mode=mode,
        )
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# ----------------------------------------------------------------------------------------------------
|
|
2
|
+
# IBM Confidential
|
|
3
|
+
# Licensed Materials - Property of IBM
|
|
4
|
+
# 5737-H76, 5900-A3Q
|
|
5
|
+
# © Copyright IBM Corp. 2025 All Rights Reserved.
|
|
6
|
+
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
|
|
7
|
+
# GSA ADPSchedule Contract with IBM Corp.
|
|
8
|
+
# ----------------------------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import time
|
|
14
|
+
import weakref
|
|
15
|
+
from typing import Dict
|
|
16
|
+
|
|
17
|
+
import aiohttp
|
|
18
|
+
import requests
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class WxoAIGatewayInference():
    """
    Client that posts chat messages to a Wxo AI Gateway endpoint.

    Two request paths are provided: a blocking one (``chat``) built on
    ``requests`` and an asyncio one (``achat``) built on an ``aiohttp``
    session that is cached per event loop. Both paths retry failed requests
    with exponential backoff.
    """

    # HTTP statuses that are retried explicitly (before the final attempt).
    RETRY_AFTER_STATUS_CODES = [502, 503, 504]
    # Total number of attempts made for a single request.
    RETRY_COUNT = 3
    # Base delay in seconds; the wait after attempt k is BACK_OFF_FACTOR * 2**k.
    BACK_OFF_FACTOR = 1

    def __init__(self, api_key: str, url: str) -> None:
        """
        Store the credentials and endpoint; no network activity happens here.

        Args:
            api_key (str): API key sent in the ``IAM-API_KEY`` request header.
            url (str): Endpoint URL that requests are posted to.
        """
        self.api_key = api_key
        self.url = url
        self.session: aiohttp.ClientSession = None
        self._loop: asyncio.AbstractEventLoop = None
        # The finalizer is registered when a session is actually created (see
        # get_session). It must never receive ``self`` as an argument:
        # weakref.finalize keeps strong references to its arguments, which
        # would prevent this object from ever being garbage-collected.
        self._session_finalizer = None

    async def get_session(self) -> "aiohttp.ClientSession":
        """
        Get or create an aiohttp session for the current event loop.

        The cached session is reused only while it is open and was created on
        the loop that is currently running; otherwise it is closed
        (best effort) and replaced.

        Returns:
            aiohttp.ClientSession: Session bound to the running event loop.

        Raises:
            RuntimeError: If there is no running event loop.
        """
        try:
            current_loop = asyncio.get_running_loop()
        except RuntimeError as exc:
            raise RuntimeError(
                "get_session must be called from within an async context") from exc

        session_usable = (
            self.session is not None
            and not self.session.closed
            and self._loop is current_loop
        )
        if session_usable:
            return self.session

        # Close old session if it exists and is from a different loop
        if self.session is not None and not self.session.closed:
            try:
                await self.session.close()
            except Exception as e:
                logger.debug(f"Error closing old session: {e}")

        # Drop the finalizer of a session that is already closed so replaced
        # sessions do not accumulate in the finalizer registry.
        stale_finalizer = self._session_finalizer
        if stale_finalizer is not None and (self.session is None or self.session.closed):
            stale_finalizer.detach()

        # Create new session for current loop
        self.session = aiohttp.ClientSession()
        self._loop = current_loop
        self._session_finalizer = weakref.finalize(
            self, self._finalize_cleanup, self.session)
        logger.debug(f"Created new session for event loop {id(current_loop)}")

        return self.session

    @staticmethod
    def _finalize_cleanup(session):
        """
        Close ``session`` when the owning client is garbage-collected.

        This function is synchronous, so the asynchronous ``close()`` is
        either scheduled on the running loop or driven to completion on an
        idle one. Failures are logged and never raised.

        Args:
            session: The aiohttp session to close; may be None or already closed.
        """
        if session is None or session.closed:
            return
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                loop.create_task(session.close())
            else:
                loop.run_until_complete(session.close())
        except Exception as e:
            logger.info(f"Error during cleanup: {e}")

    def chat(self, messages: list[Dict], **kwargs) -> dict:
        """
        Sync chat method.

        Args:
            messages (list[Dict]): List of messages to send.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            dict: Response from the API.
        """
        payload_data = {
            "messages": messages
        }
        return self._post(payload_data)

    async def achat(self, messages: list[Dict], **kwargs) -> dict:
        """
        async implementation of the chat method.

        Args:
            messages (list[Dict]): List of messages to send.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            dict: Response from the API.
        """
        payload_data = {
            "messages": messages
        }
        return await self._apost(payload_data)

    def _get_headers(self) -> Dict[str, str]:
        """Build the HTTP headers (JSON content negotiation plus the API key)."""
        return {
            "Content-Type": "application/json",
            "accept": "application/json",
            "IAM-API_KEY": self.api_key
        }

    def _post(self, payload_data: dict) -> dict:
        """
        Make a synchronous POST request to the Wxo AI Gateway API.

        Args:
            payload_data: Dictionary containing the request payload

        Returns:
            dict: JSON response from the API

        Raises:
            requests.HTTPError: If the request fails after all retries
            json.JSONDecodeError: If response cannot be parsed as JSON
        """
        headers = self._get_headers()

        for attempt in range(self.RETRY_COUNT):
            is_last_attempt = attempt >= self.RETRY_COUNT - 1
            try:
                # NOTE(review): certificate verification is disabled, so the
                # API key header is sent without authenticating the server.
                # Confirm this is intentional for the target deployments.
                response = requests.post(
                    url=self.url,
                    json=payload_data,
                    headers=headers,
                    timeout=30.0,
                    verify=False
                )

                response_status = response.status_code

                if response_status == 200:
                    return response.json()

                if response_status in self.RETRY_AFTER_STATUS_CODES and not is_last_attempt:
                    backoff_time = self.BACK_OFF_FACTOR * (2 ** attempt)
                    logger.info(
                        f"Received status {response_status}, retrying in {backoff_time}s (attempt {attempt + 1}/{self.RETRY_COUNT})...")
                    time.sleep(backoff_time)
                    continue

                # Raises for 4xx/5xx; the HTTPError is a RequestException, so
                # it is handled (and possibly retried) by the except below.
                response.raise_for_status()
                return response.json()

            except requests.RequestException as e:
                if is_last_attempt:
                    raise
                backoff_time = self.BACK_OFF_FACTOR * (2 ** attempt)
                logger.info(
                    f"Request failed with {type(e).__name__}, retrying in {backoff_time}s (attempt {attempt + 1}/{self.RETRY_COUNT})...")
                time.sleep(backoff_time)

    async def _apost(self, payload_data: dict) -> dict:
        """
        Make an asynchronous POST request to the Wxo AI Gateway API.

        Mirrors ``_post``: error statuses raise instead of being returned as
        if they were successful payloads, and timeouts are retried like any
        other transport failure.

        Args:
            payload_data: Dictionary containing the request payload

        Returns:
            dict: JSON response from the API

        Raises:
            aiohttp.ClientError: If the request fails after all retries
                (``aiohttp.ClientResponseError`` for an HTTP error status)
            asyncio.TimeoutError: If the final attempt times out
            json.JSONDecodeError: If response cannot be parsed as JSON
        """
        headers = self._get_headers()
        session = await self.get_session()
        for attempt in range(self.RETRY_COUNT):
            is_last_attempt = attempt >= self.RETRY_COUNT - 1
            try:
                # NOTE(review): ssl=False disables certificate verification,
                # so the API key header is sent without authenticating the
                # server. Confirm this is intentional.
                async with session.post(
                    url=self.url,
                    json=payload_data,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=30.0),
                    ssl=False
                ) as response:
                    response_status = response.status

                    if response_status == 200:
                        return await response.json()

                    if response_status in self.RETRY_AFTER_STATUS_CODES and not is_last_attempt:
                        backoff_time = self.BACK_OFF_FACTOR * (2 ** attempt)
                        logger.info(
                            f"Received status {response_status}, retrying in {backoff_time}s (attempt {attempt + 1}/{self.RETRY_COUNT})...")
                        await asyncio.sleep(backoff_time)
                        continue

                    # Same contract as the sync path: an error status raises
                    # (ClientResponseError) rather than returning the error body.
                    response.raise_for_status()
                    return await response.json()

            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                if is_last_attempt:
                    raise
                backoff_time = self.BACK_OFF_FACTOR * (2 ** attempt)
                logger.info(
                    f"Request failed with {type(e).__name__}, retrying in {backoff_time}s (attempt {attempt + 1}/{self.RETRY_COUNT})...")
                await asyncio.sleep(backoff_time)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GenerationMode(Enum):
    """Generation modes shared by the LLM providers.

    Each member's value is the plain string name of the mode.
    """

    # Synchronous modes
    TEXT = "text"
    CHAT = "chat"

    # Asynchronous counterparts
    TEXT_ASYNC = "text_async"
    CHAT_ASYNC = "chat_async"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LLMResponse:
    """Response object that can contain both content and tool calls."""

    def __init__(self, content: str, tool_calls: Optional[List[Dict[str, Any]]] = None):
        """
        Args:
            content: Text content of the response.
            tool_calls: Tool calls attached to the response; a falsy value
                (None or an empty list) is stored as a new empty list.
        """
        self.content = content
        self.tool_calls = tool_calls or []

    def __str__(self) -> str:
        """Return the content of the response as a string.

        ``__str__`` must return a ``str``; a response without text content
        (``content`` is None) is rendered as an empty string instead of
        raising ``TypeError``.
        """
        if self.content is None:
            return ""
        return str(self.content)

    def __repr__(self) -> str:
        """Return a string representation of the LLMResponse object."""
        # !r keeps the output unambiguous when content contains quotes or is None.
        return f"LLMResponse(content={self.content!r}, tool_calls={self.tool_calls!r})"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class GenerationArgs:
    """
    Generation arguments expressed independently of any one LLM provider.

    Every field defaults to None, meaning "not set". Providers are expected
    to translate these generic names into their own parameter names.
    """

    # Length and sampling
    max_tokens: Optional[int] = None
    min_tokens: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None

    # Penalties
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    repetition_penalty: Optional[float] = None

    # Sequences that end generation
    stop_sequences: Optional[List[str]] = None

    # Reproducibility
    seed: Optional[int] = None

    # Decoding strategy: "greedy" or "sample"
    decoding_method: Optional[str] = None

    # Output shaping
    stream: Optional[bool] = None
    echo: Optional[bool] = None
    logprobs: Optional[bool] = None
    top_logprobs: Optional[int] = None

    # Request-level settings
    timeout: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the arguments that are set, i.e. every attribute that is not None."""
        populated = {}
        for name, value in vars(self).items():
            if value is None:
                continue
            populated[name] = value
        return populated
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ParameterMapper:
    """
    Maps generic generation arguments to provider-specific parameters.

    A mapper holds one name mapping for text modes, one for chat modes, and
    optional per-parameter transform functions. Nothing here is abstract;
    a new instance starts with empty mappings and passes names through
    unchanged.
    """

    def __init__(self):
        self._text_mappings: Dict[str, str] = {}
        self._chat_mappings: Dict[str, str] = {}
        self._custom_transforms: Dict[str, callable] = {}

    def set_text_mapping(self, generic_param: str, provider_param: str) -> None:
        """Set parameter mapping for text generation mode."""
        self._text_mappings[generic_param] = provider_param

    def set_chat_mapping(self, generic_param: str, provider_param: str) -> None:
        """Set parameter mapping for chat generation mode."""
        self._chat_mappings[generic_param] = provider_param

    def set_custom_transform(
        self, generic_param: str, transform_func: callable
    ) -> None:
        """Set a custom transformation function for a parameter.

        The function is called as ``transform_func(value, mode)``. A dict
        result is merged into the output; any other result is stored under
        the generic parameter name.
        """
        self._custom_transforms[generic_param] = transform_func

    def map_args(self, args: "GenerationArgs", mode: str) -> Dict[str, Any]:
        """
        Map generic arguments to provider-specific parameters.

        Args:
            args: Generic generation arguments
            mode: Generation mode ('text', 'chat', 'text_async', 'chat_async').
                An Enum member whose value is one of these strings is also
                accepted.

        Returns:
            Dictionary of provider-specific parameters
        """
        # An Enum member never compares equal to its string value, so resolve
        # it first; otherwise chat modes passed as enums would silently fall
        # back to the text mappings.
        mode_name = mode.value if isinstance(mode, Enum) else mode
        is_chat_mode = mode_name in ("chat", "chat_async")
        mappings = self._chat_mappings if is_chat_mode else self._text_mappings

        provider_args: Dict[str, Any] = {}
        for generic_param, value in args.to_dict().items():
            # A custom transform takes precedence over any name mapping.
            if generic_param in self._custom_transforms:
                # The transform receives ``mode`` exactly as the caller passed it.
                transformed = self._custom_transforms[generic_param](value, mode)
                if isinstance(transformed, dict):
                    provider_args.update(transformed)
                else:
                    # If transform returns a single value, use the generic param name
                    provider_args[generic_param] = transformed
            else:
                # Direct mapping if available, otherwise the generic name itself.
                provider_args[mappings.get(generic_param, generic_param)] = value

        return provider_args
|