ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py
@@ -0,0 +1,639 @@

try:
    import openai
except ImportError as e:
    raise ImportError(
        "OpenAI library is required for this module. Please install it with 'pip install llmevalkit[openai]'"
    ) from e

from typing import Any, Optional, Dict, List, Union
from llmevalkit.llm.base import LLMClient, register_llm
from llmevalkit.llm.output_parser import ValidatingLLMClient
from llmevalkit.llm.types import LLMResponse, GenerationMode, ParameterMapper
from pydantic import BaseModel


class BaseOpenAIClient(LLMClient):
    """Base class for OpenAI and Azure OpenAI clients with shared parameter mapping"""

    def _setup_parameter_mapper(self) -> None:
        """Set up parameter mapper for OpenAI-compatible APIs"""
        self._parameter_mapper = ParameterMapper()

        # Direct mappings for text and chat modes
        # Text mode parameters
        self._parameter_mapper.set_text_mapping("max_tokens", "max_tokens")
        self._parameter_mapper.set_text_mapping("temperature", "temperature")
        self._parameter_mapper.set_text_mapping("top_p", "top_p")
        self._parameter_mapper.set_text_mapping("presence_penalty", "presence_penalty")
        self._parameter_mapper.set_text_mapping(
            "frequency_penalty", "frequency_penalty"
        )
        self._parameter_mapper.set_text_mapping("stop_sequences", "stop")
        self._parameter_mapper.set_text_mapping("logprobs", "logprobs")
        self._parameter_mapper.set_text_mapping("top_logprobs", "top_logprobs")
        self._parameter_mapper.set_text_mapping("echo", "echo")
        self._parameter_mapper.set_text_mapping("seed", "seed")
        self._parameter_mapper.set_text_mapping("stream", "stream")
        self._parameter_mapper.set_text_mapping("timeout", "timeout")

        # Chat mode parameters
        self._parameter_mapper.set_chat_mapping("max_tokens", "max_tokens")
        self._parameter_mapper.set_chat_mapping("temperature", "temperature")
        self._parameter_mapper.set_chat_mapping("top_p", "top_p")
        self._parameter_mapper.set_chat_mapping("presence_penalty", "presence_penalty")
        self._parameter_mapper.set_chat_mapping(
            "frequency_penalty", "frequency_penalty"
        )
        self._parameter_mapper.set_chat_mapping("stop_sequences", "stop")
        self._parameter_mapper.set_chat_mapping("logprobs", "logprobs")
        self._parameter_mapper.set_chat_mapping("top_logprobs", "top_logprobs")
        self._parameter_mapper.set_chat_mapping("seed", "seed")
        self._parameter_mapper.set_chat_mapping("stream", "stream")
        self._parameter_mapper.set_chat_mapping("timeout", "timeout")

        # Custom transform for decoding_method
        def transform_decoding_method(value, mode):
            # OpenAI doesn't have direct decoding_method, map to temperature for approximation
            if value == "greedy":
                return {"temperature": 0.0}
            elif value == "sample":
                return {}  # Use default temperature
            else:
                return {}  # Unknown method, no transformation

        # Custom transform for min_tokens (not supported by OpenAI)
        def transform_min_tokens(value, mode):
            # OpenAI doesn't support min_tokens, so we ignore it and emit a warning
            import warnings

            warnings.warn(
                f"min_tokens parameter ({value}) is not supported by OpenAI. Parameter will be ignored.",
                UserWarning,
            )
            return {}  # Return empty dict to ignore the parameter

        self._parameter_mapper.set_custom_transform(
            "decoding_method", transform_decoding_method
        )
        self._parameter_mapper.set_custom_transform("min_tokens", transform_min_tokens)
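The two custom transforms are the only mappings that change meaning rather than names: a watsonx-style decoding_method is approximated via temperature, and min_tokens is dropped with a warning. A standalone sketch of that behavior (illustrative only, not part of the packaged file; the ParameterMapper wiring above is assumed, not shown):

import warnings

def transform_decoding_method(value, mode):
    # "greedy" is approximated as temperature=0.0; "sample" and anything
    # unknown fall through to the API's default sampling settings.
    return {"temperature": 0.0} if value == "greedy" else {}

def transform_min_tokens(value, mode):
    # OpenAI has no min_tokens equivalent, so the value is discarded loudly.
    warnings.warn(
        f"min_tokens parameter ({value}) is not supported by OpenAI. "
        "Parameter will be ignored.",
        UserWarning,
    )
    return {}

assert transform_decoding_method("greedy", "chat") == {"temperature": 0.0}
assert transform_decoding_method("sample", "chat") == {}
assert transform_min_tokens(10, "chat") == {}  # also emits a UserWarning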
class BaseValidatingOpenAIClient(ValidatingLLMClient):
    """Base class for validating OpenAI and Azure OpenAI clients with shared parameter mapping"""

    def _setup_parameter_mapper(self) -> None:
        """Set up parameter mapper for OpenAI-compatible APIs"""
        self._parameter_mapper = ParameterMapper()

        # Direct mappings for text and chat modes
        # Text mode parameters
        self._parameter_mapper.set_text_mapping("max_tokens", "max_tokens")
        self._parameter_mapper.set_text_mapping("temperature", "temperature")
        self._parameter_mapper.set_text_mapping("top_p", "top_p")
        self._parameter_mapper.set_text_mapping("presence_penalty", "presence_penalty")
        self._parameter_mapper.set_text_mapping(
            "frequency_penalty", "frequency_penalty"
        )
        self._parameter_mapper.set_text_mapping("stop_sequences", "stop")
        self._parameter_mapper.set_text_mapping("logprobs", "logprobs")
        self._parameter_mapper.set_text_mapping("top_logprobs", "top_logprobs")
        self._parameter_mapper.set_text_mapping("echo", "echo")
        self._parameter_mapper.set_text_mapping("seed", "seed")
        self._parameter_mapper.set_text_mapping("stream", "stream")
        self._parameter_mapper.set_text_mapping("timeout", "timeout")

        # Chat mode parameters
        self._parameter_mapper.set_chat_mapping("max_tokens", "max_tokens")
        self._parameter_mapper.set_chat_mapping("temperature", "temperature")
        self._parameter_mapper.set_chat_mapping("top_p", "top_p")
        self._parameter_mapper.set_chat_mapping("presence_penalty", "presence_penalty")
        self._parameter_mapper.set_chat_mapping(
            "frequency_penalty", "frequency_penalty"
        )
        self._parameter_mapper.set_chat_mapping("stop_sequences", "stop")
        self._parameter_mapper.set_chat_mapping("logprobs", "logprobs")
        self._parameter_mapper.set_chat_mapping("top_logprobs", "top_logprobs")
        self._parameter_mapper.set_chat_mapping("seed", "seed")
        self._parameter_mapper.set_chat_mapping("stream", "stream")
        self._parameter_mapper.set_chat_mapping("tools", "tools")
        self._parameter_mapper.set_chat_mapping("tool_choice", "tool_choice")
        self._parameter_mapper.set_chat_mapping("timeout", "timeout")

        # Custom transform for decoding_method
        def transform_decoding_method(value, mode):
            # OpenAI doesn't have direct decoding_method, map to temperature for approximation
            if value == "greedy":
                return {"temperature": 0.0}
            elif value == "sample":
                return {}  # Use default temperature
            else:
                return {}  # Unknown method, no transformation

        # Custom transform for min_tokens (not supported by OpenAI)
        def transform_min_tokens(value, mode):
            # OpenAI doesn't support min_tokens, so we ignore it and emit a warning
            import warnings

            warnings.warn(
                f"min_tokens parameter ({value}) is not supported by OpenAI. Parameter will be ignored.",
                UserWarning,
            )
            return {}  # Return empty dict to ignore the parameter

        self._parameter_mapper.set_custom_transform(
            "decoding_method", transform_decoding_method
        )
        self._parameter_mapper.set_custom_transform("min_tokens", transform_min_tokens)


@register_llm("openai.sync")
class SyncOpenAIClient(BaseOpenAIClient, LLMClient):
    """
    Adapter for openai.OpenAI.

    Supports:
      - text: completions.create
      - chat: chat.completions.create
      - text_async: completions.create
      - chat_async: chat.completions.create
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.OpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.TEXT.value, "completions.create", "prompt"
        )
        self.set_method_config(
            GenerationMode.CHAT.value, "chat.completions.create", "messages"
        )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)

    def generate(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        mode: Union[str, GenerationMode] = GenerationMode.CHAT,
        **kwargs: Any,
    ) -> str:
        """
        Generate with proper prompt format validation based on mode.

        Args:
            prompt: Input prompt
            mode: Generation mode (text or chat)
            **kwargs: Additional parameters

        Returns:
            Generated text
        """
        mode_str = mode.value if isinstance(mode, GenerationMode) else mode

        # Validate prompt format based on mode
        if mode_str == GenerationMode.TEXT.value:
            # Text mode expects a string prompt
            if isinstance(prompt, list):
                # Convert messages to simple string
                prompt = "\n".join(
                    [msg.get("content", "") for msg in prompt if msg.get("content")]
                )
        elif mode_str == GenerationMode.CHAT.value:
            # Chat mode expects list of messages
            if isinstance(prompt, str):
                prompt = [{"role": "user", "content": prompt}]

        return super().generate(prompt=prompt, mode=mode_str, **kwargs)
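A hypothetical usage sketch for the sync adapter (illustrative only, not part of the packaged file). It assumes llmevalkit.llm re-exports the class, that provider kwargs such as api_key are forwarded to openai.OpenAI, and that extra kwargs such as model pass through to chat.completions.create:

from llmevalkit.llm import SyncOpenAIClient  # import path assumed

client = SyncOpenAIClient(api_key="sk-...")  # or SyncOpenAIClient(client=existing_openai_client)

# A bare string in chat mode is wrapped as [{"role": "user", "content": ...}]
# before being sent to chat.completions.create.
text = client.generate(
    "Summarize tool calling in one sentence.",
    mode="chat",
    model="gpt-4o-mini",  # assumed to pass through to the API call
)
print(text)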
@register_llm("openai.async")
class AsyncOpenAIClient(BaseOpenAIClient, LLMClient):
    """
    Adapter for openai.AsyncOpenAI.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AsyncOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.TEXT_ASYNC.value, "completions.create", "prompt"
        )
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "chat.completions.create", "messages"
        )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)

    async def generate_async(
        self,
        prompt: Union[str, List[Dict[str, Any]]],
        mode: Union[str, GenerationMode] = GenerationMode.CHAT_ASYNC,
        **kwargs: Any,
    ) -> str:
        """
        Generate async with proper prompt format validation based on mode.

        Args:
            prompt: Input prompt
            mode: Generation mode (text_async or chat_async)
            **kwargs: Additional parameters

        Returns:
            Generated text
        """
        mode_str = mode.value if isinstance(mode, GenerationMode) else mode

        # Validate prompt format based on mode
        if mode_str == GenerationMode.TEXT_ASYNC.value:
            # Text mode expects a string prompt
            if isinstance(prompt, list):
                # Convert messages to simple string
                prompt = "\n".join(
                    [msg.get("content", "") for msg in prompt if msg.get("content")]
                )
        elif mode_str == GenerationMode.CHAT_ASYNC.value:
            # Chat mode expects list of messages
            if isinstance(prompt, str):
                prompt = [{"role": "user", "content": prompt}]

        return await super().generate_async(prompt=prompt, mode=mode_str, **kwargs)
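The async adapter mirrors the sync one; a sketch under the same assumptions:

import asyncio

from llmevalkit.llm import AsyncOpenAIClient  # import path assumed

async def main() -> None:
    client = AsyncOpenAIClient(api_key="sk-...")
    # chat_async is the default mode; string prompts are converted to messages.
    text = await client.generate_async("Say hello.", model="gpt-4o-mini")
    print(text)

asyncio.run(main())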
@register_llm("openai.sync.output_val")
class SyncOpenAIClientOutputVal(BaseOpenAIClient, ValidatingLLMClient):
    """
    Validating adapter for openai.OpenAI with structured output support.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.OpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.CHAT.value, "chat.completions.parse", "messages"
        )

    def generate(
        self,
        prompt: Union[str, List[Dict[str, str]]],
        schema: Optional[Any] = None,
        schema_field: Optional[str] = "response_format",
        retries: int = 3,
        **kwargs: Any,
    ) -> Any:
        """Generate with OpenAI structured output support"""
        # Convert string prompts to message format for chat
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        # For OpenAI, we can use their structured output feature
        if schema_field == "response_format" and schema:
            # Let OpenAI handle parsing
            return super().generate(
                prompt=prompt,
                schema=schema,
                schema_field=schema_field,
                retries=retries,
                **kwargs,
            )
        else:
            # Fall back to our validation logic
            return super().generate(
                prompt=prompt,
                schema=schema,
                schema_field=None,  # Don't use OpenAI's structured output
                retries=retries,
                **kwargs,
            )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)
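A sketch of the validating client (illustrative only, same assumptions as above). With the default schema_field="response_format", the Pydantic schema is handed to chat.completions.parse so the API enforces the structure; schema_field=None instead routes through ValidatingLLMClient's own retry-and-validate loop:

from pydantic import BaseModel

from llmevalkit.llm import SyncOpenAIClientOutputVal  # import path assumed

class Verdict(BaseModel):
    score: float
    explanation: str

client = SyncOpenAIClientOutputVal(api_key="sk-...")
result = client.generate(
    "Rate the answer 'Paris' to 'What is the capital of France?' as JSON.",
    schema=Verdict,
    retries=3,
    model="gpt-4o-mini",  # assumed to pass through to the API call
)
print(result)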
@register_llm("openai.async.output_val")
class AsyncOpenAIClientOutputVal(BaseOpenAIClient, ValidatingLLMClient):
    """
    Validating adapter for openai.AsyncOpenAI with structured output support.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AsyncOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "chat.completions.parse", "messages"
        )

    async def generate_async(
        self,
        prompt: Union[str, List[Dict[str, str]]],
        schema: Optional[Any] = None,
        schema_field: Optional[str] = "response_format",
        retries: int = 3,
        **kwargs: Any,
    ) -> Any:
        """Generate with OpenAI structured output support"""
        # Convert string prompts to message format for chat
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        # For OpenAI, we can use their structured output feature
        if schema_field == "response_format" and schema:
            # Let OpenAI handle parsing
            return await super().generate_async(
                prompt=prompt,
                schema=schema,
                schema_field=schema_field,
                retries=retries,
                **kwargs,
            )
        else:
            # Fall back to our validation logic
            return await super().generate_async(
                prompt=prompt,
                schema=schema,
                schema_field=None,  # Don't use OpenAI's structured output
                retries=retries,
                **kwargs,
            )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)


@register_llm("azure_openai.sync")
class SyncAzureOpenAIClient(BaseOpenAIClient):
    """
    Adapter for openai.AzureOpenAI.

    Supports:
      - text: completions.create
      - chat: chat.completions.create
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AzureOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.TEXT.value, "completions.create", "prompt"
        )
        self.set_method_config(
            GenerationMode.CHAT.value, "chat.completions.create", "messages"
        )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)


@register_llm("azure_openai.async")
class AsyncAzureOpenAIClient(BaseOpenAIClient):
    """
    Adapter for openai.AsyncAzureOpenAI.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AsyncAzureOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.TEXT_ASYNC.value, "completions.create", "prompt"
        )
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "chat.completions.create", "messages"
        )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)
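The azure_openai.* adapters differ from their openai.* counterparts only in provider_class, so an existing AzureOpenAI client can be injected via the client= keyword instead of provider kwargs. A hypothetical sketch (endpoint, API version, and deployment name are placeholders; the inherited generate signature is assumed):

import openai

from llmevalkit.llm import SyncAzureOpenAIClient  # import path assumed

azure = openai.AzureOpenAI(
    api_key="...",
    api_version="2024-06-01",
    azure_endpoint="https://my-resource.openai.azure.com",
)
client = SyncAzureOpenAIClient(client=azure)  # skips provider-side init

text = client.generate(
    [{"role": "user", "content": "ping"}],  # messages passed explicitly;
    mode="chat",                            # this class has no str->messages shim
    model="my-gpt4o-deployment",            # Azure uses deployment names as model
)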
@register_llm("azure_openai.sync.output_val")
class SyncAzureOpenAIClientOutputVal(BaseValidatingOpenAIClient):
    """
    Validating adapter for openai.AzureOpenAI with structured output support.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AzureOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.CHAT.value, "chat.completions.parse", "messages"
        )

    def generate(
        self,
        prompt: Union[str, List[Dict[str, str]]],
        schema: Optional[Any] = None,
        schema_field: Optional[str] = "response_format",
        retries: int = 3,
        **kwargs: Any,
    ) -> Any:
        """Generate with Azure OpenAI structured output support"""
        # Convert string prompts to message format for chat
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        # For Azure OpenAI, we can use their structured output feature
        if schema_field == "response_format" and schema:
            # Let Azure OpenAI handle parsing
            return super().generate(
                prompt=prompt,
                schema=schema,
                schema_field=schema_field,
                retries=retries,
                **kwargs,
            )
        else:
            # Fall back to our validation logic
            return super().generate(
                prompt=prompt,
                schema=schema,
                schema_field=None,  # Don't use Azure OpenAI's structured output
                retries=retries,
                **kwargs,
            )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)


@register_llm("azure_openai.async.output_val")
class AsyncAzureOpenAIClientOutputVal(BaseValidatingOpenAIClient):
    """
    Validating adapter for openai.AsyncAzureOpenAI with structured output support.
    """

    def __init__(self, *, client: Optional[Any] = None, **provider_kwargs: Any) -> None:
        client_needs_init = client is None
        if client_needs_init:
            super().__init__(client_needs_init=True, **provider_kwargs)
        else:
            super().__init__(client=client, **provider_kwargs)

    @classmethod
    def provider_class(cls) -> type:
        return openai.AsyncAzureOpenAI

    def _register_methods(self) -> None:
        self.set_method_config(
            GenerationMode.CHAT_ASYNC.value, "chat.completions.parse", "messages"
        )

    async def generate_async(
        self,
        prompt: Union[str, List[Dict[str, str]]],
        schema: Optional[Any] = None,
        schema_field: Optional[str] = "response_format",
        retries: int = 3,
        **kwargs: Any,
    ) -> Any:
        """Generate with Azure OpenAI structured output support"""
        # Convert string prompts to message format for chat
        if isinstance(prompt, str):
            prompt = [{"role": "user", "content": prompt}]

        # For Azure OpenAI, we can use their structured output feature
        if schema_field == "response_format" and schema:
            # Let Azure OpenAI handle parsing
            return await super().generate_async(
                prompt=prompt,
                schema=schema,
                schema_field=schema_field,
                retries=retries,
                **kwargs,
            )
        else:
            # Fall back to our validation logic
            return await super().generate_async(
                prompt=prompt,
                schema=schema,
                schema_field=None,  # Don't use Azure OpenAI's structured output
                retries=retries,
                **kwargs,
            )

    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
        """Parse response, handling both content and tool calls"""
        return _parse_llm_response(raw)


def _parse_llm_response(raw: Any) -> Union[str, LLMResponse]:
    """
    Helper function to parse OpenAI response and extract content and tool calls.

    Args:
        raw: The raw response from OpenAI API

    Returns:
        str: If no tool calls, returns just the content
        LLMResponse: If tool calls exist, returns object with content and tool_calls
    """
    if (
        not raw
        or not hasattr(raw, "choices")
        or not raw.choices
        or not isinstance(raw.choices, list)
    ):
        raise ValueError("Invalid OpenAI response format")

    first = raw.choices[0]
    content = ""
    tool_calls = []

    # Extract content
    if hasattr(first, "message"):
        content = first.message.content or ""

        # Extract tool calls if present
        if hasattr(first.message, "tool_calls") and first.message.tool_calls:
            for tool_call in first.message.tool_calls:
                tool_call_dict = {
                    "id": tool_call.id,
                    "type": tool_call.type,
                    "function": {
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments,
                    },
                }
                tool_calls.append(tool_call_dict)

    elif hasattr(first, "text"):
        content = first.text
    else:
        # Fallback to dict access
        content = first.get("message", {}).get("content", first.get("text", ""))

    if not content and not tool_calls:
        raise ValueError("No content or tool calls found in OpenAI response")

    # Return LLMResponse if tool calls exist, otherwise just content
    if tool_calls:
        return LLMResponse(content=content, tool_calls=tool_calls)
    return content
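Finally, _parse_llm_response's dual return type can be seen with SimpleNamespace stand-ins for the OpenAI response objects (sketch only; assumes the definitions above are in scope and that LLMResponse exposes a tool_calls attribute):

from types import SimpleNamespace as NS

plain = NS(choices=[NS(message=NS(content="hi", tool_calls=None))])
assert _parse_llm_response(plain) == "hi"  # no tool calls -> bare string

call = NS(
    id="call_1",
    type="function",
    function=NS(name="get_weather", arguments='{"city": "Paris"}'),
)
with_tools = NS(choices=[NS(message=NS(content="", tool_calls=[call]))])
resp = _parse_llm_response(with_tools)  # tool calls -> LLMResponse
assert resp.tool_calls[0]["function"]["name"] == "get_weather"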