ibm-watsonx-gov 1.3.3__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_gov/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/clients/__init__.py +14 -0
- ibm_watsonx_gov/agent_catalog/clients/ai_agent_client.py +333 -0
- ibm_watsonx_gov/agent_catalog/core/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/core/agent_loader.py +202 -0
- ibm_watsonx_gov/agent_catalog/core/agents.py +134 -0
- ibm_watsonx_gov/agent_catalog/entities/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/entities/ai_agent.py +599 -0
- ibm_watsonx_gov/agent_catalog/utils/__init__.py +8 -0
- ibm_watsonx_gov/agent_catalog/utils/constants.py +36 -0
- ibm_watsonx_gov/agent_catalog/utils/notebook_utils.py +70 -0
- ibm_watsonx_gov/ai_experiments/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/ai_experiments_client.py +980 -0
- ibm_watsonx_gov/ai_experiments/utils/__init__.py +8 -0
- ibm_watsonx_gov/ai_experiments/utils/ai_experiment_utils.py +139 -0
- ibm_watsonx_gov/clients/__init__.py +0 -0
- ibm_watsonx_gov/clients/api_client.py +99 -0
- ibm_watsonx_gov/clients/segment_client.py +46 -0
- ibm_watsonx_gov/clients/usage_client.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/clients/wx_ai_client.py +87 -0
- ibm_watsonx_gov/config/__init__.py +14 -0
- ibm_watsonx_gov/config/agentic_ai_configuration.py +225 -0
- ibm_watsonx_gov/config/gen_ai_configuration.py +129 -0
- ibm_watsonx_gov/config/model_risk_configuration.py +173 -0
- ibm_watsonx_gov/config/predictive_ai_configuration.py +20 -0
- ibm_watsonx_gov/entities/__init__.py +8 -0
- ibm_watsonx_gov/entities/agentic_app.py +209 -0
- ibm_watsonx_gov/entities/agentic_evaluation_result.py +185 -0
- ibm_watsonx_gov/entities/ai_evaluation.py +290 -0
- ibm_watsonx_gov/entities/ai_experiment.py +419 -0
- ibm_watsonx_gov/entities/base_classes.py +134 -0
- ibm_watsonx_gov/entities/container.py +54 -0
- ibm_watsonx_gov/entities/credentials.py +633 -0
- ibm_watsonx_gov/entities/criteria.py +508 -0
- ibm_watsonx_gov/entities/enums.py +274 -0
- ibm_watsonx_gov/entities/evaluation_result.py +444 -0
- ibm_watsonx_gov/entities/foundation_model.py +490 -0
- ibm_watsonx_gov/entities/llm_judge.py +44 -0
- ibm_watsonx_gov/entities/locale.py +17 -0
- ibm_watsonx_gov/entities/mapping.py +49 -0
- ibm_watsonx_gov/entities/metric.py +211 -0
- ibm_watsonx_gov/entities/metric_threshold.py +36 -0
- ibm_watsonx_gov/entities/model_provider.py +329 -0
- ibm_watsonx_gov/entities/model_risk_result.py +43 -0
- ibm_watsonx_gov/entities/monitor.py +71 -0
- ibm_watsonx_gov/entities/prompt_setup.py +40 -0
- ibm_watsonx_gov/entities/state.py +22 -0
- ibm_watsonx_gov/entities/utils.py +99 -0
- ibm_watsonx_gov/evaluators/__init__.py +26 -0
- ibm_watsonx_gov/evaluators/agentic_evaluator.py +2725 -0
- ibm_watsonx_gov/evaluators/agentic_traces_evaluator.py +115 -0
- ibm_watsonx_gov/evaluators/base_evaluator.py +22 -0
- ibm_watsonx_gov/evaluators/impl/__init__.py +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_metrics_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/impl/evaluate_model_risk_impl.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/evaluators/metrics_evaluator.py +187 -0
- ibm_watsonx_gov/evaluators/model_risk_evaluator.py +89 -0
- ibm_watsonx_gov/evaluators/traces_evaluator.py +93 -0
- ibm_watsonx_gov/metric_groups/answer_quality/answer_quality_decorator.py +66 -0
- ibm_watsonx_gov/metric_groups/content_safety/content_safety_decorator.py +76 -0
- ibm_watsonx_gov/metric_groups/readability/readability_decorator.py +59 -0
- ibm_watsonx_gov/metric_groups/retrieval_quality/retrieval_quality_decorator.py +63 -0
- ibm_watsonx_gov/metric_groups/usage/usage_decorator.py +58 -0
- ibm_watsonx_gov/metrics/__init__.py +74 -0
- ibm_watsonx_gov/metrics/answer_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_decorator.py +63 -0
- ibm_watsonx_gov/metrics/answer_relevance/answer_relevance_metric.py +260 -0
- ibm_watsonx_gov/metrics/answer_similarity/__init__.py +0 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_decorator.py +66 -0
- ibm_watsonx_gov/metrics/answer_similarity/answer_similarity_metric.py +219 -0
- ibm_watsonx_gov/metrics/average_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/average_precision/average_precision_metric.py +174 -0
- ibm_watsonx_gov/metrics/base_metric_decorator.py +193 -0
- ibm_watsonx_gov/metrics/context_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_decorator.py +60 -0
- ibm_watsonx_gov/metrics/context_relevance/context_relevance_metric.py +414 -0
- ibm_watsonx_gov/metrics/cost/__init__.py +8 -0
- ibm_watsonx_gov/metrics/cost/cost_decorator.py +58 -0
- ibm_watsonx_gov/metrics/cost/cost_metric.py +155 -0
- ibm_watsonx_gov/metrics/duration/__init__.py +8 -0
- ibm_watsonx_gov/metrics/duration/duration_decorator.py +59 -0
- ibm_watsonx_gov/metrics/duration/duration_metric.py +111 -0
- ibm_watsonx_gov/metrics/evasiveness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_decorator.py +61 -0
- ibm_watsonx_gov/metrics/evasiveness/evasiveness_metric.py +103 -0
- ibm_watsonx_gov/metrics/faithfulness/__init__.py +8 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_decorator.py +65 -0
- ibm_watsonx_gov/metrics/faithfulness/faithfulness_metric.py +254 -0
- ibm_watsonx_gov/metrics/hap/__init__.py +16 -0
- ibm_watsonx_gov/metrics/hap/hap_decorator.py +58 -0
- ibm_watsonx_gov/metrics/hap/hap_metric.py +98 -0
- ibm_watsonx_gov/metrics/hap/input_hap_metric.py +104 -0
- ibm_watsonx_gov/metrics/hap/output_hap_metric.py +110 -0
- ibm_watsonx_gov/metrics/harm/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm/harm_decorator.py +60 -0
- ibm_watsonx_gov/metrics/harm/harm_metric.py +103 -0
- ibm_watsonx_gov/metrics/harm_engagement/__init__.py +8 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_decorator.py +61 -0
- ibm_watsonx_gov/metrics/harm_engagement/harm_engagement_metric.py +103 -0
- ibm_watsonx_gov/metrics/hit_rate/__init__.py +0 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_decorator.py +59 -0
- ibm_watsonx_gov/metrics/hit_rate/hit_rate_metric.py +167 -0
- ibm_watsonx_gov/metrics/input_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/input_token_count/input_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/jailbreak/__init__.py +8 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_decorator.py +60 -0
- ibm_watsonx_gov/metrics/jailbreak/jailbreak_metric.py +103 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/keyword_detection/keyword_detection_metric.py +111 -0
- ibm_watsonx_gov/metrics/llm_validation/__init__.py +8 -0
- ibm_watsonx_gov/metrics/llm_validation/evaluation_criteria.py +84 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_constants.py +24 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_decorator.py +54 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_impl.py +525 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_metric.py +258 -0
- ibm_watsonx_gov/metrics/llm_validation/llm_validation_prompts.py +106 -0
- ibm_watsonx_gov/metrics/llmaj/__init__.py +0 -0
- ibm_watsonx_gov/metrics/llmaj/llmaj_metric.py +298 -0
- ibm_watsonx_gov/metrics/ndcg/__init__.py +0 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_decorator.py +61 -0
- ibm_watsonx_gov/metrics/ndcg/ndcg_metric.py +166 -0
- ibm_watsonx_gov/metrics/output_token_count/__init__.py +8 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_decorator.py +58 -0
- ibm_watsonx_gov/metrics/output_token_count/output_token_count_metric.py +112 -0
- ibm_watsonx_gov/metrics/pii/__init__.py +16 -0
- ibm_watsonx_gov/metrics/pii/input_pii_metric.py +102 -0
- ibm_watsonx_gov/metrics/pii/output_pii_metric.py +107 -0
- ibm_watsonx_gov/metrics/pii/pii_decorator.py +59 -0
- ibm_watsonx_gov/metrics/pii/pii_metric.py +96 -0
- ibm_watsonx_gov/metrics/profanity/__init__.py +8 -0
- ibm_watsonx_gov/metrics/profanity/profanity_decorator.py +60 -0
- ibm_watsonx_gov/metrics/profanity/profanity_metric.py +103 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/__init__.py +8 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_decorator.py +57 -0
- ibm_watsonx_gov/metrics/prompt_safety_risk/prompt_safety_risk_metric.py +128 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/__init__.py +0 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_decorator.py +62 -0
- ibm_watsonx_gov/metrics/reciprocal_rank/reciprocal_rank_metric.py +162 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_decorator.py +58 -0
- ibm_watsonx_gov/metrics/regex_detection/regex_detection_metric.py +106 -0
- ibm_watsonx_gov/metrics/retrieval_precision/__init__.py +0 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_decorator.py +62 -0
- ibm_watsonx_gov/metrics/retrieval_precision/retrieval_precision_metric.py +170 -0
- ibm_watsonx_gov/metrics/sexual_content/__init__.py +8 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_decorator.py +61 -0
- ibm_watsonx_gov/metrics/sexual_content/sexual_content_metric.py +103 -0
- ibm_watsonx_gov/metrics/social_bias/__init__.py +8 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_decorator.py +62 -0
- ibm_watsonx_gov/metrics/social_bias/social_bias_metric.py +103 -0
- ibm_watsonx_gov/metrics/status/__init__.py +0 -0
- ibm_watsonx_gov/metrics/status/status_metric.py +113 -0
- ibm_watsonx_gov/metrics/text_grade_level/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_grade_level/text_grade_level_metric.py +127 -0
- ibm_watsonx_gov/metrics/text_reading_ease/__init__.py +8 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_decorator.py +59 -0
- ibm_watsonx_gov/metrics/text_reading_ease/text_reading_ease_metric.py +123 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_decorator.py +67 -0
- ibm_watsonx_gov/metrics/tool_call_accuracy/tool_call_accuracy_metric.py +162 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_decorator.py +68 -0
- ibm_watsonx_gov/metrics/tool_call_parameter_accuracy/tool_call_parameter_accuracy_metric.py +151 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_decorator.py +71 -0
- ibm_watsonx_gov/metrics/tool_call_relevance/tool_call_relevance_metric.py +166 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/__init__.py +0 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_decorator.py +66 -0
- ibm_watsonx_gov/metrics/tool_call_syntactic_accuracy/tool_call_syntactic_accuracy_metric.py +121 -0
- ibm_watsonx_gov/metrics/topic_relevance/__init__.py +8 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_decorator.py +57 -0
- ibm_watsonx_gov/metrics/topic_relevance/topic_relevance_metric.py +106 -0
- ibm_watsonx_gov/metrics/unethical_behavior/__init__.py +8 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_decorator.py +61 -0
- ibm_watsonx_gov/metrics/unethical_behavior/unethical_behavior_metric.py +103 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/__init__.py +0 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_decorator.py +66 -0
- ibm_watsonx_gov/metrics/unsuccessful_requests/unsuccessful_requests_metric.py +128 -0
- ibm_watsonx_gov/metrics/user_id/__init__.py +0 -0
- ibm_watsonx_gov/metrics/user_id/user_id_metric.py +111 -0
- ibm_watsonx_gov/metrics/utils.py +440 -0
- ibm_watsonx_gov/metrics/violence/__init__.py +8 -0
- ibm_watsonx_gov/metrics/violence/violence_decorator.py +60 -0
- ibm_watsonx_gov/metrics/violence/violence_metric.py +103 -0
- ibm_watsonx_gov/prompt_evaluator/__init__.py +9 -0
- ibm_watsonx_gov/prompt_evaluator/impl/__init__.py +8 -0
- ibm_watsonx_gov/prompt_evaluator/impl/prompt_evaluator_impl.py +554 -0
- ibm_watsonx_gov/prompt_evaluator/impl/pta_lifecycle_evaluator.py +2332 -0
- ibm_watsonx_gov/prompt_evaluator/prompt_evaluator.py +262 -0
- ibm_watsonx_gov/providers/__init__.py +8 -0
- ibm_watsonx_gov/providers/detectors_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/detectors_provider.py +415 -0
- ibm_watsonx_gov/providers/eval_assist_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/providers/eval_assist_provider.py +266 -0
- ibm_watsonx_gov/providers/inference_engines/__init__.py +0 -0
- ibm_watsonx_gov/providers/inference_engines/custom_inference_engine.py +165 -0
- ibm_watsonx_gov/providers/inference_engines/portkey_inference_engine.py +57 -0
- ibm_watsonx_gov/providers/llmevalkit/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/main.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/preprocess_log.py +111 -0
- ibm_watsonx_gov/providers/llmevalkit/ciso_agent/utils.py +186 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/README.md +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/__init__.py +27 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/README.md +306 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/__init__.py +89 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/__init__.py +30 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/base.py +411 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/code_agent.py +1254 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/exact_match.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/fuzzy_string.py +104 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/hybrid.py +516 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/comparators/llm_judge.py +1882 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/pipeline.py +387 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/types.py +178 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/comparison/utils.py +298 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/consts.py +33 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/__init__.py +31 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/base.py +26 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics.json +783 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/__init__.py +6 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection.py +28 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics.json +599 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/loader.py +259 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter.py +52 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics.json +613 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/parameter/parameter_metrics_runtime.json +489 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/__init__.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory.py +43 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/metrics/trajectory/trajectory_metrics.json +161 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/adapters.py +102 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/pipeline.py +355 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/semantic_checker.py +816 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/static_checker.py +297 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/transformation_prompts.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/function_calling/pipeline/types.py +596 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/README.md +375 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/__init__.py +137 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/base.py +426 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/output_parser.py +364 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/consts.py +7 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/ibm_watsonx_ai/ibm_watsonx_ai.py +656 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/litellm.py +509 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/rits.py +224 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/litellm/watsonx.py +60 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/mock_llm_client.py +75 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/openai/openai.py +639 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway.py +134 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/providers/wxo_ai_gateway/wxo_ai_gateway_inference.py +214 -0
- ibm_watsonx_gov/providers/llmevalkit/llm/types.py +136 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/__init__.py +4 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/field.py +255 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metric.py +332 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/metrics_runner.py +188 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/prompt.py +403 -0
- ibm_watsonx_gov/providers/llmevalkit/metrics/utils.py +46 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/__init__.py +0 -0
- ibm_watsonx_gov/providers/llmevalkit/prompt/runner.py +144 -0
- ibm_watsonx_gov/providers/tool_call_metric_provider.py +455 -0
- ibm_watsonx_gov/providers/unitxt_provider.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/__init__.py +10 -0
- ibm_watsonx_gov/tools/clients/__init__.py +11 -0
- ibm_watsonx_gov/tools/clients/ai_tool_client.py +405 -0
- ibm_watsonx_gov/tools/clients/detector_client.py +82 -0
- ibm_watsonx_gov/tools/core/__init__.py +8 -0
- ibm_watsonx_gov/tools/core/tool_loader.py +237 -0
- ibm_watsonx_gov/tools/entities/__init__.py +8 -0
- ibm_watsonx_gov/tools/entities/ai_tools.py +435 -0
- ibm_watsonx_gov/tools/onboarding/create/answer_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/chromadb_retrieval_tool.json +63 -0
- ibm_watsonx_gov/tools/onboarding/create/context_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/duduckgo_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/create/google_search_tool.json +62 -0
- ibm_watsonx_gov/tools/onboarding/create/hap_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/jailbreak_detector.json +70 -0
- ibm_watsonx_gov/tools/onboarding/create/pii_detector.json +36 -0
- ibm_watsonx_gov/tools/onboarding/create/prompt_safety_risk_detector.json +69 -0
- ibm_watsonx_gov/tools/onboarding/create/topic_relevance_detector.json +57 -0
- ibm_watsonx_gov/tools/onboarding/create/weather_tool.json +39 -0
- ibm_watsonx_gov/tools/onboarding/create/webcrawler_tool.json +34 -0
- ibm_watsonx_gov/tools/onboarding/create/wikipedia_search_tool.json +53 -0
- ibm_watsonx_gov/tools/onboarding/delete/delete_tools.json +4 -0
- ibm_watsonx_gov/tools/onboarding/update/google_search_tool.json +38 -0
- ibm_watsonx_gov/tools/ootb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/detectors/hap_detector_tool.py +109 -0
- ibm_watsonx_gov/tools/ootb/detectors/jailbreak_detector_tool.py +104 -0
- ibm_watsonx_gov/tools/ootb/detectors/pii_detector_tool.py +83 -0
- ibm_watsonx_gov/tools/ootb/detectors/prompt_safety_risk_detector_tool.py +111 -0
- ibm_watsonx_gov/tools/ootb/detectors/topic_relevance_detector_tool.py +101 -0
- ibm_watsonx_gov/tools/ootb/rag/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/rag/answer_relevance_detector_tool.py +119 -0
- ibm_watsonx_gov/tools/ootb/rag/context_relevance_detector_tool.py +118 -0
- ibm_watsonx_gov/tools/ootb/search/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/search/duckduckgo_search_tool.py +62 -0
- ibm_watsonx_gov/tools/ootb/search/google_search_tool.py +105 -0
- ibm_watsonx_gov/tools/ootb/search/weather_tool.py +95 -0
- ibm_watsonx_gov/tools/ootb/search/web_crawler_tool.py +69 -0
- ibm_watsonx_gov/tools/ootb/search/wikipedia_search_tool.py +63 -0
- ibm_watsonx_gov/tools/ootb/vectordb/__init__.py +8 -0
- ibm_watsonx_gov/tools/ootb/vectordb/chromadb_retriever_tool.py +111 -0
- ibm_watsonx_gov/tools/rest_api/__init__.py +10 -0
- ibm_watsonx_gov/tools/rest_api/restapi_tool.py +72 -0
- ibm_watsonx_gov/tools/schemas/__init__.py +10 -0
- ibm_watsonx_gov/tools/schemas/search_tool_schema.py +46 -0
- ibm_watsonx_gov/tools/schemas/vectordb_retrieval_schema.py +55 -0
- ibm_watsonx_gov/tools/utils/__init__.py +14 -0
- ibm_watsonx_gov/tools/utils/constants.py +69 -0
- ibm_watsonx_gov/tools/utils/display_utils.py +38 -0
- ibm_watsonx_gov/tools/utils/environment.py +108 -0
- ibm_watsonx_gov/tools/utils/package_utils.py +40 -0
- ibm_watsonx_gov/tools/utils/platform_url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/tools/utils/python_utils.py +68 -0
- ibm_watsonx_gov/tools/utils/tool_utils.py +206 -0
- ibm_watsonx_gov/traces/__init__.py +8 -0
- ibm_watsonx_gov/traces/span_exporter.py +195 -0
- ibm_watsonx_gov/traces/span_node.py +251 -0
- ibm_watsonx_gov/traces/span_util.py +153 -0
- ibm_watsonx_gov/traces/trace_utils.py +1074 -0
- ibm_watsonx_gov/utils/__init__.py +8 -0
- ibm_watsonx_gov/utils/aggregation_util.py +346 -0
- ibm_watsonx_gov/utils/async_util.py +62 -0
- ibm_watsonx_gov/utils/authenticator.py +144 -0
- ibm_watsonx_gov/utils/constants.py +15 -0
- ibm_watsonx_gov/utils/errors.py +40 -0
- ibm_watsonx_gov/utils/gov_sdk_logger.py +39 -0
- ibm_watsonx_gov/utils/insights_generator.py +1285 -0
- ibm_watsonx_gov/utils/python_utils.py +425 -0
- ibm_watsonx_gov/utils/rest_util.py +73 -0
- ibm_watsonx_gov/utils/segment_batch_manager.py +162 -0
- ibm_watsonx_gov/utils/singleton_meta.py +25 -0
- ibm_watsonx_gov/utils/url_mapping.cpython-313-darwin.so +0 -0
- ibm_watsonx_gov/utils/validation_util.py +126 -0
- ibm_watsonx_gov/visualizations/__init__.py +13 -0
- ibm_watsonx_gov/visualizations/metric_descriptions.py +57 -0
- ibm_watsonx_gov/visualizations/model_insights.py +1304 -0
- ibm_watsonx_gov/visualizations/visualization_utils.py +75 -0
- ibm_watsonx_gov-1.3.3.dist-info/METADATA +93 -0
- ibm_watsonx_gov-1.3.3.dist-info/RECORD +353 -0
- ibm_watsonx_gov-1.3.3.dist-info/WHEEL +6 -0
@@ -0,0 +1,2332 @@

# ----------------------------------------------------------------------------------------------------
# IBM Confidential
# Licensed Materials - Property of IBM
# 5737-H76, 5900-A3Q
# © Copyright IBM Corp. 2025 All Rights Reserved.
# US Government Users Restricted Rights - Use, duplication or disclosure restricted by
# GSA ADP Schedule Contract with IBM Corp.
# ----------------------------------------------------------------------------------------------------

import json
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Union

import pandas as pd
import requests
from ibm_watsonx_ai.foundation_models.prompts.prompt_template import (
    DetachedPromptTemplate, PromptTemplate, PromptTemplateManager)
from IPython.display import display

from ibm_watsonx_gov.entities.enums import EvaluationStage
from ibm_watsonx_gov.utils.authenticator import Authenticator
from ibm_watsonx_gov.utils.gov_sdk_logger import GovSDKLogger
from ibm_watsonx_gov.utils.rest_util import RestUtil
from ibm_watsonx_gov.visualizations.visualization_utils import \
    display_message_with_frame

@dataclass
class SpaceConfigurations:
    """Class for keeping track of spaces."""

    space_id: str = None
    prompt_template: PromptTemplate | DetachedPromptTemplate = None
    space_deployment: dict[str, str] = None
    prompt_setup: dict[str, str] = None


@dataclass
class ProjectConfigurations:
    """Class for keeping track of projects."""

    project_id: str = None
    prompt_template: PromptTemplate | DetachedPromptTemplate = None
    prompt_setup: dict[str, str] = None


@dataclass
class ModelUsecase:
    """Class for keeping track of a model usecase."""

    usecase_id: str
    version: str
    catalog_id: str
    approach_id: str = None


class PTALifecycleEvaluator:
    """Class responsible for triggering the e2e lifecycle of a PTA with
    evaluation support."""

    def __init__(self):
        self.logger = GovSDKLogger.get_logger(__name__)
        self.config = None

        # Configuration details
        self.use_cpd: bool = None
        self.credentials: dict[str, str] = None
        self.use_ssl: bool = None
        self.service_instance_id: str = None
        self.wml_url = None
        self.platform_url = None
        self.wos_url = None
        self.dataplatform_url = None
        self.setup_stages: list[EvaluationStage] = None
        self.is_detached: bool = False
        self.ai_usecase: ModelUsecase = None

        # Authentication
        self.__authenticator: Authenticator = None
        self.__iam_access_token: str = None

        # Base urls
        self.__platform_url: str = None
        self.__wos_url: str = None
        self.__dataplatform_url: str = None

        # Parsed configurations
        self.__stage_configurations: dict[EvaluationStage, Union[ProjectConfigurations, SpaceConfigurations]] = {
            EvaluationStage.DEVELOPMENT: ProjectConfigurations(),
            EvaluationStage.PRE_PRODUCTION: SpaceConfigurations(),
            EvaluationStage.PRODUCTION: SpaceConfigurations(),
        }

        # Template ids
        self.__prompt_template_ids: dict[EvaluationStage, str] = {
            EvaluationStage.DEVELOPMENT: None,
            EvaluationStage.PRE_PRODUCTION: None,
            EvaluationStage.PRODUCTION: None,
        }

        # subscription ids
        self.__subscription_ids: dict[EvaluationStage, str] = {
            EvaluationStage.DEVELOPMENT: None,
            EvaluationStage.PRE_PRODUCTION: None,
            EvaluationStage.PRODUCTION: None,
        }

        # deployment ids
        self.__deployment_ids: dict[EvaluationStage, str] = {
            EvaluationStage.PRE_PRODUCTION: None,
            EvaluationStage.PRODUCTION: None,
        }

        # Scoring urls
        self.__scoring_urls: dict[EvaluationStage, str] = {
            EvaluationStage.DEVELOPMENT: None,
            EvaluationStage.PRE_PRODUCTION: None,
            EvaluationStage.PRODUCTION: None,
        }

        # monitors
        self.__monitors_info: dict[EvaluationStage, dict[str, any]] = {
            EvaluationStage.DEVELOPMENT: None,
            EvaluationStage.PRE_PRODUCTION: None,
            EvaluationStage.PRODUCTION: None,
        }

    def __send_request(self, method: str, **kwargs) -> requests.Response:
        """Helper method to wrap the requests.request method.

        This will raise an exception if the response status code is not 2xx.
        """
        self.logger.info(
            f"sending request. method '{method}', "
            f"url '{kwargs.get('url')}', "
            f"json payload: '{kwargs.get('json', {})}', "
            f"params: '{kwargs.get('params', {})}'."
        )

        # Check if kwargs has a headers argument and use it, otherwise use the default values
        headers = kwargs.pop("headers", {})
        if not headers:
            headers["Authorization"] = f"Bearer {self.__iam_access_token}"
            headers["Content-Type"] = "application/json"
            headers["Accept"] = "application/json"

        # Check if kwargs has verify, otherwise use the default value
        verify = kwargs.pop("verify", self.use_ssl)

        # Send the request with retries;
        # this will raise an exception if the response is non-2xx or if there were any intermediate failures
        try:
            response = RestUtil.request_with_retry().request(
                method=method,
                headers=headers,
                verify=verify,
                **kwargs,
            )
            response.raise_for_status()
            self.logger.info(
                f"status code: {response.status_code}, response body: {response.text}"
            )
        except requests.exceptions.HTTPError as e:
            message = f"HTTP Error: {e}. Response body: {response.text}"
            self.logger.error(message)
            raise Exception(message)
        return response

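The retry behavior above is delegated to RestUtil.request_with_retry(), whose implementation is not part of this excerpt. As a point of reference, a minimal sketch of such a helper on a plain requests/urllib3 stack might look like the following; the retry count, backoff factor, and status list here are illustrative assumptions, not the package's actual settings.

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def request_with_retry(total: int = 3, backoff_factor: float = 0.5) -> requests.Session:
    """Return a Session that retries transient failures before giving up (sketch)."""
    retry = Retry(
        total=total,
        backoff_factor=backoff_factor,  # sleeps ~0.5s, 1s, 2s between attempts
        status_forcelist=(429, 500, 502, 503, 504),  # retry only transient statuses
        allowed_methods=("GET", "POST", "PUT", "DELETE"),
    )
    session = requests.Session()
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session

Returning a Session means a caller can chain .request(method=..., headers=..., ...) on it, which matches how __send_request invokes RestUtil.request_with_retry().request(...).
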
    def __validate_dict_schema(
        self, object_in: dict[str, any], object_schema: dict[str, type]
    ) -> None:
        """Helper method to validate dicts against a schema. This will validate
        the following:

        - the types of dict members
        - required keys
        - that members of type dict are non-empty
        """
        missing_keys = object_schema.keys() - object_in.keys()
        if len(missing_keys) > 0:
            message = f"Missing required attributes: {missing_keys}"
            self.logger.error(message)
            raise Exception(message)

        for key, value_type in object_schema.items():
            if not isinstance(object_in[key], value_type):
                message = f"Invalid attribute `{key}` type. expected type: {value_type}, actual type: {type(object_in[key])}."
                self.logger.error(message)
                raise Exception(message)
            if value_type == dict and not object_in[key]:
                message = f"Attribute `{key}` of type `dict` cannot be empty."
                self.logger.error(message)
                raise Exception(message)

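To make the schema convention concrete: each schema key names a required attribute and maps to the type its value must have, and dict-valued members must additionally be non-empty. A toy illustration with hypothetical values (the method itself is private and is normally reached via __validate_configuration):

# Hypothetical inputs illustrating the schema convention used above.
schema = {"use_ssl": bool, "use_cpd": bool, "credentials": dict}

ok = {"use_ssl": True, "use_cpd": False, "credentials": {"apikey": "..."}}   # passes
missing = {"use_ssl": True}                                                  # would raise: use_cpd, credentials missing
wrong_type = {"use_ssl": "yes", "use_cpd": False, "credentials": {"k": 1}}   # would raise: str is not bool
empty_dict = {"use_ssl": True, "use_cpd": False, "credentials": {}}          # would raise: dict member is empty
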
    def __validate_configuration(self, config: dict) -> None:
        """
        Helper function to validate the configuration object.
        """
        self.logger.info(f"Validating configuration: {config}")

        # Validate that all the attributes in the configuration are recognized
        expected_config_attributes = set(
            ["common_configurations"] + [e.value for e in EvaluationStage])
        unexpected_config_attributes = config.keys() - expected_config_attributes
        if len(unexpected_config_attributes) > 0:
            self.logger.warning(
                f"Ignoring properties {unexpected_config_attributes} as these are not recognized.")

        # Validate the config
        configuration_schema = {
            "common_configurations": dict,
        }

        # Only validate the configuration of stages in setup_stages
        for stage in self.setup_stages:
            configuration_schema[stage.value] = dict

        # Check attributes types
        self.__validate_dict_schema(config, configuration_schema)

        # Validate common_configurations
        common_configurations = config["common_configurations"]
        common_configurations_schema = {
            "use_ssl": bool,
            "use_cpd": bool,
            "credentials": dict,
        }

        self.__validate_dict_schema(
            common_configurations,
            common_configurations_schema
        )

        # Validate CPD credentials
        if common_configurations["use_cpd"] is True:
            cpd_credentials_schema = {"url": str, "username": str}
            if "api_key" in common_configurations["credentials"].keys():
                cpd_credentials_schema["api_key"] = str
            elif "password" in common_configurations["credentials"].keys():
                cpd_credentials_schema["password"] = str
            else:
                message = (
                    "Please provide one of `api_key` or `password` for `credentials`"
                )
                self.logger.error(message)
                raise Exception(message)

            self.__validate_dict_schema(
                common_configurations["credentials"], cpd_credentials_schema
            )
        # Validate cloud credentials
        else:
            cloud_credentials_schema = {"iam_url": str, "apikey": str}

            # Validate cloud credentials
            self.__validate_dict_schema(
                common_configurations["credentials"], cloud_credentials_schema
            )

        # Validate ai_usecase
        if common_configurations.get("ai_usecase"):
            ai_usecase_schema = {
                "ai_usecase_id": str,
                "catalog_id": str,
                "approach_version": str,
            }
            self.__validate_dict_schema(
                common_configurations["ai_usecase"],
                ai_usecase_schema,
            )

        # Validate the development configuration
        if EvaluationStage.DEVELOPMENT in self.setup_stages:
            development_configurations = config[EvaluationStage.DEVELOPMENT.value]
            development_configurations_schema = {
                "project_id": str,
                "prompt_setup": dict,
            }
            self.__validate_dict_schema(development_configurations,
                                        development_configurations_schema)

        if EvaluationStage.PRE_PRODUCTION in self.setup_stages:
            # Check if the user provided project id or space id
            pre_production_configurations = config[EvaluationStage.PRE_PRODUCTION.value]
            if "project_id" in pre_production_configurations.keys():
                pre_production_configurations_schema = {
                    "project_id": str,
                    "prompt_setup": dict,
                }
            elif "space_id" in pre_production_configurations.keys():
                pre_production_configurations_schema = {
                    "space_id": str,
                    "space_deployment": dict,
                    "prompt_setup": dict,
                }
            else:
                message = "Please provide either `project_id` or `space_id` for `pre_production` configuration."
                self.logger.error(message)
                raise Exception(message)
            self.__validate_dict_schema(
                pre_production_configurations, pre_production_configurations_schema)

        # Validate the production configuration
        if EvaluationStage.PRODUCTION in self.setup_stages:
            production_configurations = config[EvaluationStage.PRODUCTION.value]
            production_configurations_schema = {
                "space_id": str,
                "space_deployment": dict,
                "prompt_setup": dict,
            }
            self.__validate_dict_schema(production_configurations,
                                        production_configurations_schema)

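Putting the schemas above together, a configuration that passes validation would look roughly like the sketch below. All ids and values are placeholders; the stage keys assume the EvaluationStage enum values are the lowercase stage names (the error message above confirms "pre_production"), the prompt_setup contents are assumptions since that payload is forwarded to the prompt_setup API unchanged, and only the stages listed in setup_stages need to be present.

config = {
    "common_configurations": {
        "use_cpd": False,
        "use_ssl": True,
        # Cloud credentials; CPD instead uses url/username plus api_key or password.
        "credentials": {"iam_url": "https://iam.cloud.ibm.com", "apikey": "<api-key>"},
    },
    "development": {
        "project_id": "<project-id>",
        "prompt_setup": {"label_column": "reference"},  # assumed contents
    },
    "production": {
        "space_id": "<space-id>",
        "space_deployment": {
            "base_model_id": "ibm/granite-13b-chat-v2",
            "name": "deployment_name",
            "description": "deployment_description",
            "serving_name": "serving_name",
            "version_date": "2025-01-01",
        },
        "prompt_setup": {"label_column": "reference"},  # assumed contents
    },
}
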
    def __get_prompt_template_input_variables_list(
        self,
        stage: EvaluationStage = EvaluationStage.DEVELOPMENT,
    ) -> list[str]:
        """
        Helper to return prompt template variable list

        Args:
            stage (EvaluationStage): evaluation stage for the prompt template. Defaults to EvaluationStage.DEVELOPMENT
        """
        try:
            input_variables = self.__stage_configurations[stage].prompt_template.input_variables

            if isinstance(input_variables, list):
                return input_variables
            else:
                return list(input_variables.keys())
        except Exception as e:
            message = f"Failed to parse prompt variables list. {e}"
            self.logger.error(message)
            raise Exception(message)

    def evaluate_df(self, input_df: pd.DataFrame, scoring_url: str) -> list[dict[str, any]]:
        """
        Method to evaluate the prompt. This will take the scoring url and
        will process input_df.

        Args:
            input_df (pd.DataFrame): Input DataFrame to be evaluated.
            scoring_url (str): Scoring URL to send requests to.

        Returns:
            list[dict[str, any]]: List of dictionaries containing request and response data for each record in the input DataFrame.

        Raises:
            Exception: If the input DataFrame is empty or if there is an error during the evaluation process.

        """
        self.logger.info("Running evaluation")

        if input_df.empty:
            message = "Input dataframe is empty."
            self.logger.error(message)
            raise Exception(message)

        # process the dataframe to take only the prompt template variables
        try:
            prompt_template_variables = self.__get_prompt_template_input_variables_list()
            df = input_df[prompt_template_variables]
        except Exception as e:
            message = f"Unable to parse the prompt template variables from the dataframe. {e}"
            self.logger.error(message)
            raise Exception(message)

        prompt_data = df.to_dict(orient="records")

        pl_data = []
        for row in prompt_data:
            json_request = {"parameters": {"template_variables": row}}
            params = {"version": datetime.today().strftime("%Y-%m-%d")}
            try:
                response = self.__send_request(
                    "post", url=scoring_url, json=json_request, params=params
                )

                try:
                    pl_data.append(
                        {
                            "request": json_request,
                            "response": response.json()
                        }
                    )
                except Exception as e:
                    message = f"Failed to parse evaluation response. {e}"
                    self.logger.error(message)
                    raise Exception(message)
            except Exception as e:
                message = f"Failed to evaluate record. {e}"
                self.logger.error(message)
                raise Exception(message)

        self.logger.info("Records evaluation is done")

        return pl_data

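A hedged usage sketch for evaluate_df, assuming a prompt template with a single "text" input variable and an already-configured evaluator; the subscription id and column name are placeholders, and the configuration/authentication steps are not shown in this excerpt.

import pandas as pd

evaluator = PTALifecycleEvaluator()
# ... configuration, authentication and prompt setup would normally run first ...

input_df = pd.DataFrame({"text": ["First document to summarize.",
                                  "Second document to summarize."]})
scoring_url = evaluator.get_scoring_url(subscription_id="<subscription-id>")

# Each record is scored individually; the result pairs every request
# payload with the raw JSON response from the deployment.
records = evaluator.evaluate_df(input_df=input_df, scoring_url=scoring_url)
print(records[0]["response"])
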
    def __build_wxai_credentials_dict(self) -> dict[str, str]:
        if self.use_cpd:
            return {
                "username": self.credentials["username"],
                "url": self.credentials["url"],
                "api_key": self.credentials["api_key"],
                "verify": self.use_ssl,
                "instance_id": "openshift",
            }

        return {
            "url": self.__wml_url,
            "api_key": self.credentials["apikey"],
            "verify": self.use_ssl,
        }

    def create_prompt_template_using_wxai(
        self,
        prompt_template: PromptTemplate | DetachedPromptTemplate,
        stage: EvaluationStage
    ) -> str:
        """Method to create prompt template using ibm_watsonx_ai"""
        self.logger.info("Creating Prompt Template")
        display_message_with_frame("Creating Prompt Template Asset...")

        try:
            if isinstance(self.__stage_configurations[stage], ProjectConfigurations):
                prompt_manager = PromptTemplateManager(
                    credentials=self.__build_wxai_credentials_dict(),
                    project_id=self.__stage_configurations[stage].project_id,
                )
            else:
                prompt_manager = PromptTemplateManager(
                    credentials=self.__build_wxai_credentials_dict(),
                    space_id=self.__stage_configurations[stage].space_id,
                )

            self.__stage_configurations[stage].prompt_template = prompt_manager.store_prompt(
                prompt_template)
        except Exception as e:
            message = f"Failed to create prompt template. {e}"
            self.logger.error(message)
            raise Exception(message)

        prompt_template_id = self.__stage_configurations[stage].prompt_template.prompt_id

        display_message_with_frame(
            message=f"Prompt template created successfully. Prompt template id: {prompt_template_id}",
        )

        return prompt_template_id

    def get_prompt_template_details_from_wxai(
        self,
        prompt_template_id: str,
        stage: EvaluationStage,
    ) -> str:
        """Helper to get the details of an existing prompt template. This will
        set the prompt template in self.__stage_configurations."""
        self.logger.info("Loading Prompt Template")
        display_message_with_frame("Loading Prompt Template Asset...")

        try:
            prompt_manager = PromptTemplateManager(
                credentials=self.__build_wxai_credentials_dict(),
                project_id=self.__stage_configurations[EvaluationStage.DEVELOPMENT].project_id,
            )
            self.__stage_configurations[stage].prompt_template = prompt_manager.load_prompt(
                prompt_id=prompt_template_id)
        except Exception as e:
            message = f"Failed to load prompt template. {e}"
            self.logger.error(message)
            raise Exception(message)

        prompt_template_id = self.__stage_configurations[stage].prompt_template.prompt_id

        display_message_with_frame(
            message=f"Prompt template loaded successfully. Prompt template id: {prompt_template_id}",
        )

        return prompt_template_id

    def get_scoring_url(self, subscription_id: str) -> str:
        """Method to get the scoring url for a subscription id."""
        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/subscriptions/{subscription_id}",
            )
        except Exception as e:
            message = f"Failed to get subscription details, subscription_id: {subscription_id}. {e}"
            self.logger.error(message)
            raise Exception(message)

        # Get scoring url
        try:
            json_response = response.json()
            if self.use_cpd:
                deployment_id = json_response["entity"]["deployment"]["deployment_id"]
                scoring_url = f"{self.__wml_url}/ml/v1/deployments/{deployment_id}/text/generation"
            else:
                scoring_url = json_response["entity"]["deployment"]["url"]
            self.logger.info(f"scoring url: {scoring_url}")
        except Exception as e:
            message = (
                f"Failed to parse scoring url from subscription details response. {e}"
            )
            self.logger.error(message)
            raise Exception(message)
        return scoring_url

    def get_available_datamarts(self) -> None:
        """Method to get the available datamarts."""
        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/data_marts",
            )
        except Exception as e:
            message = f"Failed to get available data marts. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            f"Available datamarts:\n{json.dumps(response.json(), indent=2)}"
        )

    def trigger_prompt_setup(self, stage: EvaluationStage) -> None:
        """Method to trigger prompt set up for a prompt template in a given
        evaluation stage.

        This will poll until the prompt template is set up successfully.
        """
        self.logger.info(f"Triggering prompt setup for {stage}")

        display_message_with_frame(
            message=f"Setting up prompt for evaluation stage '{stage.value}'..."
        )

        if stage == EvaluationStage.DEVELOPMENT:
            params = {
                "prompt_template_asset_id": self.__prompt_template_ids[stage],
                "project_id": self.__stage_configurations[stage].project_id,
            }
            payload = self.__stage_configurations[stage].prompt_setup

        elif stage == EvaluationStage.PRE_PRODUCTION:
            params = {
                "prompt_template_asset_id": self.__prompt_template_ids[stage],
                "space_id": self.__stage_configurations[stage].space_id,
                "deployment_id": self.__deployment_ids[stage],
            }
            payload = self.__stage_configurations[stage].prompt_setup

        elif stage == EvaluationStage.PRODUCTION:
            params = {
                "prompt_template_asset_id": self.__prompt_template_ids[stage],
                "space_id": self.__stage_configurations[stage].space_id,
                "deployment_id": self.__deployment_ids[stage],
            }
            payload = self.__stage_configurations[stage].prompt_setup
        else:
            message = f"Prompt setup for stage: '{stage}' is not supported yet"
            self.logger.error(message)
            raise Exception(message)

        try:
            self.logger.info(
                f"setting up prompt template for '{stage}'. parameters {params}"
            )
            response = self.__send_request(
                method="post",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/prompt_setup",
                json=payload,
                params=params,
            )
        except Exception as e:
            message = f"Prompt setup failed for the stage {stage}. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Started prompt set up for '{stage.value}':\n\n{json.dumps(response.json(), indent=2)}",
        )

        # Check the prompt set up progress
        self.logger.info(
            f"Checking prompt set up progress for the stage {stage}...")
        for attempt in range(10):
            try:
                response = self.__send_request(
                    method="get",
                    url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/prompt_setup",
                    params=params,
                )
            except Exception as e:
                message = (
                    f"Failed to check for prompt setup status in the stage {stage}. {e}"
                )
                self.logger.error(message)
                raise Exception(message)

            try:
                json_response = response.json()
                prompt_setup_status = json_response["status"]["state"]
            except Exception as e:
                message = f"Failed to parse prompt set up status response. {e}"
                self.logger.error(message)
                raise Exception(message)

            if prompt_setup_status.lower() == "finished":
                self.logger.info(
                    f"prompt template set up for the stage {stage} is done. Status {prompt_setup_status}"
                )
                break
            elif prompt_setup_status.lower() == "error":
                message = f"Prompt set up failed due to an error. {response.text}"
                self.logger.error(message)
                raise Exception(message)
            else:
                self.logger.info(f"Attempt {attempt+1} not done. Retrying...")
                time.sleep(5)
        else:
            message = "Prompt template set up status did not update after 10 attempts. Aborting..."
            self.logger.error(message)
            raise Exception(message)

        # Parse items needed from the response: get the subscription id
        try:
            subscription_id = json_response["subscription_id"]
            self.logger.info(
                f"subscription id for the stage {stage}: {subscription_id}"
            )
        except Exception as e:
            message = f"Failed to get subscription id from the response. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.__subscription_ids[stage] = subscription_id

        display_message_with_frame(
            message=f"Prompt set up for the stage {stage} finished successfully:\n\n{json.dumps(json_response, indent=2)}",
        )

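The polling loop above relies on Python's for...else construct: the else branch runs only when the loop exhausts all attempts without hitting break. A stripped-down sketch of the same pattern, with check_status standing in (hypothetically) for the GET /v2/prompt_setup call:

import time

MAX_ATTEMPTS = 10

def check_status() -> str:
    """Hypothetical status probe standing in for the GET /v2/prompt_setup call."""
    return "finished"

for attempt in range(MAX_ATTEMPTS):
    status = check_status()
    if status == "finished":
        break                      # success: the else branch is skipped
    if status == "error":
        raise Exception("setup failed")
    time.sleep(5)                  # still running: wait and retry
else:
    # Runs only when the loop exhausted all attempts without break
    raise Exception(f"status did not update after {MAX_ATTEMPTS} attempts")
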
    def promote_prompt_to_space(
        self,
        project_id: str,
        project_prompt_template_id: str,
        space_id: str,
    ) -> str:
        """Method to promote a prompt from project to space."""
        self.logger.info("Promoting prompt template assets to space")
        display_message_with_frame(
            message=f"Promoting prompt from project id: {project_id} to space id {space_id}"
        )
        """
        Payload Sample:
        {
            "space_id": "d7fc6056-a06b-4de0-bae5-7c97fa06c862"
        }
        """
        payload = {"space_id": space_id}
        params = {"project_id": project_id}

        try:
            response = self.__send_request(
                method="post",
                url=f"{self.__dataplatform_url}/v2/assets/{project_prompt_template_id}/promote",
                json=payload,
                params=params,
            )
        except Exception as e:
            message = f"Failed to promote template asset to space. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
            space_prompt_template_id = json_response["metadata"]["asset_id"]
            self.logger.info(
                f"Prompt template id promoted to space. Space prompt template asset id: {space_prompt_template_id}"
            )
        except Exception as e:
            message = f"Failed to parse the response of promoting template asset from project to space. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Template promoted to space successfully. Prompt template id: {space_prompt_template_id}",
        )

        return space_prompt_template_id

    def create_pta_space_deployment(
        self,
        space_configurations: SpaceConfigurations,
        space_prompt_template_id: str,
    ) -> str:
        """Method to create prompt template asset space deployment."""

        """
        payload sample:
        {
            "prompt_template": {
                "id": "81ad403c-df8b-41c0-87bb-68fea6717411",
            },
            "online": {
                "parameters": {"serving_name": "serving_name"}
            },
            "base_model_id": "ibm/granite-13b-chat-v2",
            "name": "deployment_name",
            "description": "deployment_description",
            "space_id": "74a51b1a-a83a-4e0b-b5b4-88e97b3a14a1",
        }
        """
        payload = {
            "prompt_template": {
                "id": space_prompt_template_id,
            },
            "base_model_id": space_configurations.space_deployment["base_model_id"],
            "name": space_configurations.space_deployment["name"],
            "description": space_configurations.space_deployment["description"],
            "space_id": space_configurations.space_id,
        }

        if self.is_detached:
            payload["detached"] = {}
        else:
            payload["online"] = {
                "parameters": {"serving_name": space_configurations.space_deployment["serving_name"]}
            }

        params = {
            "version": space_configurations.space_deployment["version_date"],
            "space_id": space_configurations.space_id,
        }

        display_message_with_frame(
            message=f"Creating space deployment for space id {space_configurations.space_id} and prompt template id {space_prompt_template_id}",
        )

        try:
            response = self.__send_request(
                method="post",
                url=f"{self.__wml_url}/ml/v4/deployments",
                params=params,
                json=payload,
            )
        except Exception as e:
            message = f"Failed to create space deployment. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
            space_deployment_id = json_response["metadata"]["id"]
            self.logger.info(
                f"Space deployment id: {space_deployment_id}")
        except Exception as e:
            message = f"Failed to parse space deployment creation response. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Deployment created successfully. Space deployment id: {space_deployment_id}",
        )

        return space_deployment_id

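Note how the payload branches on self.is_detached: a detached prompt template (one whose model is served outside watsonx.ai, per the DetachedPromptTemplate import above) gets an empty "detached" block instead of the "online" scoring parameters. A sketch of the two payload shapes, with placeholder ids taken from the sample above:

# Online deployment: watsonx.ai serves the model behind a serving name.
online_payload = {
    "prompt_template": {"id": "<space-prompt-template-id>"},
    "online": {"parameters": {"serving_name": "serving_name"}},
    "base_model_id": "ibm/granite-13b-chat-v2",
    "name": "deployment_name",
    "description": "deployment_description",
    "space_id": "<space-id>",
}

# Detached deployment: no online scoring parameters are sent.
detached_payload = {**online_payload, "detached": {}}
detached_payload.pop("online")
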
    def risk_evaluation_for_pta_subscription(
        self,
        input_df: pd.DataFrame,
        monitor_instance_id: str,
    ) -> str:
        """Function to trigger risk evaluation for PTA subscription.

        Args:
            input_df (pd.DataFrame): dataframe of the dataset
            monitor_instance_id (str): monitor instance id

        Returns:
            str: measurement_id
        """
        self.logger.info(
            f"Starting risk evaluation for PTA subscription. monitor_instance_id: {monitor_instance_id}"
        )
        display_message_with_frame(
            message=f"Evaluating risk of MRM monitor id {monitor_instance_id}"
        )

        url = f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/monitoring_services/mrm/monitor_instances/{monitor_instance_id}/risk_evaluations"
        try:
            file_payload = [
                ("data", ("risk_evaluation_for_pta.csv", input_df.to_csv()))]
            params = {"test_data_set_name": "risk_evaluation_for_pta"}
            headers = {
                "Authorization": f"Bearer {self.__iam_access_token}"}
            self.__send_request(
                method="post",
                url=url,
                files=file_payload,
                params=params,
                headers=headers,
            )
        except Exception as e:
            message = f"Failed to do risk evaluation for PTA subscription. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info("Waiting for risk evaluation result")

        # Retry for 15 minutes (180 retries, 5 seconds apart)
        for attempt in range(180):
            response = self.__send_request(
                method="get",
                url=url,
            )

            try:
                json_response = response.json()
                state = json_response["entity"]["status"]["state"]

            except Exception as e:
                message = f"Failed to parse risk evaluation status response. {e}"
                self.logger.error(message)
                raise Exception(message)

            if state.lower() == "finished":
                self.logger.info(
                    f"risk evaluation is done. Status {state}")
                break
            elif state.lower() == "error":
                message = f"Risk evaluation failed due to an error. {response.text}"
                self.logger.error(message)
                raise Exception(message)
            else:
                self.logger.info(
                    f"Attempt {attempt+1} not done. Retrying...")
                time.sleep(5)
        else:
            message = "Risk evaluation status did not update after 15 minutes. Aborting..."
            self.logger.error(message)
            raise Exception(message)

        try:
            measurement_id = json_response["entity"]["parameters"]["measurement_id"]
        except Exception as e:
            message = f"Failed to parse measurement id. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Successfully finished the risk evaluation.\nMeasurement id for risk evaluation for PTA subscription: {measurement_id}",
        )
        return measurement_id

858
|
+
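    # Note on the polling loop above: Python's `for ... else` runs the `else`
    # block only when the loop finishes without `break`, which is what turns
    # 180 attempts into a timeout error. A minimal standalone sketch of the
    # idiom (`poll` is a placeholder, not package API):
    #
    #   for attempt in range(180):
    #       if poll() == "finished":
    #           break
    #       time.sleep(5)
    #   else:
    #       raise Exception("Timed out after 15 minutes")
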
    def risk_evaluation_for_pta_subscription_in_space(
        self, monitor_instance_id: str
    ) -> str:
        """Function to trigger risk evaluation for a PTA subscription in a space.
        This assumes that the payload logging and feedback data sets are populated.

        Args:
            monitor_instance_id (str): monitor instance id

        Returns:
            str: measurement_id
        """
        self.logger.info(
            f"Starting risk evaluation for PTA subscription in space. monitor_instance_id: {monitor_instance_id}"
        )
        display_message_with_frame(
            message=f"Evaluating risk of MRM monitor id {monitor_instance_id}"
        )
        url = f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/monitoring_services/mrm/monitor_instances/{monitor_instance_id}/risk_evaluations"
        headers = {
            "Authorization": f"Bearer {self.__iam_access_token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

        try:
            self.__send_request(
                method="post",
                url=url,
                headers=headers,
                json={}
            )
        except Exception as e:
            message = f"Failed to do risk evaluation for PTA subscription. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info("Waiting for risk evaluation result")
        # Retry for 15 minutes (180 retries, 5 seconds apart)
        for attempt in range(180):
            response = self.__send_request(
                method="get",
                url=url,
            )

            try:
                json_response = response.json()
                state = json_response["entity"]["status"]["state"]
            except Exception as e:
                message = f"Failed to parse risk evaluation status response. {e}"
                self.logger.error(message)
                raise Exception(message)

            if state.lower() == "finished":
                self.logger.info(
                    f"Risk evaluation is done. Status {state}")
                break
            elif state.lower() == "error":
                message = f"Risk evaluation failed due to an error. {response.text}"
                self.logger.error(message)
                raise Exception(message)
            else:
                self.logger.info(
                    f"Attempt {attempt+1} not done. Retrying...")
                time.sleep(5)
        else:
            message = "Risk evaluation status did not update after 15 minutes. Aborting..."
            self.logger.error(message)
            raise Exception(message)

        try:
            measurement_id = json_response["entity"]["parameters"]["measurement_id"]
        except Exception as e:
            message = f"Failed to parse measurement id. {e}"
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Successfully finished the risk evaluation.\nMeasurement id for risk evaluation for PTA subscription: {measurement_id}",
        )
        return measurement_id

    def get_monitor_metrics(
        self,
        monitor_instance_id: str,
        measurement_id: str
    ) -> dict[str, any]:
        """Function to get the monitor metrics for a given measurement id.

        Args:
            monitor_instance_id (str): monitor instance id
            measurement_id (str): measurement id

        Returns:
            dict: response body for the monitor
        """
        self.logger.info(
            f"Retrieving metrics for measurement_id: {measurement_id}")

        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/monitor_instances/{monitor_instance_id}/measurements/{measurement_id}",
            )
        except Exception as e:
            message = f"Failed to retrieve monitor metrics. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
        except Exception as e:
            message = f"Failed to parse metrics response. {e}"
            self.logger.error(message)
            raise Exception(message)
        return json_response

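    # Usage sketch (ids are hypothetical): fetch the raw measurement body for
    # one evaluation run; the metric values live under entity.values[*].metrics.
    #
    #   metrics = client.get_monitor_metrics(
    #       monitor_instance_id="<monitor-instance-id>",
    #       measurement_id="<measurement-id>",
    #   )
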
    def get_monitor_instances(
        self, subscription_id: str, monitor_definition_id: str = None
    ) -> list[dict[str, any]]:
        """Function to get the monitor instances."""
        self.logger.info(
            f"Getting all monitors for subscription_id {subscription_id}, monitor_definition_id: {monitor_definition_id}"
        )

        params = {
            "target.target_id": subscription_id,
        }

        if monitor_definition_id is not None:
            params["monitor_definition_id"] = monitor_definition_id

        # Send the request to get all monitors
        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/monitor_instances",
                params=params,
            )
        except Exception as e:
            message = f"Failed to retrieve all monitor instances. {e}"
            self.logger.error(message)
            raise Exception(message)

        # Parse the response to get the needed monitor information
        monitor_instances = []
        try:
            json_response = response.json()
            for instance in json_response["monitor_instances"]:
                monitor_instances.append(
                    {
                        "monitor_name": instance["entity"]["monitor_definition_id"],
                        "data_mart_id": instance["entity"]["data_mart_id"],
                        "status": instance["entity"]["status"]["state"],
                        "monitor_instance_id": instance["metadata"]["id"],
                        "measurement_id": None,
                    }
                )
        except Exception as e:
            message = f"Failed to parse monitor instances json response. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info(f"monitor instances: {monitor_instances}")

        return monitor_instances

    def get_measurement_ids(
        self, subscription_id: str, monitor_definition_id: str = None
    ) -> list[dict[str, str]]:
        """Retrieve measurement IDs for a given subscription ID and monitor
        definition ID.

        Parameters:
        - subscription_id (str): Required. The ID of the subscription.
        - monitor_definition_id (str, optional): The ID of the monitor definition. Defaults to None.

        Returns:
        - List[Dict[str, str]]: A list of dictionaries containing the measurement IDs.

        Raises:
        - Exception: If there is an error retrieving the measurements.
        """
        self.logger.info(
            f"Getting measurement ids for subscription id {subscription_id}. monitor definition id: {monitor_definition_id}"
        )

        params = {
            "target_id": subscription_id,
            "target_type": "subscription",
        }
        if monitor_definition_id is not None:
            params["monitor_definition_id"] = monitor_definition_id

        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/measurements",
                params=params,
            )
        except Exception as e:
            message = f"Failed to get the measurements. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
            monitor_measurements = []
            for instance in json_response["measurements"]:
                monitor_measurements.append(
                    {
                        "monitor_name": instance["entity"]["monitor_definition_id"],
                        "monitor_instance_id": instance["entity"][
                            "monitor_instance_id"
                        ],
                        "measurement_id": instance["metadata"]["id"],
                    }
                )

        except Exception as e:
            message = f"Failed to parse the measurements response. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info(f"monitor measurements: {monitor_measurements}")
        return monitor_measurements

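    # The two getters above return records with a shared "monitor_name" key,
    # e.g. (illustrative values only):
    #
    #   {"monitor_name": "mrm", "monitor_instance_id": "...", "measurement_id": "..."}
    #
    # which is what lets get_monitors_with_measurements_info() below join
    # monitor instances with their latest measurements.
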
    def display_evaluation_url(
        self, stage: EvaluationStage
    ) -> None:
        """Helper function to build the evaluation url based on the given
        stage. This will raise an exception if the stage is not part of the
        setup.

        Args:
            stage (EvaluationStage): The stage whose evaluation url is needed.
        """

        if stage not in self.setup_stages:
            message = f"the stage {stage} must be part of the setup stages"
            self.logger.error(message)
            raise Exception(message)

        # Build the url
        if stage == EvaluationStage.DEVELOPMENT:
            self.logger.info(
                f"Building evaluation url for {stage.value} stage with project id: {self.__stage_configurations[stage].project_id}")
            evaluation_url = f"{self.__platform_url}/wx/prompt-details/{self.__prompt_template_ids[stage]}/evaluate?context=wx&project_id={self.__stage_configurations[stage].project_id}"
            evaluation_url_message = (
                f"User can navigate to the evaluations page in project {evaluation_url}"
            )

        else:
            self.logger.info(
                f"Building evaluation url for {stage.value} stage with space id: {self.__stage_configurations[stage].space_id}")
            evaluation_url = f"{self.__platform_url}/ml-runtime/deployments/{self.__deployment_ids[stage]}/evaluations?space_id={self.__stage_configurations[stage].space_id}&context=wx&flush=true"
            evaluation_url_message = (
                f"User can navigate to the evaluations page in space {evaluation_url}"
            )

        # Display the url
        display_message_with_frame(message=evaluation_url_message)

    def display_factsheet_url(
        self, stage: EvaluationStage
    ) -> None:
        """Helper function to build the factsheet url based on the given stage.
        This will raise an exception if the stage is not part of the setup.

        Args:
            stage (EvaluationStage): The stage whose factsheet url is needed.
        """

        if stage not in self.setup_stages:
            message = f"the stage {stage} must be part of the setup stages"
            self.logger.error(message)
            raise Exception(message)

        # Build the url
        if stage == EvaluationStage.DEVELOPMENT:
            self.logger.info(
                f"Building factsheet url for {stage.value} stage with project id: {self.__stage_configurations[stage].project_id}")
            factsheet_url = f"{self.__platform_url}/wx/prompt-details/{self.__prompt_template_ids[stage]}/factsheet?context=wx&project_id={self.__stage_configurations[stage].project_id}"
            factsheet_url_message = (
                f"User can navigate to the published facts in project {factsheet_url}"
            )

        else:
            self.logger.info(
                f"Building factsheet url for {stage.value} stage with space id: {self.__stage_configurations[stage].space_id}")
            factsheet_url = f"{self.__platform_url}/ml-runtime/deployments/{self.__deployment_ids[stage]}/details?space_id={self.__stage_configurations[stage].space_id}&context=wx&flush=true"
            factsheet_url_message = (
                f"User can navigate to the published facts in space {factsheet_url}"
            )

        # Display the url
        display_message_with_frame(message=factsheet_url_message)

    def get_monitors_with_measurements_info(
        self, stage: EvaluationStage, show_table: bool = False
    ) -> list[dict[str, any]]:
        """This function retrieves the monitor data from the backend and
        returns it as a list. Optionally, the function displays the response
        as a table. It also updates the object state to store the most recent
        monitors info.

        Args:
            stage (EvaluationStage): the evaluation stage
            show_table (bool, optional): whether to display the table or not. Defaults to False.

        Returns:
            list[dict[str, any]]: a list of dictionaries containing the monitor data.
        """

        subscription_id = self.__subscription_ids.get(stage, None)

        if subscription_id is None:
            message = f"Missing subscription_id for {stage}. Ensure the set up process is done for it."
            self.logger.error(message)
            raise Exception(message)

        # Get the monitors info
        monitors_list = self.get_monitor_instances(
            subscription_id=subscription_id)

        # Get the measurements
        measurements_list = self.get_measurement_ids(
            subscription_id=subscription_id)

        # Add the measurement id to the monitors list
        try:
            self.logger.info("Joining monitors list with measurements list.")
            for monitor in monitors_list:
                for measurement in measurements_list:
                    if monitor["monitor_name"] == measurement["monitor_name"]:
                        monitor["measurement_id"] = measurement["measurement_id"]
        except Exception as e:
            message = f"Failed to append measurement ids to monitors list. {e}"
            self.logger.error(message)
            raise Exception(message)

        # Optionally display the table
        if show_table:
            try:
                print(f"Monitors list for subscription_id {subscription_id}:")
                display(pd.DataFrame.from_dict(monitors_list))
            except Exception as e:
                message = f"Failed to display monitors table. {e}"
                self.logger.error(message)
                raise Exception(message)

        self.logger.info(f"Updating the monitors info for {stage}")
        self.__monitors_info[stage] = monitors_list

        self.logger.info(f"Monitors with measurements ids: {monitors_list}")
        return monitors_list

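    # Usage sketch: after an evaluation has run for a stage, refresh and show
    # the joined monitor/measurement table (assuming `client` was set up for
    # that stage):
    #
    #   monitors = client.get_monitors_with_measurements_info(
    #       stage=EvaluationStage.DEVELOPMENT,
    #       show_table=True,
    #   )
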
    def get_metrics_from_monitor_list(
        self,
        stage: EvaluationStage,
        monitor_name: str,
        show_table: bool = False,
    ) -> list[dict[str, any]]:
        """Retrieves metrics from a list of monitors based on the provided
        monitor name.

        Args:
            stage (EvaluationStage): the evaluation stage
            monitor_name (str): The name of the monitor to retrieve metrics for.
            show_table (bool, optional): Whether to display the metrics in a table format. Defaults to False.

        Returns:
            list[dict[str, any]]: A list of dictionaries containing the retrieved metrics data.
        """

        monitors_list = self.__monitors_info.get(stage, None)

        if monitors_list is None:
            message = f"Monitors list for {stage} is not set. Ensure the setup and evaluation steps are done for it."
            self.logger.error(message)
            raise Exception(message)

        monitor = next(
            (
                monitor
                for monitor in monitors_list
                if monitor["monitor_name"] == monitor_name
            ),
            {},
        )

        if (
            monitor.get("monitor_instance_id") is None
            or monitor.get("measurement_id") is None
        ):
            message = f"Missing {monitor_name} monitor details. {monitor}"
            self.logger.error(message)
            raise Exception(message)

        monitor_metrics = self.get_monitor_metrics(
            monitor_instance_id=monitor["monitor_instance_id"],
            measurement_id=monitor["measurement_id"],
        )

        table_data = []
        values = monitor_metrics.get("entity", {}).get("values", {})
        try:
            for value in values:
                metrics_values = value["metrics"]
                tags_list = value["tags"]
                tags = [f"{t['id']}:{t['value']}" for t in tags_list]
                for v in metrics_values:
                    table_data.append(
                        {
                            "ts": monitor_metrics["entity"]["timestamp"],
                            "id": v["id"],
                            "measurement_id": monitor_metrics["metadata"]["id"],
                            "value": v["value"],
                            "lower_limit": v.get("lower_limit", None),
                            "upper_limit": v.get("upper_limit", None),
                            "tags": tags,
                            "monitor_definition_id": monitor_metrics["entity"][
                                "monitor_definition_id"
                            ],
                            "run_id": monitor_metrics["entity"]["run_id"],
                            "target_id": monitor_metrics["entity"]["target"]["target_id"],
                            "target_type": monitor_metrics["entity"]["target"][
                                "target_type"
                            ],
                        }
                    )
        except Exception as e:
            message = f"Failed to parse monitor metrics. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info(f"metrics for {monitor_name}: {table_data}")

        if show_table:
            try:
                display_message_with_frame(f"Metrics for {monitor_name}")
                display(pd.DataFrame.from_dict(table_data)[
                    ["id", "value", "monitor_definition_id", "ts"]])
            except Exception as e:
                message = f"Failed to display metrics for {monitor_name}. {e}"
                self.logger.error(message)
                raise Exception(message)

        return table_data

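    # Each row returned above is flat and display-ready, e.g. (illustrative
    # values):
    #
    #   {"ts": "...", "id": "<metric-id>", "measurement_id": "...",
    #    "value": 0.42, "lower_limit": None, "upper_limit": None,
    #    "tags": ["..."], "monitor_definition_id": "mrm", ...}
    #
    # Usage sketch, using the MRM monitor name seen elsewhere in this module:
    #
    #   rows = client.get_metrics_from_monitor_list(
    #       stage=EvaluationStage.DEVELOPMENT,
    #       monitor_name="mrm",
    #       show_table=True,
    #   )
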
    def get_data_set_records_by_id(
        self,
        data_set_id: str,
        show_table: bool = False,
    ) -> dict[str, any]:
        """Retrieves records from a data set using the provided data set ID.

        Args:
            data_set_id (str): The ID of the data set to retrieve records from.
            show_table (bool, optional): Whether to display the records in a table format. Defaults to False.

        Returns:
            dict: The JSON response containing the records.

        Raises:
            Exception: If an error occurs while retrieving or parsing the records.
        """
        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/data_sets/{data_set_id}/records",
            )
        except Exception as e:
            message = f"Failed to get records for data set id: {data_set_id}. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
        except Exception as e:
            message = (
                f"Failed to parse records response for data set id: {data_set_id}. {e}"
            )
            self.logger.error(message)
            raise Exception(message)

        if show_table:
            try:
                records = [
                    record["entity"]["values"] for record in json_response["records"]
                ]
                display_message_with_frame(
                    message=f"Records from data set id {data_set_id}"
                )
                display(pd.DataFrame.from_dict(records))
            except Exception as e:
                message = f"Failed to display data sets records. {e}"
                self.logger.error(message)
                raise Exception(message)

        return json_response

    def get_monitor_data_set_records(
        self,
        stage: EvaluationStage,
        data_set_type: str,
        show_table: bool = False
    ) -> dict[str, any]:
        """Retrieves monitor data set records for a given data set type and
        evaluation stage.

        Parameters:
        - stage (EvaluationStage): The evaluation stage for which to retrieve the data set records.
        - data_set_type (str): The type of data set for which to retrieve the records.
        - show_table (bool, optional): Whether to display the data set records in a table. Defaults to False.

        Returns:
            dict[str, any]: A dictionary containing the data set records.
        """
        self.logger.info(
            f"Getting data set records for {data_set_type} for {stage} stage.")
        subscription_id = self.__subscription_ids.get(stage, None)

        if subscription_id is None:
            message = f"Missing subscription_id for {stage}. Ensure the set up process is done for it."
            self.logger.error(message)
            raise Exception(message)

        display_message_with_frame(
            message=f"Getting monitor data set records for data set type '{data_set_type}' from subscription id {subscription_id}",
        )

        # Get the data set of the requested type
        datasets = self.get_all_data_sets(
            subscription_id=subscription_id,
            data_set_type=data_set_type,
        )

        try:
            data_set_id = datasets["data_sets"][0]["metadata"]["id"]
        except Exception as e:
            message = f"Failed to parse the data set id. {e}"
            self.logger.error(message)
            raise Exception(message)

        return self.get_data_set_records_by_id(
            data_set_id=data_set_id,
            show_table=show_table,
        )

    def get_all_data_sets(
        self,
        subscription_id: str,
        space_id: str = None,
        project_id: str = None,
        data_set_type: str = None,
    ):
        """Retrieves all data sets for a given subscription ID.

        Args:
            subscription_id (str): The ID of the subscription.
            space_id (str, optional): The ID of the space. Defaults to None.
            project_id (str, optional): The ID of the project. Defaults to None.
            data_set_type (str, optional): The type of data set. Defaults to None.

        Returns:
            dict: The JSON response containing the data sets.

        Raises:
            Exception: If there is an error retrieving or parsing the data sets.
        """
        params = {
            "target.target_id": subscription_id,
            "target.target_type": "subscription",
        }
        if project_id is not None:
            params["project_id"] = project_id
        if space_id is not None:
            params["space_id"] = space_id
        if data_set_type is not None:
            params["type"] = data_set_type

        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/data_sets",
                params=params,
            )
        except Exception as e:
            message = f"Failed to retrieve data sets. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
        except Exception as e:
            message = f"Failed to parse data sets response. {e}"
            self.logger.error(message)
            raise Exception(message)

        return json_response

    def add_records_to_data_set(
        self,
        data_set_id: str,
        payload_data: list[dict[str, any]],
        project_id: str = None,
        space_id: str = None,
    ) -> list[dict[str, any]]:
        """Adds records to a data set in the Watson Knowledge Catalog.

        Args:
            data_set_id (str): The ID of the data set to add records to.
            payload_data (list[dict[str, any]]): A list of dictionaries containing the records to add.
            project_id (str, optional): The ID of the project to associate the records with. Defaults to None.
            space_id (str, optional): The ID of the space to associate the records with. Defaults to None.

        Returns:
            list[dict[str, any]]: A list of dictionaries containing the added records.

        Raises:
            Exception: If there is an error adding the records to the data set.
        """
        self.logger.info(
            f"Adding records to data set id: {data_set_id}\nrecords: {payload_data}")
        params = {}
        if space_id is not None:
            params["space_id"] = space_id
        if project_id is not None:
            params["project_id"] = project_id

        try:
            response = self.__send_request(
                method="post",
                url=f"{self.__wos_url}/openscale/{self.service_instance_id}/v2/data_sets/{data_set_id}/records",
                params=params,
                json=payload_data,
            )
        except Exception as e:
            message = f"Failed to add records to data set id {data_set_id}. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            json_response = response.json()
        except Exception as e:
            message = f"Failed to parse add records to data set response. {e}"
            self.logger.error(message)
            raise Exception(message)

        return json_response

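    # For feedback data sets, the expected `payload_data` shape mirrors what
    # __generate_feedback_data builds below: a single-element list of
    # {"fields": [...], "values": [[...], ...]}. Illustrative sketch with
    # hypothetical column names:
    #
    #   client.add_records_to_data_set(
    #       data_set_id="<data-set-id>",
    #       payload_data=[{"fields": ["question", "label"],
    #                      "values": [["What is MRM?", "definition"]]}],
    #   )
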
    def space_deployment_risk_evaluation_data_set_setup(
        self,
        subscription_id: str,
        pl_data: list[dict[str, any]],
        prompt_setup: dict[str, any],
        input_df: pd.DataFrame,
    ):
        """Sets up the data sets needed for evaluating model risk.

        Parameters:
        - subscription_id (str): The ID of the subscription.
        - pl_data (list[dict[str, any]]): A list of dictionaries representing payload logging data.
        - prompt_setup (dict[str, any]): A dictionary representing the prompt setup.
        - input_df (pd.DataFrame): The input data frame.

        Returns:
            None
        """
        self.logger.info(
            f"Evaluating model risk for subscription id: {subscription_id}")

        # Upload records to the data set records to do the mrm evaluation
        payload_logging_data_set_list = self.get_all_data_sets(
            subscription_id=subscription_id,
            data_set_type="payload_logging",
        )

        # Parse the data set id
        try:
            payload_logging_data_set_id = payload_logging_data_set_list[
                "data_sets"][0]["metadata"]["id"]
        except Exception as e:
            message = f"Failed to parse payload logging data set id. {e}"
            self.logger.error(message)
            raise Exception(message)

        message = f"payload logging data set id: {payload_logging_data_set_id}"
        self.logger.info(message)
        display_message_with_frame(message=message)

        # Check if the records are already in the data set
        data_set = self.get_data_set_records_by_id(payload_logging_data_set_id)

        # If the data set is not empty, the data was already added
        if len(data_set["records"]) != 0:
            return

        # Do the upload and wait for the set up to be done
        self.add_records_to_data_set(
            data_set_id=payload_logging_data_set_id,
            payload_data=pl_data,
        )
        print(
            f"Adding payload logging data to data set id: {payload_logging_data_set_id}")
        time.sleep(5)

        # Do the feedback data flow
        feedback_data_set_list = self.get_all_data_sets(
            subscription_id=subscription_id,
            data_set_type="feedback",
        )

        # Parse the data set id
        try:
            feedback_data_set_id = feedback_data_set_list["data_sets"][0]["metadata"]["id"]
        except Exception as e:
            message = f"Failed to parse feedback data set id. {e}"
            self.logger.error(message)
            raise Exception(message)

        message = f"feedback data set id: {feedback_data_set_id}"
        self.logger.info(message)
        display_message_with_frame(message=message)

        # Check if the records are already in the data set
        feedback_data_set = self.get_data_set_records_by_id(
            feedback_data_set_id)

        # If the data set is not empty, the feedback data was already added
        if len(feedback_data_set["records"]) != 0:
            return

        # Build the payload and add the data to the data set
        feedback_data = self.__generate_feedback_data(
            input_df=input_df,
            pl_data=pl_data,
            prompt_setup=prompt_setup,
        )

        # Add the feedback data set and wait for the upload to be done
        self.add_records_to_data_set(
            data_set_id=feedback_data_set_id,
            payload_data=feedback_data,
        )
        print(f"Adding feedback data to data set id: {feedback_data_set_id}")
        time.sleep(5)

    def __generate_feedback_data(
        self,
        input_df: pd.DataFrame,
        pl_data: list[dict[str, any]],
        prompt_setup: dict[str, any]
    ) -> list[dict[str, any]]:
        """Generates feedback data for the given input dataframe, prediction
        data, and prompt setup.

        Args:
            input_df (pd.DataFrame): A pandas DataFrame containing the input data.
            pl_data (list[dict[str, any]]): A list of dictionaries containing prediction data.
            prompt_setup (dict[str, any]): A dictionary containing the necessary information for generating the feedback data.

        Returns:
            list[dict[str, any]]: A list of dictionaries containing the generated feedback data.
        """
        self.logger.info(
            f"Generating add feedback dataset payload. prompt setup: {prompt_setup}")

        try:
            prompt_template_variables = self.__get_prompt_template_input_variables_list()
            fields = prompt_template_variables + [prompt_setup["label_column"]]
        except Exception as e:
            message = f"Failed to retrieve fields from the prompt set up. {e}"
            self.logger.error(message)
            raise Exception(message)

        # Build the payload based on the supplied data and the evaluation response
        feedback_data_values = []
        for row, prediction in zip(input_df.to_dict(orient="records"), pl_data):
            result_row = [row[key] for key in fields if key in row.keys()]
            result_row.append(
                prediction["response"]["results"][0]["generated_text"]
            )
            feedback_data_values.append(result_row)

        fields.append("_original_prediction")

        return [
            {
                "fields": fields,
                "values": feedback_data_values,
            }
        ]

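    # Given prompt variables ["question"] and label column "answer"
    # (hypothetical names), two scored rows would yield a single-element list:
    #
    #   [{"fields": ["question", "answer", "_original_prediction"],
    #     "values": [["q1", "a1", "gen1"], ["q2", "a2", "gen2"]]}]
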
    def __instance_mapping_for_cpd(
        self,
        stage: EvaluationStage,
    ) -> None:
        """Function to check if the given stage has a CPD instance mapping. If
        not, one is created using the configured service instance id, which
        defaults to "00000000-0000-0000-0000-000000000000".

        Args:
            stage (EvaluationStage): The stage where the instance mapping is checked and set.
        """

        url = f"{self.__wos_url}/openscale/v2/instance_mappings"
        params = {}
        if stage == EvaluationStage.DEVELOPMENT:
            params["project_id"] = self.__stage_configurations[stage].project_id
        else:
            params["space_id"] = self.__stage_configurations[stage].space_id

        try:
            self.logger.info(
                f"Checking instance mapping for {stage} stage. {params}")
            response = self.__send_request(
                method="get",
                url=url,
                params=params,
            )
        except Exception as e:
            message = f"Failed to get instance mapping. {e}"
            self.logger.error(message)
            raise Exception(message)

        try:
            # If the instance mapping already exists return, otherwise create one from the configuration
            json_response = response.json()
            if len(json_response.get("instance_mappings", [])) > 0:
                self.logger.info(
                    f"Instance mapping already done for {stage} stage.")
                return
        except Exception as e:
            message = f"Failed to parse instance mapping response. {e}"
            self.logger.error(message)
            raise Exception(message)

        # Build the payload
        # Sample payload:
        # {
        #     "service_instance_id": "00000000-0000-0000-0000-000000000000",
        #     "target": {
        #         "target_type": "space/project",
        #         "target_id": "space_id/project_id",
        #     },
        # }
        if stage == EvaluationStage.DEVELOPMENT:
            target = {
                "target_type": "project",
                "target_id": self.__stage_configurations[stage].project_id
            }
        else:
            target = {
                "target_type": "space",
                "target_id": self.__stage_configurations[stage].space_id
            }
        payload = {
            "service_instance_id": self.service_instance_id,
            "target": target
        }

        try:
            response = self.__send_request(
                method="post",
                url=url,
                json=payload
            )
        except Exception as e:
            message = f"Failed to map service instance id. {e}"
            self.logger.error(message)
            raise Exception(message)

        self.logger.info("Instance mapping done successfully.")

    def setup(
        self,
        configuration: dict,
        prompt_template: PromptTemplate | DetachedPromptTemplate = None,
        prompt_template_id: str = None,
        setup_stages: list[EvaluationStage] = [
            EvaluationStage.DEVELOPMENT, EvaluationStage.PRODUCTION],
    ) -> None:
        """Function to do the set up based on the configuration provided.

        This will do the following:
        - By default, the set up will be done for the development and production stages.
        - Create the prompt template asset
        - Prompt set up in the provided projects and spaces
        - Monitor set up
        - Associate the prompt template with a usecase (optional)

        Args:
            configuration (dict): The configuration dictionary.
                Configuration structure:
                configurations =
                {
                    "common_configurations": {
                        "credentials": {
                            "iam_url": "",
                            "apikey": "",
                        },
                        "ai_usecase": {  // optional
                            "ai_usecase_id": str,
                            "catalog_id": str,
                            "approach_version": str,
                            "approach_id": str,
                        },
                        "service_instance_id": "",
                        "use_ssl": bool,
                        "use_cpd": bool,
                        "wml_url": str,
                        "platform_url": str,
                        "wos_url": str,
                        "dataplatform_url": str,
                    },
                    "development": {
                        "project_id": "",
                        "prompt_setup": {...},
                    },
                    "pre_production": {
                        "space_id": "",
                        "space_deployment": {...},
                        "prompt_setup": {...},
                    },
                    "production": {
                        "space_id": "",
                        "space_deployment": {...},
                        "prompt_setup": {...},
                    },
                }
            prompt_template (PromptTemplate): The prompt template to evaluate.
            prompt_template_id (str): Prompt template id for an existing prompt.
            setup_stages (list[EvaluationStage]): list of stages to do the prompt set up for. Defaults to [EvaluationStage.DEVELOPMENT, EvaluationStage.PRODUCTION].
        """

        if prompt_template is None and prompt_template_id is None:
            raise Exception(
                "Please provide either prompt_template or prompt_template_id")

        # Set the setup stages
        self.setup_stages = setup_stages

        # Validate inputs before parsing the config
        self.__validate_configuration(config=configuration)

        # Parse the configuration
        self.__parse_configuration(config=configuration)

        # Authenticate
        try:
            self.__authenticator = Authenticator(
                credentials=self.credentials,
                use_cpd=self.use_cpd,
                use_ssl=self.use_ssl,
            )
            self.__iam_access_token = self.__authenticator.authenticate()
        except Exception as e:
            message = f"Failed to authenticate the client. {e}"
            self.logger.error(message)
            raise Exception(message)

        # The development stage should be set up first
        for stage in [EvaluationStage.DEVELOPMENT, EvaluationStage.PRE_PRODUCTION, EvaluationStage.PRODUCTION]:
            if stage not in self.setup_stages:
                continue

            message = f"Starting setup process for {stage.value}"
            self.logger.info(message)
            display_message_with_frame(message=message)

            # If using CPD, check the instance mapping and set it up if it does not exist
            if self.use_cpd:
                self.__instance_mapping_for_cpd(stage=stage)

            # If we are doing the set up for development, use the setup flow for a project
            if stage == EvaluationStage.DEVELOPMENT:
                # Do the setup for the project
                self.logger.info(f"Setting up {stage.value} environment")

                # If the user provided a prompt template, use it, otherwise use the prompt template id
                if prompt_template:
                    self.__prompt_template_ids[stage] = self.create_prompt_template_using_wxai(
                        prompt_template=prompt_template,
                        stage=stage,
                    )
                else:
                    self.__prompt_template_ids[stage] = self.get_prompt_template_details_from_wxai(
                        prompt_template_id=prompt_template_id,
                        stage=stage,
                    )

                self.is_detached = isinstance(
                    self.__stage_configurations[stage].prompt_template, DetachedPromptTemplate)

                self.trigger_prompt_setup(stage)

                if not self.is_detached:
                    self.__scoring_urls[stage] = self.get_scoring_url(
                        self.__subscription_ids[stage])

                    display_message_with_frame(
                        message=f"Development scoring url: {self.__scoring_urls[stage]}"
                    )

                display_message_with_frame(
                    message=f"{stage.value} monitors:"
                )
                self.__monitors_info[stage] = (
                    self.get_monitors_with_measurements_info(
                        stage=stage,
                        show_table=True,
                    )
                )
                display_message_with_frame(
                    message=f"{stage.value} prompt set up finished successfully"
                )

            # If we are doing the set up for production or pre_production, use the setup flow for a space
            elif stage in [EvaluationStage.PRE_PRODUCTION, EvaluationStage.PRODUCTION]:
                # Do the setup for the space
                self.logger.info(f"Setting up {stage.value} space environment")

                if prompt_template_id or self.__stage_configurations[EvaluationStage.DEVELOPMENT].project_id:
                    # If the prompt template id was provided by the user, retrieve the prompt details.
                    if prompt_template_id:
                        self.__prompt_template_ids[EvaluationStage.DEVELOPMENT] = self.get_prompt_template_details_from_wxai(
                            prompt_template_id=prompt_template_id,
                            stage=EvaluationStage.DEVELOPMENT,
                        )

                    self.is_detached = isinstance(
                        self.__stage_configurations[EvaluationStage.DEVELOPMENT].prompt_template, DetachedPromptTemplate)

                    self.__stage_configurations[stage].space_deployment["base_model_id"] = self.__stage_configurations[
                        EvaluationStage.DEVELOPMENT].prompt_template.model_id

                    self.__prompt_template_ids[stage] = self.promote_prompt_to_space(
                        # Always promote the template from the development environment
                        project_id=self.__stage_configurations[EvaluationStage.DEVELOPMENT].project_id,
                        project_prompt_template_id=self.__prompt_template_ids[
                            EvaluationStage.DEVELOPMENT],
                        space_id=self.__stage_configurations[stage].space_id,
                    )
                else:
                    self.__prompt_template_ids[stage] = self.create_prompt_template_using_wxai(
                        prompt_template=prompt_template,
                        stage=stage,
                    )
                    self.is_detached = isinstance(
                        self.__stage_configurations[stage].prompt_template, DetachedPromptTemplate)

                self.__deployment_ids[stage] = self.create_pta_space_deployment(
                    space_configurations=self.__stage_configurations[stage],
                    space_prompt_template_id=self.__prompt_template_ids[stage],
                )
                self.trigger_prompt_setup(stage)

                if not self.is_detached:
                    self.__scoring_urls[stage] = self.get_scoring_url(
                        self.__subscription_ids[stage])

                    display_message_with_frame(
                        message=f"{stage.value} scoring url: {self.__scoring_urls[stage]}"
                    )

                display_message_with_frame(
                    message=f"{stage.value} monitors:"
                )
                self.__monitors_info[stage] = (
                    self.get_monitors_with_measurements_info(
                        stage=stage,
                        show_table=True,
                    )
                )
                display_message_with_frame(
                    message=f"{stage.value} prompt set up finished successfully"
                )

            # Track the prompt template with a usecase
            if self.ai_usecase:
                self.__track_pta_with_usecase(stage)

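    # Usage sketch: the ids are hypothetical and the structure follows the
    # setup() docstring above.
    #
    #   configuration = {
    #       "common_configurations": {
    #           "credentials": {"iam_url": "<iam-url>", "apikey": "<api-key>"},
    #           "use_ssl": True,
    #           "use_cpd": False,
    #       },
    #       "development": {"project_id": "<project-id>", "prompt_setup": {...}},
    #       "production": {"space_id": "<space-id>",
    #                      "space_deployment": {...}, "prompt_setup": {...}},
    #   }
    #   client.setup(configuration=configuration,
    #                prompt_template_id="<prompt-template-id>")
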
    def __parse_configuration(self, config: dict) -> None:
        """Function to parse the configuration. This assumes that the
        configuration object is already validated.

        Args:
            config (dict): validated configuration object.
        """
        # Parse the config
        self.config = config

        # Parse common_configurations
        self.use_cpd: bool = self.config["common_configurations"]["use_cpd"]
        self.credentials: dict[str, str] = self.config["common_configurations"][
            "credentials"
        ]
        self.use_ssl: bool = self.config["common_configurations"]["use_ssl"]
        self.service_instance_id: str = self.config["common_configurations"].get(
            "service_instance_id", "00000000-0000-0000-0000-000000000000"
        )
        self.wml_url: str = self.config["common_configurations"].get(
            "wml_url", "https://us-south.ml.cloud.ibm.com")
        self.platform_url: str = self.config["common_configurations"].get(
            "platform_url", "https://dataplatform.cloud.ibm.com")
        self.wos_url: str = self.config["common_configurations"].get(
            "wos_url", "https://api.aiopenscale.cloud.ibm.com")
        self.dataplatform_url: str = self.config["common_configurations"].get(
            "dataplatform_url", "https://api.dataplatform.cloud.ibm.com"
        )

        # Parse model usecase details if provided by the user
        if self.config["common_configurations"].get("ai_usecase"):
            usecase = self.config["common_configurations"].get("ai_usecase")
            self.ai_usecase = ModelUsecase(
                usecase_id=usecase.get("ai_usecase_id"),
                catalog_id=usecase.get("catalog_id"),
                version=usecase.get("approach_version"),
                approach_id=usecase.get("approach_id"),
            )

        # Parse development related configurations
        self.__stage_configurations[EvaluationStage.DEVELOPMENT].prompt_setup = self.config.get(
            "development", {}).get("prompt_setup")
        self.__stage_configurations[EvaluationStage.DEVELOPMENT].project_id = self.config.get(
            "development", {}).get("project_id")

        # Parse pre_production related configurations
        if EvaluationStage.PRE_PRODUCTION in self.setup_stages:
            # Check if we have project or space, then init the config option
            self.__stage_configurations[EvaluationStage.PRE_PRODUCTION].space_id = self.config["pre_production"]["space_id"]
            self.__stage_configurations[EvaluationStage.PRE_PRODUCTION].space_deployment = self.config["pre_production"]["space_deployment"]
            self.__stage_configurations[EvaluationStage.PRE_PRODUCTION].prompt_setup = self.config["pre_production"]["prompt_setup"]

        # Parse production related configurations
        if EvaluationStage.PRODUCTION in self.setup_stages:
            self.__stage_configurations[EvaluationStage.PRODUCTION].space_id = self.config["production"]["space_id"]
            self.__stage_configurations[EvaluationStage.PRODUCTION].space_deployment = self.config["production"]["space_deployment"]
            self.__stage_configurations[EvaluationStage.PRODUCTION].prompt_setup = self.config["production"]["prompt_setup"]

        # Parse the credentials
        if self.use_cpd:
            self.__platform_url = self.credentials["url"]
            self.__wos_url = self.credentials["url"]
            self.__dataplatform_url = self.credentials["url"]
            self.__wml_url = self.credentials["url"]
        else:
            self.__wml_url = self.wml_url
            self.__platform_url = self.platform_url
            self.__wos_url = self.wos_url
            self.__dataplatform_url = self.dataplatform_url

    def get_monitors_info(self, stage: EvaluationStage):
        """Retrieves monitor information based on the provided stage.

        Args:
            stage (EvaluationStage): The stage for which monitor information is required.

        Returns:
            dict: A dictionary containing monitor information.

        Raises:
            Exception: If the monitor info is not set.
        """
        self.logger.info(f"Retrieving monitors info for {stage}")
        monitors_info = self.__monitors_info.get(stage, None)
        if monitors_info is None:
            message = f"monitors info for the stage {stage} is not set"
            self.logger.error(message)
            raise Exception(message)

        return monitors_info

    def __generate_detached_prompt_payload_data(self, input_df: pd.DataFrame, prediction_field: str = "generated_text") -> list[dict[str, any]]:
        """Helper method to generate the payload data for a detached prompt.

        Args:
            input_df (pd.DataFrame): The detached prompt dataframe
            prediction_field (str): column name for the prediction value. Defaults to "generated_text"

        Returns:
            list[dict[str, any]]: payload data
        """
        self.logger.info("Generating payload data for detached prompt")

        prompt_template_variables = self.__get_prompt_template_input_variables_list()

        pl_data = []
        for _, row in input_df.iterrows():
            pl_data.append(
                {
                    "request": {
                        "parameters": {
                            "template_variables": {
                                k: str(row[k]) for k in prompt_template_variables
                            }
                        }
                    },
                    "response": {
                        "results": [{"generated_text": str(row[prediction_field])}]
                    }
                }
            )
        return pl_data

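    # One record is emitted per dataframe row, shaped like a scoring exchange
    # (illustrative values; "question" is a hypothetical template variable):
    #
    #   {"request": {"parameters": {"template_variables": {"question": "..."}}},
    #    "response": {"results": [{"generated_text": "..."}]}}
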
def evaluate(
|
|
2068
|
+
self,
|
|
2069
|
+
input_df: pd.DataFrame,
|
|
2070
|
+
evaluation_stages: list[EvaluationStage] = [
|
|
2071
|
+
EvaluationStage.DEVELOPMENT,
|
|
2072
|
+
EvaluationStage.PRODUCTION,
|
|
2073
|
+
],
|
|
2074
|
+
) -> None:
|
|
2075
|
+
"""Evaluate the input data in the specified stages.
|
|
2076
|
+
|
|
2077
|
+
Args:
|
|
2078
|
+
input_df (pd.DataFrame): The input dataframe to be evaluated. This should only contain the columns required by the prompt template.
|
|
2079
|
+
evaluation_stages (list[EvaluationStage], optional): list of environment stages to evaluate the dataframe in.
|
|
2080
|
+
The stages here must exist in setup_stages in setup() too. Defaults to [ EvaluationStage.development, EvaluationStage.production].
|
|
2081
|
+
"""
|
|
2082
|
+
self.logger.info(
|
|
2083
|
+
f"Evaluating the input data in {[stage.value for stage in evaluation_stages]} environments"
|
|
2084
|
+
)
|
|
2085
|
+
if self.config is None:
|
|
2086
|
+
message = "Configuration is not set yet."
|
|
2087
|
+
self.logger.error(message)
|
|
2088
|
+
raise Exception(message)
|
|
2089
|
+
|
|
2090
|
+
# Validate the we have the evaluation stage as part of the setup stages
|
|
2091
|
+
for stage in evaluation_stages:
|
|
2092
|
+
if stage not in self.setup_stages:
|
|
2093
|
+
message = f"The set up step for stage {stage} was not done."
|
|
2094
|
+
self.logger.error(message)
|
|
2095
|
+
raise Exception(message)
|
|
2096
|
+
|
|
2097
|
+
for stage in evaluation_stages:
|
|
2098
|
+
display_message_with_frame(
|
|
2099
|
+
message=f"Starting evaluation for {stage.value} stage",
|
|
2100
|
+
)
|
|
2101
|
+
|
|
2102
|
+
if stage == EvaluationStage.DEVELOPMENT:
|
|
2103
|
+
if not self.is_detached and not self.__scoring_urls[stage]:
|
|
2104
|
+
raise Exception(f"{stage.value} scoring url is not set")
|
|
2105
|
+
|
|
2106
|
+
# Get the MRM monitor id
|
|
2107
|
+
mrm_monitors = self.get_monitor_instances(
|
|
2108
|
+
subscription_id=self.__subscription_ids[stage],
|
|
2109
|
+
monitor_definition_id="mrm",
|
|
2110
|
+
)
|
|
2111
|
+
if not mrm_monitors:
|
|
2112
|
+
message = "MRM monitor is not configured"
|
|
2113
|
+
self.logger.error(message)
|
|
2114
|
+
raise Exception(message)
|
|
2115
|
+
|
|
2116
|
+
mrm_monitor_id = mrm_monitors[0]["monitor_instance_id"]
|
|
2117
|
+
|
|
2118
|
+
# Do the risk evaluations
|
|
2119
|
+
self.risk_evaluation_for_pta_subscription(
|
|
2120
|
+
input_df=input_df,
|
|
2121
|
+
monitor_instance_id=mrm_monitor_id,
|
|
2122
|
+
)
|
|
2123
|
+
|
|
2124
|
+
# Get all the monitors with measurements ids and display its table
|
|
2125
|
+
self.__monitors_info[stage] = (
|
|
2126
|
+
self.get_monitors_with_measurements_info(
|
|
2127
|
+
stage=stage,
|
|
2128
|
+
show_table=True,
|
|
2129
|
+
)
|
|
2130
|
+
)
|
|
2131
|
+
|
|
2132
|
+
# Display the factsheet url
|
|
2133
|
+
self.display_factsheet_url(stage=stage)
|
|
2134
|
+
|
|
2135
|
+
elif stage in [EvaluationStage.PRE_PRODUCTION, EvaluationStage.PRODUCTION]:
|
|
2136
|
+
|
|
2137
|
+
if self.is_detached:
|
|
2138
|
+
# Get the payload_data from the cvs file
|
|
2139
|
+
pl_data = self.__generate_detached_prompt_payload_data(
|
|
2140
|
+
input_df=input_df,
|
|
2141
|
+
prediction_field=self.__stage_configurations[stage].prompt_setup.get(
|
|
2142
|
+
"prediction_field", "generated_text")
|
|
2143
|
+
)
|
|
2144
|
+
else:
|
|
2145
|
+
if not self.__scoring_urls[stage]:
|
|
2146
|
+
raise Exception(
|
|
2147
|
+
f"{stage.value} scoring url is not set")
|
|
2148
|
+
# Evaluate the dataframe
|
|
2149
|
+
pl_data = self.evaluate_df(
|
|
2150
|
+
input_df=input_df,
|
|
2151
|
+
scoring_url=self.__scoring_urls[stage],
|
|
2152
|
+
)
|
|
2153
|
+
|
|
2154
|
+
self.space_deployment_risk_evaluation_data_set_setup(
|
|
2155
|
+
subscription_id=self.__subscription_ids[stage],
|
|
2156
|
+
pl_data=pl_data,
|
|
2157
|
+
prompt_setup=self.__stage_configurations[stage].prompt_setup,
|
|
2158
|
+
input_df=input_df,
|
|
2159
|
+
)
|
|
2160
|
+
|
|
2161
|
+
# Get the MRM monitor id
|
|
2162
|
+
mrm_monitors = self.get_monitor_instances(
|
|
2163
|
+
subscription_id=self.__subscription_ids[stage],
|
|
2164
|
+
monitor_definition_id="mrm",
|
|
2165
|
+
)
|
|
2166
|
+
if not mrm_monitors:
|
|
2167
|
+
message = "MRM monitor is not configured"
|
|
2168
|
+
self.logger.error(message)
|
|
2169
|
+
raise Exception(message)
|
|
2170
|
+
|
|
2171
|
+
mrm_monitor_id = mrm_monitors[0]["monitor_instance_id"]
|
|
2172
|
+
|
|
2173
|
+
# Evaluate mrm monitor
|
|
2174
|
+
if stage == EvaluationStage.PRE_PRODUCTION:
|
|
2175
|
+
self.risk_evaluation_for_pta_subscription(
|
|
2176
|
+
input_df=input_df,
|
|
2177
|
+
monitor_instance_id=mrm_monitor_id,
|
|
2178
|
+
)
|
|
2179
|
+
else:
|
|
2180
|
+
self.risk_evaluation_for_pta_subscription_in_space(
|
|
2181
|
+
monitor_instance_id=mrm_monitor_id
|
|
2182
|
+
)
|
|
2183
|
+
|
|
2184
|
+
# Get the pre production monitors info with their measurement id
|
|
2185
|
+
self.__monitors_info[stage] = (
|
|
2186
|
+
self.get_monitors_with_measurements_info(
|
|
2187
|
+
stage=stage,
|
|
2188
|
+
show_table=True,
|
|
2189
|
+
)
|
|
2190
|
+
)
|
|
2191
|
+
|
|
2192
|
+
# display the factsheet url
|
|
2193
|
+
self.display_factsheet_url(stage=stage)
|
|
2194
|
+
|
|
2195
|
+
display_message_with_frame(
|
|
2196
|
+
message=f"Finished evaluation for {stage.value} stage",
|
|
2197
|
+
)
|
|
2198
|
+
|
|
2199
|
+
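
A minimal usage sketch for evaluate(), assuming an already-configured evaluator instance (its construction and the setup() call are defined elsewhere in this module) and assuming EvaluationStage is importable from ibm_watsonx_gov.entities.enums:

import pandas as pd

from ibm_watsonx_gov.entities.enums import EvaluationStage  # assumed import path

# Hypothetical: `evaluator` was created and set up earlier, with setup stages
# covering both stages evaluated below.
test_df = pd.DataFrame(
    [{"text": "What is model risk management?", "generated_text": "It is the practice of..."}]
)
evaluator.evaluate(
    input_df=test_df,
    evaluation_stages=[EvaluationStage.DEVELOPMENT, EvaluationStage.PRODUCTION],
)
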
    def get_prompt_template_id(self, stage: EvaluationStage = EvaluationStage.DEVELOPMENT):
        return self.__prompt_template_ids[stage]

    def __is_workspace_associated_with_usecase(
        self,
        usecase_id: str,
        catalog_id: str,
        workspace_id: str
    ):
        """Helper to check if a workspace is associated with a usecase"""

        display_message_with_frame(
            f"Checking if workspace {workspace_id} is associated with usecase {usecase_id}."
        )

        try:
            response = self.__send_request(
                method="get",
                url=f"{self.__dataplatform_url}/v1/aigov/factsheet/ai_usecases/{usecase_id}/workspaces",
                params={"inventory_id": catalog_id},
            )
        except Exception as e:
            raise Exception(
                f"Failed to check if workspace is associated with usecase. {e}")

        try:
            json_response = response.json()
            for associated_workspace in json_response["associated_workspaces"]:
                for workspace in associated_workspace["workspaces"]:
                    if workspace["id"] == workspace_id:
                        return True
        except Exception as e:
            raise Exception(f"Failed to parse workspace ids response. {e}")

        return False
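
The parsing loop above implies a response body shaped roughly as follows. A standalone sketch of the same membership check against illustrative (not captured) JSON:

# Illustrative response shape implied by the parsing loop above.
json_response = {
    "associated_workspaces": [
        {
            "workspaces": [
                {"id": "project-1234", "type": "project"},
                {"id": "space-5678", "type": "space"},
            ]
        }
    ]
}

workspace_id = "space-5678"
is_associated = any(
    workspace["id"] == workspace_id
    for associated_workspace in json_response["associated_workspaces"]
    for workspace in associated_workspace["workspaces"]
)
print(is_associated)  # True
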
    def __associate_workspace_with_usecase(
        self,
        usecase_id: str,
        catalog_id: str,
        workspace: Union[ProjectConfigurations, SpaceConfigurations],
    ):
        """Helper to associate a workspace with a usecase"""
        if isinstance(workspace, ProjectConfigurations):
            workspace_id = workspace.project_id
            workspace_type = "project"
            phase_name = "Develop"
        else:
            workspace_id = workspace.space_id
            workspace_type = "space"
            phase_name = "Operate"

        display_message_with_frame(
            f"Associating workspace id {workspace_id} with usecase {usecase_id}"
        )

        payload = {
            "phase_name": phase_name,
            "workspaces": [
                {
                    "id": workspace_id,
                    "type": workspace_type,
                }
            ],
        }

        try:
            self.__send_request(
                method="post",
                url=f"{self.__dataplatform_url}/v1/aigov/factsheet/ai_usecases/{usecase_id}/workspaces",
                params={"inventory_id": catalog_id},
                json=payload,
            )
        except Exception as e:
            raise Exception(f"Failed to associate workspace with usecase. {e}")

        display_message_with_frame(
            "Workspace associated with usecase successfully")
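
Stripped of the class plumbing, the association above reduces to one POST request. A sketch with the requests library; the host, bearer-token header, and ids are placeholders and assumptions, since __send_request's internals are not shown in this diff:

import requests

# Placeholders: real values come from the evaluator's configuration.
dataplatform_url = "https://api.dataplatform.cloud.ibm.com"
usecase_id = "<usecase-id>"
catalog_id = "<catalog-id>"
token = "<bearer-token>"  # auth scheme assumed; __send_request handles this internally

response = requests.post(
    f"{dataplatform_url}/v1/aigov/factsheet/ai_usecases/{usecase_id}/workspaces",
    params={"inventory_id": catalog_id},
    headers={"Authorization": f"Bearer {token}"},
    json={
        "phase_name": "Develop",  # "Operate" when associating a space
        "workspaces": [{"id": "<project-id>", "type": "project"}],
    },
    timeout=60,
)
response.raise_for_status()
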
    def __track_pta_with_usecase(self, stage):
        """
        Helper function to associate a workspace with a usecase in the factsheet
        """
        display_message_with_frame(
            "Starting prompt template usecase tracking process")

        if isinstance(self.__stage_configurations[stage], ProjectConfigurations):
            params = {
                "project_id": self.__stage_configurations[stage].project_id}
            workspace_id = self.__stage_configurations[stage].project_id
        else:
            params = {"space_id": self.__stage_configurations[stage].space_id}
            workspace_id = self.__stage_configurations[stage].space_id

        if not self.__is_workspace_associated_with_usecase(
            usecase_id=self.ai_usecase.usecase_id,
            catalog_id=self.ai_usecase.catalog_id,
            workspace_id=workspace_id,
        ):
            self.__associate_workspace_with_usecase(
                usecase_id=self.ai_usecase.usecase_id,
                catalog_id=self.ai_usecase.catalog_id,
                workspace=self.__stage_configurations[stage],
            )

        payload = {
            "model_entry_catalog_id": self.ai_usecase.catalog_id,
            "model_entry_asset_id": self.ai_usecase.usecase_id,
            "version_details": {
                "number": self.ai_usecase.version,
            }
        }

        if self.ai_usecase.approach_id:
            payload["version_details"]["approach_id"] = self.ai_usecase.approach_id

        try:
            self.__send_request(
                method="post",
                url=f"{self.__dataplatform_url}/v1/aigov/model_inventory/models/{self.__prompt_template_ids[stage]}/model_entry",
                json=payload,
                params=params,
            )
        except Exception as e:
            message = f"Failed to track usecase. {e}"
            self.logger.debug(message)
            return

        display_message_with_frame(
            message=f"Prompt template id {self.__prompt_template_ids[stage]} is tracked with usecase id {self.ai_usecase.usecase_id} successfully."
        )
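
For reference, the tracking payload assembled above takes the following shape (illustrative values; approach_id is included only when the usecase defines one):

# Illustrative payload for the model_entry tracking request above.
payload = {
    "model_entry_catalog_id": "<catalog-id>",
    "model_entry_asset_id": "<usecase-id>",
    "version_details": {
        "number": "1.0.0",
        "approach_id": "<approach-id>",  # only when ai_usecase.approach_id is set
    },
}
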