ibm-watsonx-orchestrate-evaluation-framework 1.0.3__py3-none-any.whl → 1.1.8b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/METADATA +53 -0
- ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/RECORD +146 -0
- wxo_agentic_evaluation/analytics/tools/analyzer.py +38 -21
- wxo_agentic_evaluation/analytics/tools/main.py +19 -25
- wxo_agentic_evaluation/analytics/tools/types.py +26 -11
- wxo_agentic_evaluation/analytics/tools/ux.py +75 -31
- wxo_agentic_evaluation/analyze_run.py +1184 -97
- wxo_agentic_evaluation/annotate.py +7 -5
- wxo_agentic_evaluation/arg_configs.py +97 -5
- wxo_agentic_evaluation/base_user.py +25 -0
- wxo_agentic_evaluation/batch_annotate.py +97 -27
- wxo_agentic_evaluation/clients.py +103 -0
- wxo_agentic_evaluation/compare_runs/__init__.py +0 -0
- wxo_agentic_evaluation/compare_runs/compare_2_runs.py +74 -0
- wxo_agentic_evaluation/compare_runs/diff.py +554 -0
- wxo_agentic_evaluation/compare_runs/model.py +193 -0
- wxo_agentic_evaluation/data_annotator.py +45 -19
- wxo_agentic_evaluation/description_quality_checker.py +178 -0
- wxo_agentic_evaluation/evaluation.py +50 -0
- wxo_agentic_evaluation/evaluation_controller/evaluation_controller.py +303 -0
- wxo_agentic_evaluation/evaluation_package.py +544 -107
- wxo_agentic_evaluation/external_agent/__init__.py +18 -7
- wxo_agentic_evaluation/external_agent/external_validate.py +49 -36
- wxo_agentic_evaluation/external_agent/performance_test.py +33 -22
- wxo_agentic_evaluation/external_agent/types.py +8 -7
- wxo_agentic_evaluation/extractors/__init__.py +3 -0
- wxo_agentic_evaluation/extractors/extractor_base.py +21 -0
- wxo_agentic_evaluation/extractors/labeled_messages.py +47 -0
- wxo_agentic_evaluation/hr_agent_langgraph.py +68 -0
- wxo_agentic_evaluation/langfuse_collection.py +60 -0
- wxo_agentic_evaluation/langfuse_evaluation_package.py +192 -0
- wxo_agentic_evaluation/llm_matching.py +108 -5
- wxo_agentic_evaluation/llm_rag_eval.py +7 -4
- wxo_agentic_evaluation/llm_safety_eval.py +64 -0
- wxo_agentic_evaluation/llm_user.py +12 -6
- wxo_agentic_evaluation/llm_user_v2.py +114 -0
- wxo_agentic_evaluation/main.py +128 -246
- wxo_agentic_evaluation/metrics/__init__.py +15 -0
- wxo_agentic_evaluation/metrics/dummy_metric.py +16 -0
- wxo_agentic_evaluation/metrics/evaluations.py +107 -0
- wxo_agentic_evaluation/metrics/journey_success.py +137 -0
- wxo_agentic_evaluation/metrics/llm_as_judge.py +28 -2
- wxo_agentic_evaluation/metrics/metrics.py +319 -16
- wxo_agentic_evaluation/metrics/tool_calling.py +93 -0
- wxo_agentic_evaluation/otel_parser/__init__.py +1 -0
- wxo_agentic_evaluation/otel_parser/langflow_parser.py +86 -0
- wxo_agentic_evaluation/otel_parser/langgraph_parser.py +61 -0
- wxo_agentic_evaluation/otel_parser/parser.py +163 -0
- wxo_agentic_evaluation/otel_parser/parser_types.py +38 -0
- wxo_agentic_evaluation/otel_parser/pydantic_parser.py +50 -0
- wxo_agentic_evaluation/otel_parser/utils.py +15 -0
- wxo_agentic_evaluation/otel_parser/wxo_parser.py +39 -0
- wxo_agentic_evaluation/otel_support/evaluate_tau.py +101 -0
- wxo_agentic_evaluation/otel_support/otel_message_conversion.py +29 -0
- wxo_agentic_evaluation/otel_support/tasks_test.py +1566 -0
- wxo_agentic_evaluation/prompt/bad_tool_descriptions_prompt.jinja2 +178 -0
- wxo_agentic_evaluation/prompt/derailment_prompt.jinja2 +55 -0
- wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2 +59 -5
- wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2 +15 -0
- wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2 +34 -0
- wxo_agentic_evaluation/prompt/on_policy_attack_generation_prompt.jinja2 +46 -0
- wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2 +41 -9
- wxo_agentic_evaluation/prompt/template_render.py +163 -12
- wxo_agentic_evaluation/prompt/unsafe_topic_prompt.jinja2 +65 -0
- wxo_agentic_evaluation/quick_eval.py +384 -0
- wxo_agentic_evaluation/record_chat.py +132 -81
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py +302 -0
- wxo_agentic_evaluation/red_teaming/attack_generator.py +329 -0
- wxo_agentic_evaluation/red_teaming/attack_list.py +184 -0
- wxo_agentic_evaluation/red_teaming/attack_runner.py +204 -0
- wxo_agentic_evaluation/referenceless_eval/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py +28 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +29 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general.py +49 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json +783 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection.py +31 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json +600 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +245 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +106 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +291 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +465 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +162 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py +509 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +562 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/field.py +266 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +344 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +193 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +413 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +46 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +158 -0
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +191 -0
- wxo_agentic_evaluation/resource_map.py +6 -3
- wxo_agentic_evaluation/runner.py +329 -0
- wxo_agentic_evaluation/runtime_adapter/a2a_runtime_adapter.py +0 -0
- wxo_agentic_evaluation/runtime_adapter/runtime_adapter.py +14 -0
- wxo_agentic_evaluation/{inference_backend.py → runtime_adapter/wxo_runtime_adapter.py} +88 -150
- wxo_agentic_evaluation/scheduler.py +247 -0
- wxo_agentic_evaluation/service_instance.py +117 -26
- wxo_agentic_evaluation/service_provider/__init__.py +182 -17
- wxo_agentic_evaluation/service_provider/gateway_provider.py +707 -0
- wxo_agentic_evaluation/service_provider/model_proxy_provider.py +628 -45
- wxo_agentic_evaluation/service_provider/ollama_provider.py +392 -22
- wxo_agentic_evaluation/service_provider/portkey_provider.py +229 -0
- wxo_agentic_evaluation/service_provider/provider.py +129 -10
- wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +203 -0
- wxo_agentic_evaluation/service_provider/watsonx_provider.py +516 -53
- wxo_agentic_evaluation/simluation_runner.py +125 -0
- wxo_agentic_evaluation/test_prompt.py +4 -4
- wxo_agentic_evaluation/tool_planner.py +141 -46
- wxo_agentic_evaluation/type.py +217 -14
- wxo_agentic_evaluation/user_simulator/demo_usage_llm_user.py +100 -0
- wxo_agentic_evaluation/utils/__init__.py +44 -3
- wxo_agentic_evaluation/utils/evaluation_discovery.py +47 -0
- wxo_agentic_evaluation/utils/gateway_provider_utils.py +39 -0
- wxo_agentic_evaluation/utils/messages_parser.py +30 -0
- wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +178 -0
- wxo_agentic_evaluation/utils/parsers.py +71 -0
- wxo_agentic_evaluation/utils/rich_utils.py +188 -0
- wxo_agentic_evaluation/utils/rouge_score.py +23 -0
- wxo_agentic_evaluation/utils/utils.py +514 -17
- wxo_agentic_evaluation/wxo_client.py +81 -0
- ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info/METADATA +0 -380
- ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info/RECORD +0 -56
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/top_level.txt +0 -0
wxo_agentic_evaluation/service_provider/provider.py
@@ -1,19 +1,138 @@
-from
-from typing import List
+from __future__ import annotations

+import logging
+import os
+from abc import ABC, ABCMeta, abstractmethod
+from dataclasses import dataclass
+from threading import Lock
+from typing import Any, Dict, List, Optional, Sequence, Tuple

-
-
-
+from wxo_agentic_evaluation.type import ProviderInstancesCacheKey
+
+
+class SingletonProviderMeta(type):
+
+    _provider_instances: Dict[str, "Provider"] = {}
+    _instantiation_lock = Lock()
+
+    def __call__(cls, *args, **kwargs):
+
+        key_str: str = str(cls._get_key(cls.__name__, args, kwargs))
+
+        if key_str not in cls._provider_instances:
+            with cls._instantiation_lock:
+                if key_str not in cls._provider_instances:
+                    cls._provider_instances[key_str] = super().__call__(
+                        *args, **kwargs
+                    )
+
+        return cls._provider_instances[key_str]
+
+    @staticmethod
+    def _get_key(
+        provider: str, args: Tuple[Any, ...], kwargs: Dict[str, Any]
+    ) -> ProviderInstancesCacheKey:
+
+        args_str = str(args) if args else "noargs"
+        kwargs_str = str(sorted(kwargs.items())) if kwargs else "nokwargs"
+
+        return ProviderInstancesCacheKey(
+            provider=provider,
+            hashed_args=args_str,
+            hashed_kwargs=kwargs_str,
+        )
+
+
+class SingletonProviderABCMeta(ABCMeta, SingletonProviderMeta):
+    pass
+
+
+@dataclass
+class ChatResult:
+    text: str
+    usage: Optional[Dict[str, Any]] = None
+    finish_reason: Optional[str] = None
+    raw: Optional[Any] = None
+
+
+class Provider(ABC, metaclass=SingletonProviderABCMeta):
+    def __init__(
+        self,
+        use_legacy_query: Optional[bool] = None,
+        logger: Optional[logging.Logger] = None,
+    ) -> None:
+        self.logger = logger or logging.getLogger(self.__class__.__name__)
+
+        env_use_legacy = os.environ.get("USE_LEGACY_QUERY")
+        if env_use_legacy is not None:
+            self.use_legacy_query: bool = env_use_legacy.strip().lower() in (
+                "1",
+                "true",
+                "yes",
+                "on",
+            )
+        else:
+            self.use_legacy_query = (
+                bool(use_legacy_query) if use_legacy_query is not None else True
+            )
+        if self.use_legacy_query:
+            self.logger.debug("[d][b]Using legacy /text/generation queries")
+        else:
+            self.logger.debug("[d][b]Using new /chat/completions queries")

     @abstractmethod
-    def
-
+    def old_query(self, sentence: str) -> str:
+        raise NotImplementedError

-
-
+    @abstractmethod
+    def new_query(self, sentence: str) -> str:
+        raise NotImplementedError

     @abstractmethod
     def encode(self, sentences: List[str]) -> List[list]:
-
+        raise NotImplementedError
+
+    def query(self, sentence: str) -> str:
+        if self.use_legacy_query:
+            return self.old_query(sentence)
+        return self.new_query(sentence)
+
+    def chat(
+        self,
+        messages: Sequence[Dict[str, str]],
+        params: Optional[Dict[str, Any]] = None,
+    ) -> ChatResult:
+        raise NotImplementedError(
+            f"{self.__class__.__name__} does not implement chat()."
+        )
+
+    def batch_query(
+        self,
+        sentences: List[str],
+        max_workers: Optional[int] = None,
+    ) -> List[str]:
+        if not sentences:
+            return []
+
+        if not max_workers or max_workers <= 1:
+            return [self.query(sentence) for sentence in sentences]
+
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+
+        results: List[Optional[str]] = [None] * len(sentences)
+        with ThreadPoolExecutor(max_workers=max_workers) as pool:
+            future_to_idx = {
+                pool.submit(self.query, s): i for i, s in enumerate(sentences)
+            }
+            for fut in as_completed(future_to_idx):
+                idx = future_to_idx[fut]
+                results[idx] = fut.result()
+
+        return [r if r is not None else "" for r in results]
+
+    def set_routing(self, use_legacy_query: Optional[bool] = None) -> None:
+        if use_legacy_query is not None:
+            self.use_legacy_query = bool(use_legacy_query)

+    def close(self) -> None:
+        return
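For orientation, here is a minimal hypothetical sketch (not part of the diff) of how the reworked Provider base class is expected to behave: the singleton metaclass returns a cached instance for identical constructor arguments, and query() dispatches to old_query() or new_query() based on use_legacy_query (or the USE_LEGACY_QUERY environment variable). EchoProvider and its return values are made up for illustration; the import path assumes the module location shown in the file list above.

    # Hypothetical sketch: toy subclass exercising the new Provider API.
    from typing import List

    from wxo_agentic_evaluation.service_provider.provider import Provider


    class EchoProvider(Provider):
        """Toy provider used only to illustrate routing and instance caching."""

        def old_query(self, sentence: str) -> str:
            return f"legacy:{sentence}"

        def new_query(self, sentence: str) -> str:
            return f"chat:{sentence}"

        def encode(self, sentences: List[str]) -> List[list]:
            return [[float(len(s))] for s in sentences]


    p1 = EchoProvider(use_legacy_query=True)
    p2 = EchoProvider(use_legacy_query=True)
    assert p1 is p2  # identical constructor args -> same cached singleton

    print(p1.query("hello"))                 # "legacy:hello" (routed to old_query)
    p1.set_routing(use_legacy_query=False)
    print(p1.query("hello"))                 # "chat:hello" (routed to new_query)
    print(p1.batch_query(["a", "b"], max_workers=2))  # order-preserving parallel calls

The double-checked locking in SingletonProviderMeta keeps concurrent first-time construction from creating duplicate provider instances, and batch_query preserves input order even when results complete out of order.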
wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py (new file)
@@ -0,0 +1,203 @@
+import uuid
+from abc import ABC, abstractmethod
+from typing import Any, List, Mapping, Optional, Union
+
+import requests
+import rich
+
+from wxo_agentic_evaluation.service_provider.gateway_provider import (
+    GatewayProvider,
+    _translate_params_to_chat,
+)
+from wxo_agentic_evaluation.service_provider.model_proxy_provider import (
+    ModelProxyProvider,
+)
+from wxo_agentic_evaluation.service_provider.watsonx_provider import (
+    WatsonXProvider,
+)
+
+
+class LLMResponse:
+    """
+    NOTE: Taken from LLM-Eval-Kit
+    Response object that can contain both content and tool calls
+    """
+
+    def __init__(
+        self, content: str, tool_calls: Optional[List[Mapping[str, Any]]] = None
+    ):
+        self.content = content
+        self.tool_calls = tool_calls or []
+
+    def __str__(self) -> str:
+        """Return the content of the response as a string."""
+        return self.content
+
+    def __repr__(self) -> str:
+        """Return a string representation of the LLMResponse object."""
+        return f"LLMResponse(content='{self.content}', tool_calls={self.tool_calls})"
+
+
+class LLMKitWrapper(ABC):
+    """In the future this wrapper won't be neccesary.
+    Right now the referenceless code requires a `generate()` function for the metrics client.
+    In refactor, rewrite referenceless code so this wrapper is not needed.
+    """
+
+    @abstractmethod
+    def chat():
+        pass
+
+    def generate(
+        self,
+        prompt: Union[str, List[Mapping[str, str]]],
+        *,
+        schema,
+        retries: int = 3,
+        generation_args: Optional[Any] = None,
+        **kwargs: Any,
+    ):
+        """
+        In future, implement validation of response like in llmevalkit
+        """
+
+        for attempt in range(1, retries + 1):
+            try:
+                raw_response = self.chat(prompt)
+                response = self._parse_llm_response(raw_response)
+                return response
+            except Exception as e:
+                rich.print(
+                    f"[b][r] Generation failed with error '{str(e)}' during `quick-eval` ... Attempt ({attempt} / {retries}))"
+                )
+
+    def _parse_llm_response(self, raw: Any) -> Union[str, LLMResponse]:
+        """
+        Extract the generated text and tool calls from a watsonx response.
+
+        - For text generation: raw['results'][0]['generated_text']
+        - For chat: raw['choices'][0]['message']['content']
+        """
+        content = ""
+        tool_calls = []
+
+        if isinstance(raw, dict) and "choices" in raw:
+            choices = raw["choices"]
+            if isinstance(choices, list) and choices:
+                first = choices[0]
+                msg = first.get("message")
+                if isinstance(msg, dict):
+                    content = msg.get("content", "")
+                    # Extract tool calls if present
+                    if "tool_calls" in msg and msg["tool_calls"]:
+                        tool_calls = []
+                        for tool_call in msg["tool_calls"]:
+                            tool_call_dict = {
+                                "id": tool_call.get("id"),
+                                "type": tool_call.get("type", "function"),
+                                "function": {
+                                    "name": tool_call.get("function", {}).get(
+                                        "name"
+                                    ),
+                                    "arguments": tool_call.get(
+                                        "function", {}
+                                    ).get("arguments"),
+                                },
+                            }
+                            tool_calls.append(tool_call_dict)
+                elif "text" in first:
+                    content = first["text"]
+
+        if not content and not tool_calls:
+            raise ValueError(f"Unexpected watsonx response format: {raw!r}")
+
+        # Return LLMResponse if tool calls exist, otherwise just content
+        if tool_calls:
+            return LLMResponse(content=content, tool_calls=tool_calls)
+
+        return content
+
+
+class ModelProxyProviderLLMKitWrapper(ModelProxyProvider, LLMKitWrapper):
+    def chat(self, sentence: List[str]):
+        if self.model_id is None:
+            raise Exception("model id must be specified for text generation")
+        chat_url = f"{self.instance_url}/ml/v1/text/chat?version=2023-10-25"
+        self.refresh_token_if_expires()
+        headers = self.get_header()
+        data = {
+            "model_id": self.model_id,
+            "messages": sentence,
+            "parameters": self.params,
+            "space_id": "1",
+            "timeout": self.timeout,
+        }
+        resp = requests.post(url=chat_url, headers=headers, json=data)
+        if resp.status_code == 200:
+            return resp.json()
+        else:
+            resp.raise_for_status()
+
+
+class WatsonXLLMKitWrapper(WatsonXProvider, LLMKitWrapper):
+    def chat(self, sentence: list):
+        chat_url = f"{self.api_endpoint}/ml/v1/text/chat?version=2023-05-02"
+        headers = self.prepare_header()
+        data = {
+            "model_id": self.model_id,
+            "messages": sentence,
+            "parameters": self.params,
+            "space_id": self.space_id,
+        }
+        resp = requests.post(url=chat_url, headers=headers, json=data)
+        if resp.status_code == 200:
+            return resp.json()
+        else:
+            resp.raise_for_status()
+
+
+class GatewayProviderLLMKitWrapper(GatewayProvider, LLMKitWrapper):
+    def chat(self, sentence: Union[str, List[Mapping[str, str]]]):
+        if isinstance(sentence, str):
+            messages = []
+            if self.system_prompt:
+                messages.append(
+                    {"role": "system", "content": self.system_prompt}
+                )
+            messages.append({"role": "user", "content": sentence})
+        else:
+            messages = sentence
+
+        if self.model_id is None:
+            raise Exception("model id must be specified for text generation")
+
+        self.refresh_token_if_expires()
+
+        merged_params = dict(self.params or {})
+        chat_params = _translate_params_to_chat(merged_params)
+        chat_params.pop("stream", None)
+
+        override_params = dict(merged_params)
+        override_params["model"] = self.model_id
+
+        payload = {
+            "model": self._payload_model_str(self.model_id),
+            "messages": list(messages),
+            **chat_params,
+        }
+
+        request_id = str(uuid.uuid4())
+        headers = self._headers(request_id, override_params)
+
+        resp = requests.post(
+            self.chat_url,
+            json=payload,
+            headers=headers,
+            verify=self._wo_ssl_verify,
+            timeout=self.timeout,
+        )
+
+        if resp.status_code == 200:
+            return resp.json()
+        else:
+            resp.raise_for_status()
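A similarly hypothetical sketch (not part of the diff) of how the new LLMKitWrapper is wired: generate() retries chat() and funnels the raw chat-completions-style payload through _parse_llm_response(), which returns plain text or an LLMResponse carrying tool calls. FakeKitWrapper and its canned payload are made up so the example needs no credentials or network access; the import path assumes the module location shown in the file list above.

    # Hypothetical sketch: exercising generate() with a canned chat response.
    from wxo_agentic_evaluation.service_provider.referenceless_provider_wrapper import (
        LLMKitWrapper,
        LLMResponse,
    )


    class FakeKitWrapper(LLMKitWrapper):
        def chat(self, sentence):
            # Shape mirrors the watsonx text/chat payload that
            # _parse_llm_response() expects: choices[0].message with tool_calls.
            return {
                "choices": [
                    {
                        "message": {
                            "content": "",
                            "tool_calls": [
                                {
                                    "id": "call-1",
                                    "type": "function",
                                    "function": {
                                        "name": "get_weather",
                                        "arguments": '{"city": "Austin"}',
                                    },
                                }
                            ],
                        }
                    }
                ]
            }


    wrapper = FakeKitWrapper()
    result = wrapper.generate("What is the weather in Austin?", schema=None)
    assert isinstance(result, LLMResponse)           # tool calls -> LLMResponse
    print(result.tool_calls[0]["function"]["name"])  # "get_weather"

Note that generate() as added here returns None once all retries are exhausted rather than re-raising, so callers are expected to handle an empty result.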