ibm-watsonx-orchestrate-evaluation-framework 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/METADATA +4 -1
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/RECORD +49 -39
- wxo_agentic_evaluation/analyze_run.py +822 -344
- wxo_agentic_evaluation/arg_configs.py +39 -2
- wxo_agentic_evaluation/data_annotator.py +22 -4
- wxo_agentic_evaluation/description_quality_checker.py +29 -4
- wxo_agentic_evaluation/evaluation_package.py +197 -18
- wxo_agentic_evaluation/external_agent/external_validate.py +3 -1
- wxo_agentic_evaluation/external_agent/types.py +1 -1
- wxo_agentic_evaluation/inference_backend.py +105 -108
- wxo_agentic_evaluation/llm_matching.py +104 -2
- wxo_agentic_evaluation/llm_user.py +2 -2
- wxo_agentic_evaluation/main.py +147 -38
- wxo_agentic_evaluation/metrics/__init__.py +5 -0
- wxo_agentic_evaluation/metrics/evaluations.py +124 -0
- wxo_agentic_evaluation/metrics/llm_as_judge.py +4 -3
- wxo_agentic_evaluation/metrics/metrics.py +64 -1
- wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2 +15 -0
- wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2 +41 -9
- wxo_agentic_evaluation/prompt/template_render.py +20 -2
- wxo_agentic_evaluation/quick_eval.py +23 -11
- wxo_agentic_evaluation/record_chat.py +18 -10
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py +169 -100
- wxo_agentic_evaluation/red_teaming/attack_generator.py +63 -40
- wxo_agentic_evaluation/red_teaming/attack_list.py +78 -8
- wxo_agentic_evaluation/red_teaming/attack_runner.py +71 -14
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json +783 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json +600 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +10 -10
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +103 -39
- wxo_agentic_evaluation/resource_map.py +3 -1
- wxo_agentic_evaluation/service_instance.py +12 -3
- wxo_agentic_evaluation/service_provider/__init__.py +129 -9
- wxo_agentic_evaluation/service_provider/gateway_provider.py +707 -0
- wxo_agentic_evaluation/service_provider/model_proxy_provider.py +415 -17
- wxo_agentic_evaluation/service_provider/ollama_provider.py +393 -22
- wxo_agentic_evaluation/service_provider/provider.py +130 -10
- wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +52 -0
- wxo_agentic_evaluation/service_provider/watsonx_provider.py +480 -52
- wxo_agentic_evaluation/type.py +15 -5
- wxo_agentic_evaluation/utils/__init__.py +44 -3
- wxo_agentic_evaluation/utils/evaluation_discovery.py +47 -0
- wxo_agentic_evaluation/utils/gateway_provider_utils.py +39 -0
- wxo_agentic_evaluation/utils/messages_parser.py +30 -0
- wxo_agentic_evaluation/utils/parsers.py +71 -0
- wxo_agentic_evaluation/utils/utils.py +140 -20
- wxo_agentic_evaluation/wxo_client.py +81 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.1.5.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.7.dist-info}/top_level.txt +0 -0
|
@@ -390,18 +390,18 @@ class PipelineResult(BaseModel):
|
|
|
390
390
|
)
|
|
391
391
|
|
|
392
392
|
@model_validator(mode="after")
|
|
393
|
-
def compute_overall(
|
|
393
|
+
def compute_overall(self) -> Self:
|
|
394
394
|
"""
|
|
395
395
|
After validation, compute overall_valid as AND of:
|
|
396
396
|
• all semantic is_correct flags
|
|
397
397
|
• if transform exists: all execution_success flags
|
|
398
398
|
"""
|
|
399
|
-
static: StaticResult =
|
|
399
|
+
static: StaticResult = self.static
|
|
400
400
|
if static:
|
|
401
401
|
# static checks
|
|
402
402
|
ok = static.final_decision
|
|
403
403
|
|
|
404
|
-
sem: SemanticResult =
|
|
404
|
+
sem: SemanticResult = self.semantic
|
|
405
405
|
if sem:
|
|
406
406
|
# semantic checks
|
|
407
407
|
if sem.general and sem.general.metrics:
|
|
@@ -441,11 +441,11 @@ class PipelineResult(BaseModel):
|
|
|
441
441
|
if param_avgs:
|
|
442
442
|
cat_avgs.append(sum(param_avgs) / len(param_avgs))
|
|
443
443
|
|
|
444
|
-
|
|
444
|
+
self.overall_avg_score = (
|
|
445
445
|
sum(cat_avgs) / len(cat_avgs) if cat_avgs else None
|
|
446
446
|
)
|
|
447
|
-
|
|
448
|
-
return
|
|
447
|
+
self.overall_valid = ok
|
|
448
|
+
return self
|
|
449
449
|
|
|
450
450
|
|
|
451
451
|
# ----------------------------------------------------------------------
|
|
@@ -531,17 +531,17 @@ class ToolFunctionCall(BaseModel):
|
|
|
531
531
|
)
|
|
532
532
|
|
|
533
533
|
@model_validator(mode="after")
|
|
534
|
-
def _parse_arguments(
|
|
534
|
+
def _parse_arguments(self) -> Self:
|
|
535
535
|
"""
|
|
536
536
|
After model construction, parse the `arguments` JSON string
|
|
537
537
|
into `parsed_arguments`, or raise a ValidationError.
|
|
538
538
|
"""
|
|
539
539
|
try:
|
|
540
|
-
raw =
|
|
541
|
-
|
|
540
|
+
raw = self.arguments
|
|
541
|
+
self.parsed_arguments = json.loads(raw)
|
|
542
542
|
except json.JSONDecodeError as e:
|
|
543
543
|
raise ValidationError(f"Invalid JSON in arguments: {e}") from e
|
|
544
|
-
return
|
|
544
|
+
return self
|
|
545
545
|
|
|
546
546
|
|
|
547
547
|
class ToolCall(BaseModel):
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from typing import Any, List, Mapping
|
|
2
|
+
from typing import Any, List, Mapping, Optional
|
|
3
3
|
|
|
4
4
|
import rich
|
|
5
5
|
|
|
6
|
+
from wxo_agentic_evaluation.inference_backend import WXOInferenceBackend
|
|
6
7
|
from wxo_agentic_evaluation.referenceless_eval.function_calling.consts import (
|
|
7
8
|
METRIC_FUNCTION_SELECTION_APPROPRIATENESS,
|
|
8
9
|
METRIC_GENERAL_HALLUCINATION_CHECK,
|
|
@@ -16,6 +17,15 @@ from wxo_agentic_evaluation.referenceless_eval.function_calling.pipeline.types i
|
|
|
16
17
|
)
|
|
17
18
|
from wxo_agentic_evaluation.service_provider import get_provider
|
|
18
19
|
from wxo_agentic_evaluation.type import Message
|
|
20
|
+
from wxo_agentic_evaluation.utils.gateway_provider_utils import (
|
|
21
|
+
get_provider_kwargs,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
DEFAULT_GENERATION_PARAMS = {
|
|
25
|
+
"min_new_tokens": 0,
|
|
26
|
+
"decoding_method": "greedy",
|
|
27
|
+
"max_new_tokens": 4096,
|
|
28
|
+
}
|
|
19
29
|
|
|
20
30
|
|
|
21
31
|
class ReferencelessEvaluation:
|
|
@@ -31,20 +41,29 @@ class ReferencelessEvaluation:
|
|
|
31
41
|
def __init__(
|
|
32
42
|
self,
|
|
33
43
|
api_spec: List[Mapping[str, Any]],
|
|
34
|
-
messages: List[Message],
|
|
35
44
|
model_id: str,
|
|
36
45
|
task_n: str,
|
|
37
46
|
dataset_name: str,
|
|
47
|
+
runtime_pipeline: bool = True,
|
|
48
|
+
generation_params=DEFAULT_GENERATION_PARAMS,
|
|
49
|
+
inference_backend: Optional[WXOInferenceBackend] = None,
|
|
38
50
|
):
|
|
39
51
|
|
|
40
|
-
|
|
52
|
+
extra_kwargs = {}
|
|
53
|
+
if inference_backend is not None:
|
|
54
|
+
wxo_client = getattr(inference_backend, "wxo_client")
|
|
55
|
+
instance_url = getattr(wxo_client, "service_url", None)
|
|
56
|
+
token = getattr(wxo_client, "api_key", None)
|
|
57
|
+
if instance_url:
|
|
58
|
+
extra_kwargs["instance_url"] = instance_url
|
|
59
|
+
if token:
|
|
60
|
+
extra_kwargs["token"] = token
|
|
61
|
+
|
|
62
|
+
self.metrics_client = ReferencelessEvaluation.get_metrics_client(
|
|
41
63
|
model_id=model_id,
|
|
42
|
-
params=
|
|
43
|
-
"min_new_tokens": 0,
|
|
44
|
-
"decoding_method": "greedy",
|
|
45
|
-
"max_new_tokens": 4096,
|
|
46
|
-
},
|
|
64
|
+
params=generation_params,
|
|
47
65
|
referenceless_eval=True,
|
|
66
|
+
**extra_kwargs,
|
|
48
67
|
)
|
|
49
68
|
|
|
50
69
|
self.pipeline = ReflectionPipeline(
|
|
@@ -52,39 +71,54 @@ class ReferencelessEvaluation:
|
|
|
52
71
|
general_metrics=[METRIC_GENERAL_HALLUCINATION_CHECK],
|
|
53
72
|
function_metrics=[METRIC_FUNCTION_SELECTION_APPROPRIATENESS],
|
|
54
73
|
parameter_metrics=None,
|
|
74
|
+
runtime_pipeline=runtime_pipeline,
|
|
55
75
|
)
|
|
56
76
|
|
|
57
77
|
self.task_n = task_n
|
|
58
78
|
self.dataset_name = dataset_name
|
|
59
79
|
|
|
60
80
|
self.apis_specs = [ToolSpec.model_validate(spec) for spec in api_spec]
|
|
61
|
-
self.messages = messages
|
|
62
81
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
for example in examples:
|
|
66
|
-
result = self.pipeline.run_sync(
|
|
67
|
-
conversation=example["context"],
|
|
68
|
-
inventory=self.apis_specs,
|
|
69
|
-
call=example["call"],
|
|
70
|
-
continue_on_static=False,
|
|
71
|
-
retries=2,
|
|
72
|
-
)
|
|
73
|
-
result_dict = result.model_dump()
|
|
74
|
-
results.append(result_dict)
|
|
82
|
+
@staticmethod
|
|
83
|
+
def get_metrics_client(**kwargs):
|
|
75
84
|
|
|
76
|
-
|
|
85
|
+
provider_kwargs = get_provider_kwargs(**kwargs)
|
|
77
86
|
|
|
78
|
-
|
|
79
|
-
|
|
87
|
+
return get_provider(
|
|
88
|
+
**provider_kwargs,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
@staticmethod
|
|
92
|
+
def fmt_tool_call(tool_id, tool_call_name, arguments, context):
|
|
93
|
+
call = {
|
|
94
|
+
"call": {
|
|
95
|
+
"id": tool_id,
|
|
96
|
+
"type": "function",
|
|
97
|
+
"function": {
|
|
98
|
+
"name": tool_call_name,
|
|
99
|
+
"arguments": arguments,
|
|
100
|
+
},
|
|
101
|
+
},
|
|
102
|
+
"context": context,
|
|
103
|
+
}
|
|
80
104
|
|
|
105
|
+
return call
|
|
106
|
+
|
|
107
|
+
@staticmethod
|
|
108
|
+
def fmt_msgs_referenceless(
|
|
109
|
+
messages: List[Message],
|
|
110
|
+
) -> List[Mapping[str, Any]]:
|
|
111
|
+
"""Assume that the last item in the `messages` array is the tool call, and preceding items
|
|
112
|
+
in the messages array is the context.
|
|
113
|
+
"""
|
|
114
|
+
examples = []
|
|
81
115
|
processed_data = [
|
|
82
116
|
{
|
|
83
117
|
k: msg.model_dump().get(k)
|
|
84
118
|
for k in ["role", "content", "type"]
|
|
85
119
|
if k in msg.model_dump()
|
|
86
120
|
}
|
|
87
|
-
for msg in
|
|
121
|
+
for msg in messages
|
|
88
122
|
]
|
|
89
123
|
|
|
90
124
|
for idx, message in enumerate(processed_data):
|
|
@@ -97,22 +131,47 @@ class ReferencelessEvaluation:
|
|
|
97
131
|
if tool_call_msg["name"].startswith("transfer_to"):
|
|
98
132
|
continue
|
|
99
133
|
|
|
100
|
-
call =
|
|
101
|
-
"
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
"arguments": json.dumps(tool_call_msg["args"]),
|
|
107
|
-
},
|
|
108
|
-
},
|
|
109
|
-
"context": context,
|
|
110
|
-
}
|
|
134
|
+
call = ReferencelessEvaluation.fmt_tool_call(
|
|
135
|
+
tool_id=tool_call_msg.get("id", "1"),
|
|
136
|
+
tool_call_name=tool_call_msg["name"],
|
|
137
|
+
arguments=json.dumps(tool_call_msg["args"]),
|
|
138
|
+
context=context,
|
|
139
|
+
)
|
|
111
140
|
examples.append(call)
|
|
112
141
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
142
|
+
return examples
|
|
143
|
+
|
|
144
|
+
def _run_pipeline(self, examples: List[Mapping[str, Any]]):
|
|
145
|
+
results = []
|
|
146
|
+
for example in examples:
|
|
147
|
+
result = self.pipeline.run_sync(
|
|
148
|
+
conversation=example["context"],
|
|
149
|
+
inventory=self.apis_specs,
|
|
150
|
+
call=example["call"],
|
|
151
|
+
continue_on_static=False,
|
|
152
|
+
retries=2,
|
|
153
|
+
)
|
|
154
|
+
result_dict = result.model_dump()
|
|
155
|
+
results.append(result_dict)
|
|
156
|
+
|
|
157
|
+
return results
|
|
158
|
+
|
|
159
|
+
def run(self, examples: List[Mapping[str, str]], verbose=False):
|
|
160
|
+
"""`examples` should be an array where each element is formatted:
|
|
161
|
+
|
|
162
|
+
call = {
|
|
163
|
+
"call": {
|
|
164
|
+
"id": tool_call_msg.get("id", "1"),
|
|
165
|
+
"type": "function",
|
|
166
|
+
"function": {
|
|
167
|
+
"name": tool_call_msg["name"],
|
|
168
|
+
"arguments": json.dumps(tool_call_msg["args"]),
|
|
169
|
+
},
|
|
170
|
+
},
|
|
171
|
+
"context": context,
|
|
172
|
+
}
|
|
173
|
+
"""
|
|
174
|
+
|
|
116
175
|
examples = [
|
|
117
176
|
{
|
|
118
177
|
"call": ToolCall.model_validate(ex["call"]),
|
|
@@ -120,6 +179,11 @@ class ReferencelessEvaluation:
|
|
|
120
179
|
}
|
|
121
180
|
for ex in examples
|
|
122
181
|
]
|
|
182
|
+
|
|
183
|
+
if verbose:
|
|
184
|
+
rich.print(
|
|
185
|
+
f"[yellow][b][Task-{self.task_n}] There are {len(examples)} examples to analyze"
|
|
186
|
+
)
|
|
123
187
|
results = self._run_pipeline(examples)
|
|
124
188
|
|
|
125
189
|
return results
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
|
|
3
|
-
from wxo_agentic_evaluation.
|
|
3
|
+
from wxo_agentic_evaluation.utils.utils import is_saas_url
|
|
4
|
+
from wxo_agentic_evaluation.wxo_client import WXOClient
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class ResourceMap:
|
|
@@ -34,6 +35,7 @@ class ResourceMap:
|
|
|
34
35
|
|
|
35
36
|
if resp.status_code == 200:
|
|
36
37
|
agents = resp.json()
|
|
38
|
+
self.all_agent_objs = agents
|
|
37
39
|
for agent in agents:
|
|
38
40
|
agent_name = agent["name"]
|
|
39
41
|
tools = [tool_map[id] for id in agent["tools"]]
|
|
@@ -143,7 +143,7 @@ class ServiceInstance:
|
|
|
143
143
|
def create_tenant_if_not_exist(self) -> str:
|
|
144
144
|
if self.is_saas:
|
|
145
145
|
logger.info(
|
|
146
|
-
"SaaS mode: running against Remote Service and skipping tenant creation"
|
|
146
|
+
"[d b]SaaS mode: running against Remote Service and skipping tenant creation"
|
|
147
147
|
)
|
|
148
148
|
return None
|
|
149
149
|
|
|
@@ -151,11 +151,13 @@ class ServiceInstance:
|
|
|
151
151
|
default_tenant = self.get_default_tenant(user_auth_token)
|
|
152
152
|
|
|
153
153
|
if not default_tenant:
|
|
154
|
-
logger.info(
|
|
154
|
+
logger.info(
|
|
155
|
+
"[d b]no local tenant found. A default tenant is created"
|
|
156
|
+
)
|
|
155
157
|
self.create_eval_tenant(user_auth_token)
|
|
156
158
|
default_tenant = self.get_default_tenant(user_auth_token)
|
|
157
159
|
else:
|
|
158
|
-
logger.info("local tenant found")
|
|
160
|
+
logger.info("[d b]local tenant found")
|
|
159
161
|
|
|
160
162
|
return default_tenant["id"]
|
|
161
163
|
|
|
@@ -247,6 +249,13 @@ def tenant_setup(
|
|
|
247
249
|
|
|
248
250
|
context["active_environment"] = tenant_name
|
|
249
251
|
|
|
252
|
+
# Ensure parent directories exist so tests (which may run in clean envs)
|
|
253
|
+
# can write these files without raising FileNotFoundError.
|
|
254
|
+
auth_dir = os.path.dirname(auth_config_path)
|
|
255
|
+
env_dir = os.path.dirname(env_config_path)
|
|
256
|
+
os.makedirs(auth_dir, exist_ok=True)
|
|
257
|
+
os.makedirs(env_dir, exist_ok=True)
|
|
258
|
+
|
|
250
259
|
with open(auth_config_path, "w") as f:
|
|
251
260
|
yaml.dump(auth_config, f)
|
|
252
261
|
with open(env_config_path, "w") as f:
|
|
@@ -1,6 +1,13 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.logging import RichHandler
|
|
6
|
+
|
|
3
7
|
from wxo_agentic_evaluation.arg_configs import ProviderConfig
|
|
8
|
+
from wxo_agentic_evaluation.service_provider.gateway_provider import (
|
|
9
|
+
GatewayProvider,
|
|
10
|
+
)
|
|
4
11
|
from wxo_agentic_evaluation.service_provider.model_proxy_provider import (
|
|
5
12
|
ModelProxyProvider,
|
|
6
13
|
)
|
|
@@ -8,6 +15,7 @@ from wxo_agentic_evaluation.service_provider.ollama_provider import (
|
|
|
8
15
|
OllamaProvider,
|
|
9
16
|
)
|
|
10
17
|
from wxo_agentic_evaluation.service_provider.referenceless_provider_wrapper import (
|
|
18
|
+
GatewayProviderLLMKitWrapper,
|
|
11
19
|
ModelProxyProviderLLMKitWrapper,
|
|
12
20
|
WatsonXLLMKitWrapper,
|
|
13
21
|
)
|
|
@@ -15,24 +23,123 @@ from wxo_agentic_evaluation.service_provider.watsonx_provider import (
|
|
|
15
23
|
WatsonXProvider,
|
|
16
24
|
)
|
|
17
25
|
|
|
26
|
+
USE_GATEWAY_MODEL_PROVIDER: bool = (
|
|
27
|
+
os.environ.get("USE_GATEWAY_MODEL_PROVIDER", "FALSE").upper() == "TRUE"
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
_logging_console = Console(stderr=True)
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_log_level_from_env():
|
|
37
|
+
|
|
38
|
+
level_env = os.getenv("WXO_EVALUATION_LOGLEVEL")
|
|
39
|
+
return level_env
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
LOGGING_ENABLED = get_log_level_from_env() is not None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def configure_logging_for_package_from_env(
|
|
46
|
+
package_name: str = "wxo_agentic_evaluation",
|
|
47
|
+
ensure_output: bool = True,
|
|
48
|
+
) -> None:
|
|
49
|
+
"""
|
|
50
|
+
Configure logging using the env var WXO_EVALUATION_LOGLEVEL - no logging if that's not set
|
|
51
|
+
"""
|
|
52
|
+
try:
|
|
53
|
+
level_env = get_log_level_from_env()
|
|
54
|
+
if not level_env:
|
|
55
|
+
return
|
|
56
|
+
|
|
57
|
+
level = None
|
|
58
|
+
upper = level_env.strip().upper()
|
|
59
|
+
if hasattr(logging, upper):
|
|
60
|
+
level = getattr(logging, upper, None)
|
|
61
|
+
|
|
62
|
+
pkg_logger = logging.getLogger(package_name)
|
|
63
|
+
pkg_logger.setLevel(level)
|
|
64
|
+
|
|
65
|
+
if ensure_output:
|
|
66
|
+
if not pkg_logger.handlers:
|
|
67
|
+
handler = RichHandler(
|
|
68
|
+
console=_logging_console,
|
|
69
|
+
rich_tracebacks=True,
|
|
70
|
+
show_time=False,
|
|
71
|
+
show_level=False,
|
|
72
|
+
show_path=False,
|
|
73
|
+
markup=True,
|
|
74
|
+
enable_link_path=True,
|
|
75
|
+
omit_repeated_times=True,
|
|
76
|
+
tracebacks_theme="github-dark",
|
|
77
|
+
)
|
|
78
|
+
handler.setFormatter(
|
|
79
|
+
logging.Formatter("%(levelname)s %(message)s")
|
|
80
|
+
)
|
|
81
|
+
handler.setLevel(logging.NOTSET)
|
|
82
|
+
pkg_logger.addHandler(handler)
|
|
83
|
+
pkg_logger.propagate = False
|
|
84
|
+
|
|
85
|
+
# Quiet common noisy debug libs
|
|
86
|
+
for name in (
|
|
87
|
+
"urllib3",
|
|
88
|
+
"urllib3.connectionpool",
|
|
89
|
+
"requests.packages.urllib3",
|
|
90
|
+
):
|
|
91
|
+
logging.getLogger(name).setLevel(logging.WARNING)
|
|
92
|
+
except:
|
|
93
|
+
logger.warning("Input log level %s not valid", level_env)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
configure_logging_for_package_from_env()
|
|
97
|
+
|
|
18
98
|
|
|
19
99
|
def _instantiate_provider(
|
|
20
100
|
config: ProviderConfig, is_referenceless_eval: bool = False, **kwargs
|
|
21
101
|
):
|
|
102
|
+
|
|
22
103
|
if config.provider == "watsonx":
|
|
104
|
+
logger.info("Instantiate watsonx provider")
|
|
23
105
|
if is_referenceless_eval:
|
|
24
106
|
provider = WatsonXLLMKitWrapper
|
|
25
107
|
else:
|
|
26
108
|
provider = WatsonXProvider
|
|
27
|
-
return provider(
|
|
109
|
+
return provider(
|
|
110
|
+
model_id=config.model_id,
|
|
111
|
+
embedding_model_id=config.embedding_model_id,
|
|
112
|
+
**kwargs,
|
|
113
|
+
)
|
|
28
114
|
elif config.provider == "ollama":
|
|
115
|
+
logger.info("Instantiate Ollama")
|
|
29
116
|
return OllamaProvider(model_id=config.model_id, **kwargs)
|
|
117
|
+
|
|
118
|
+
elif config.provider == "gateway":
|
|
119
|
+
logger.info("Instantiate gateway inference provider")
|
|
120
|
+
if is_referenceless_eval:
|
|
121
|
+
provider = GatewayProviderLLMKitWrapper
|
|
122
|
+
else:
|
|
123
|
+
provider = GatewayProvider
|
|
124
|
+
return provider(
|
|
125
|
+
model_id=config.model_id,
|
|
126
|
+
embedding_model_id=config.embedding_model_id,
|
|
127
|
+
**kwargs,
|
|
128
|
+
)
|
|
129
|
+
|
|
30
130
|
elif config.provider == "model_proxy":
|
|
131
|
+
logger.info("Instantiate model proxy provider")
|
|
31
132
|
if is_referenceless_eval:
|
|
32
133
|
provider = ModelProxyProviderLLMKitWrapper
|
|
33
134
|
else:
|
|
34
135
|
provider = ModelProxyProvider
|
|
35
|
-
|
|
136
|
+
|
|
137
|
+
return provider(
|
|
138
|
+
model_id=config.model_id,
|
|
139
|
+
embedding_model_id=config.embedding_model_id,
|
|
140
|
+
**kwargs,
|
|
141
|
+
)
|
|
142
|
+
|
|
36
143
|
else:
|
|
37
144
|
raise RuntimeError(
|
|
38
145
|
f"target provider is not supported {config.provider}"
|
|
@@ -42,23 +149,36 @@ def _instantiate_provider(
|
|
|
42
149
|
def get_provider(
|
|
43
150
|
config: ProviderConfig = None,
|
|
44
151
|
model_id: str = None,
|
|
152
|
+
embedding_model_id: str = None,
|
|
45
153
|
referenceless_eval: bool = False,
|
|
46
154
|
**kwargs,
|
|
47
155
|
):
|
|
156
|
+
|
|
157
|
+
if config:
|
|
158
|
+
return _instantiate_provider(config, **kwargs)
|
|
159
|
+
|
|
48
160
|
if not model_id:
|
|
49
161
|
raise ValueError("model_id must be provided if config is not supplied")
|
|
50
162
|
|
|
163
|
+
if USE_GATEWAY_MODEL_PROVIDER:
|
|
164
|
+
logger.info("[d b]Using gateway inference provider override")
|
|
165
|
+
config = ProviderConfig(provider="gateway", model_id=model_id)
|
|
166
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
167
|
+
|
|
51
168
|
if "WATSONX_APIKEY" in os.environ and "WATSONX_SPACE_ID" in os.environ:
|
|
52
|
-
|
|
169
|
+
logger.info("[d b]Using watsonx inference provider")
|
|
170
|
+
config = ProviderConfig(
|
|
171
|
+
provider="watsonx",
|
|
172
|
+
model_id=model_id,
|
|
173
|
+
embedding_model_id=embedding_model_id,
|
|
174
|
+
)
|
|
53
175
|
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
54
176
|
|
|
55
177
|
if "WO_INSTANCE" in os.environ:
|
|
178
|
+
logger.info("[d b]Using model_proxy inference provider")
|
|
56
179
|
config = ProviderConfig(provider="model_proxy", model_id=model_id)
|
|
57
180
|
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
58
181
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
raise RuntimeError(
|
|
63
|
-
"No provider found. Please either provide a config or set the required environment variables."
|
|
64
|
-
)
|
|
182
|
+
logger.info("[d b]Using gateway inference provider default")
|
|
183
|
+
config = ProviderConfig(provider="gateway", model_id=model_id)
|
|
184
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|