ibm-watsonx-orchestrate-evaluation-framework 1.0.3__py3-none-any.whl → 1.1.8b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/METADATA +53 -0
- ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info/RECORD +146 -0
- wxo_agentic_evaluation/analytics/tools/analyzer.py +38 -21
- wxo_agentic_evaluation/analytics/tools/main.py +19 -25
- wxo_agentic_evaluation/analytics/tools/types.py +26 -11
- wxo_agentic_evaluation/analytics/tools/ux.py +75 -31
- wxo_agentic_evaluation/analyze_run.py +1184 -97
- wxo_agentic_evaluation/annotate.py +7 -5
- wxo_agentic_evaluation/arg_configs.py +97 -5
- wxo_agentic_evaluation/base_user.py +25 -0
- wxo_agentic_evaluation/batch_annotate.py +97 -27
- wxo_agentic_evaluation/clients.py +103 -0
- wxo_agentic_evaluation/compare_runs/__init__.py +0 -0
- wxo_agentic_evaluation/compare_runs/compare_2_runs.py +74 -0
- wxo_agentic_evaluation/compare_runs/diff.py +554 -0
- wxo_agentic_evaluation/compare_runs/model.py +193 -0
- wxo_agentic_evaluation/data_annotator.py +45 -19
- wxo_agentic_evaluation/description_quality_checker.py +178 -0
- wxo_agentic_evaluation/evaluation.py +50 -0
- wxo_agentic_evaluation/evaluation_controller/evaluation_controller.py +303 -0
- wxo_agentic_evaluation/evaluation_package.py +544 -107
- wxo_agentic_evaluation/external_agent/__init__.py +18 -7
- wxo_agentic_evaluation/external_agent/external_validate.py +49 -36
- wxo_agentic_evaluation/external_agent/performance_test.py +33 -22
- wxo_agentic_evaluation/external_agent/types.py +8 -7
- wxo_agentic_evaluation/extractors/__init__.py +3 -0
- wxo_agentic_evaluation/extractors/extractor_base.py +21 -0
- wxo_agentic_evaluation/extractors/labeled_messages.py +47 -0
- wxo_agentic_evaluation/hr_agent_langgraph.py +68 -0
- wxo_agentic_evaluation/langfuse_collection.py +60 -0
- wxo_agentic_evaluation/langfuse_evaluation_package.py +192 -0
- wxo_agentic_evaluation/llm_matching.py +108 -5
- wxo_agentic_evaluation/llm_rag_eval.py +7 -4
- wxo_agentic_evaluation/llm_safety_eval.py +64 -0
- wxo_agentic_evaluation/llm_user.py +12 -6
- wxo_agentic_evaluation/llm_user_v2.py +114 -0
- wxo_agentic_evaluation/main.py +128 -246
- wxo_agentic_evaluation/metrics/__init__.py +15 -0
- wxo_agentic_evaluation/metrics/dummy_metric.py +16 -0
- wxo_agentic_evaluation/metrics/evaluations.py +107 -0
- wxo_agentic_evaluation/metrics/journey_success.py +137 -0
- wxo_agentic_evaluation/metrics/llm_as_judge.py +28 -2
- wxo_agentic_evaluation/metrics/metrics.py +319 -16
- wxo_agentic_evaluation/metrics/tool_calling.py +93 -0
- wxo_agentic_evaluation/otel_parser/__init__.py +1 -0
- wxo_agentic_evaluation/otel_parser/langflow_parser.py +86 -0
- wxo_agentic_evaluation/otel_parser/langgraph_parser.py +61 -0
- wxo_agentic_evaluation/otel_parser/parser.py +163 -0
- wxo_agentic_evaluation/otel_parser/parser_types.py +38 -0
- wxo_agentic_evaluation/otel_parser/pydantic_parser.py +50 -0
- wxo_agentic_evaluation/otel_parser/utils.py +15 -0
- wxo_agentic_evaluation/otel_parser/wxo_parser.py +39 -0
- wxo_agentic_evaluation/otel_support/evaluate_tau.py +101 -0
- wxo_agentic_evaluation/otel_support/otel_message_conversion.py +29 -0
- wxo_agentic_evaluation/otel_support/tasks_test.py +1566 -0
- wxo_agentic_evaluation/prompt/bad_tool_descriptions_prompt.jinja2 +178 -0
- wxo_agentic_evaluation/prompt/derailment_prompt.jinja2 +55 -0
- wxo_agentic_evaluation/prompt/llama_user_prompt.jinja2 +59 -5
- wxo_agentic_evaluation/prompt/llmaaj_prompt.jinja2 +15 -0
- wxo_agentic_evaluation/prompt/off_policy_attack_generation_prompt.jinja2 +34 -0
- wxo_agentic_evaluation/prompt/on_policy_attack_generation_prompt.jinja2 +46 -0
- wxo_agentic_evaluation/prompt/semantic_matching_prompt.jinja2 +41 -9
- wxo_agentic_evaluation/prompt/template_render.py +163 -12
- wxo_agentic_evaluation/prompt/unsafe_topic_prompt.jinja2 +65 -0
- wxo_agentic_evaluation/quick_eval.py +384 -0
- wxo_agentic_evaluation/record_chat.py +132 -81
- wxo_agentic_evaluation/red_teaming/attack_evaluator.py +302 -0
- wxo_agentic_evaluation/red_teaming/attack_generator.py +329 -0
- wxo_agentic_evaluation/red_teaming/attack_list.py +184 -0
- wxo_agentic_evaluation/red_teaming/attack_runner.py +204 -0
- wxo_agentic_evaluation/referenceless_eval/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/consts.py +28 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/base.py +29 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general.py +49 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics.json +783 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_call/general_metrics_runtime.json +580 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection.py +31 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics.json +600 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/function_selection/function_selection_metrics_runtime.json +477 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/metrics/loader.py +245 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/adapters.py +106 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/pipeline.py +291 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/semantic_checker.py +465 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/static_checker.py +162 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/transformation_prompts.py +509 -0
- wxo_agentic_evaluation/referenceless_eval/function_calling/pipeline/types.py +562 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/__init__.py +3 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/field.py +266 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metric.py +344 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/metrics_runner.py +193 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/prompt.py +413 -0
- wxo_agentic_evaluation/referenceless_eval/metrics/utils.py +46 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/__init__.py +0 -0
- wxo_agentic_evaluation/referenceless_eval/prompt/runner.py +158 -0
- wxo_agentic_evaluation/referenceless_eval/referenceless_eval.py +191 -0
- wxo_agentic_evaluation/resource_map.py +6 -3
- wxo_agentic_evaluation/runner.py +329 -0
- wxo_agentic_evaluation/runtime_adapter/a2a_runtime_adapter.py +0 -0
- wxo_agentic_evaluation/runtime_adapter/runtime_adapter.py +14 -0
- wxo_agentic_evaluation/{inference_backend.py → runtime_adapter/wxo_runtime_adapter.py} +88 -150
- wxo_agentic_evaluation/scheduler.py +247 -0
- wxo_agentic_evaluation/service_instance.py +117 -26
- wxo_agentic_evaluation/service_provider/__init__.py +182 -17
- wxo_agentic_evaluation/service_provider/gateway_provider.py +707 -0
- wxo_agentic_evaluation/service_provider/model_proxy_provider.py +628 -45
- wxo_agentic_evaluation/service_provider/ollama_provider.py +392 -22
- wxo_agentic_evaluation/service_provider/portkey_provider.py +229 -0
- wxo_agentic_evaluation/service_provider/provider.py +129 -10
- wxo_agentic_evaluation/service_provider/referenceless_provider_wrapper.py +203 -0
- wxo_agentic_evaluation/service_provider/watsonx_provider.py +516 -53
- wxo_agentic_evaluation/simluation_runner.py +125 -0
- wxo_agentic_evaluation/test_prompt.py +4 -4
- wxo_agentic_evaluation/tool_planner.py +141 -46
- wxo_agentic_evaluation/type.py +217 -14
- wxo_agentic_evaluation/user_simulator/demo_usage_llm_user.py +100 -0
- wxo_agentic_evaluation/utils/__init__.py +44 -3
- wxo_agentic_evaluation/utils/evaluation_discovery.py +47 -0
- wxo_agentic_evaluation/utils/gateway_provider_utils.py +39 -0
- wxo_agentic_evaluation/utils/messages_parser.py +30 -0
- wxo_agentic_evaluation/utils/open_ai_tool_extractor.py +178 -0
- wxo_agentic_evaluation/utils/parsers.py +71 -0
- wxo_agentic_evaluation/utils/rich_utils.py +188 -0
- wxo_agentic_evaluation/utils/rouge_score.py +23 -0
- wxo_agentic_evaluation/utils/utils.py +514 -17
- wxo_agentic_evaluation/wxo_client.py +81 -0
- ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info/METADATA +0 -380
- ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info/RECORD +0 -56
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.3.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.1.8b0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import yaml
|
|
3
2
|
import os
|
|
3
|
+
import shutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterable, Optional, Tuple
|
|
6
|
+
|
|
4
7
|
import requests
|
|
5
|
-
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
from wxo_agentic_evaluation.utils.utils import is_ibm_cloud_url, is_saas_url
|
|
6
11
|
|
|
7
12
|
logger = logging.getLogger(__name__)
|
|
8
13
|
|
|
@@ -11,13 +16,15 @@ USER = {"username": "wxo.archer@ibm.com", "password": "watsonx"}
|
|
|
11
16
|
|
|
12
17
|
class ServiceInstance:
|
|
13
18
|
def __init__(
|
|
14
|
-
self,
|
|
19
|
+
self,
|
|
20
|
+
service_url,
|
|
21
|
+
tenant_name,
|
|
22
|
+
is_saas: bool = None,
|
|
23
|
+
is_ibm_cloud: bool = None,
|
|
15
24
|
) -> None:
|
|
16
25
|
self.service_url = service_url
|
|
17
26
|
self.tenant_name = tenant_name
|
|
18
|
-
STAGING_AUTH_ENDPOINT =
|
|
19
|
-
"https://iam.platform.test.saas.ibm.com/siusermgr/api/1.0/apikeys/token"
|
|
20
|
-
)
|
|
27
|
+
STAGING_AUTH_ENDPOINT = "https://iam.platform.test.saas.ibm.com/siusermgr/api/1.0/apikeys/token"
|
|
21
28
|
PROD_AUTH_ENDPOINT = (
|
|
22
29
|
"https://iam.platform.saas.ibm.com/siusermgr/api/1.0/apikeys/token"
|
|
23
30
|
)
|
|
@@ -25,7 +32,9 @@ class ServiceInstance:
|
|
|
25
32
|
|
|
26
33
|
self.is_saas = is_saas_url(service_url) if is_saas is None else is_saas
|
|
27
34
|
self.is_ibm_cloud = (
|
|
28
|
-
is_ibm_cloud_url(service_url)
|
|
35
|
+
is_ibm_cloud_url(service_url)
|
|
36
|
+
if is_ibm_cloud is None
|
|
37
|
+
else is_ibm_cloud
|
|
29
38
|
)
|
|
30
39
|
|
|
31
40
|
if self.is_saas:
|
|
@@ -49,10 +58,10 @@ class ServiceInstance:
|
|
|
49
58
|
def get_user_token(self):
|
|
50
59
|
try:
|
|
51
60
|
if self.is_saas:
|
|
52
|
-
apikey = os.environ.get("
|
|
61
|
+
apikey = os.environ.get("WO_API_KEY")
|
|
53
62
|
if not apikey:
|
|
54
63
|
raise RuntimeError(
|
|
55
|
-
"
|
|
64
|
+
"WO_API_KEY not set in environment for SaaS mode"
|
|
56
65
|
)
|
|
57
66
|
if self.is_ibm_cloud:
|
|
58
67
|
data = {
|
|
@@ -88,7 +97,8 @@ class ServiceInstance:
|
|
|
88
97
|
|
|
89
98
|
def _get_tenant_token(self, tenant_id: str):
|
|
90
99
|
resp = requests.post(
|
|
91
|
-
self.tenant_auth_endpoint.format(self.service_url, tenant_id),
|
|
100
|
+
self.tenant_auth_endpoint.format(self.service_url, tenant_id),
|
|
101
|
+
data=USER,
|
|
92
102
|
)
|
|
93
103
|
if resp.status_code == 200:
|
|
94
104
|
return resp.json()["access_token"]
|
|
@@ -122,7 +132,9 @@ class ServiceInstance:
|
|
|
122
132
|
"tags": ["test"],
|
|
123
133
|
}
|
|
124
134
|
|
|
125
|
-
resp = requests.post(
|
|
135
|
+
resp = requests.post(
|
|
136
|
+
self.tenant_url, headers=headers, json=tenant_config
|
|
137
|
+
)
|
|
126
138
|
if resp.status_code == 201:
|
|
127
139
|
return True
|
|
128
140
|
else:
|
|
@@ -131,7 +143,7 @@ class ServiceInstance:
|
|
|
131
143
|
def create_tenant_if_not_exist(self) -> str:
|
|
132
144
|
if self.is_saas:
|
|
133
145
|
logger.info(
|
|
134
|
-
"SaaS mode: running against Remote Service and skipping tenant creation"
|
|
146
|
+
"[d b]SaaS mode: running against Remote Service and skipping tenant creation"
|
|
135
147
|
)
|
|
136
148
|
return None
|
|
137
149
|
|
|
@@ -139,16 +151,63 @@ class ServiceInstance:
|
|
|
139
151
|
default_tenant = self.get_default_tenant(user_auth_token)
|
|
140
152
|
|
|
141
153
|
if not default_tenant:
|
|
142
|
-
logger.info(
|
|
154
|
+
logger.info(
|
|
155
|
+
"[d b]no local tenant found. A default tenant is created"
|
|
156
|
+
)
|
|
143
157
|
self.create_eval_tenant(user_auth_token)
|
|
144
158
|
default_tenant = self.get_default_tenant(user_auth_token)
|
|
145
159
|
else:
|
|
146
|
-
logger.info("local tenant found")
|
|
160
|
+
logger.info("[d b]local tenant found")
|
|
147
161
|
|
|
148
162
|
return default_tenant["id"]
|
|
149
163
|
|
|
150
164
|
|
|
151
|
-
def
|
|
165
|
+
def get_env_settings(
|
|
166
|
+
tenant_name: str, env_config_path: Optional[str] = None
|
|
167
|
+
) -> Dict[str, Any]:
|
|
168
|
+
if env_config_path is None:
|
|
169
|
+
env_config_path = (
|
|
170
|
+
f"{os.path.expanduser('~')}/.config/orchestrate/config.yaml"
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
try:
|
|
174
|
+
with open(env_config_path, "r", encoding="utf-8") as f:
|
|
175
|
+
cfg = yaml.safe_load(f) or {}
|
|
176
|
+
except FileNotFoundError:
|
|
177
|
+
return {}
|
|
178
|
+
|
|
179
|
+
tenant_env = (cfg.get("environments") or {}).get(tenant_name) or {}
|
|
180
|
+
cached_user_env = cfg.get("cached_user_env") or {}
|
|
181
|
+
|
|
182
|
+
merged = cached_user_env | tenant_env
|
|
183
|
+
|
|
184
|
+
return dict(merged)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def apply_env_overrides(
|
|
188
|
+
base: Dict[str, Any],
|
|
189
|
+
tenant_name: str,
|
|
190
|
+
keys: Optional[Iterable[str]] = None,
|
|
191
|
+
env_config_path: Optional[str] = None,
|
|
192
|
+
) -> Dict[str, Any]:
|
|
193
|
+
"""
|
|
194
|
+
Returns a new dict where base is overridden by tenant-defined values.
|
|
195
|
+
- If keys is None, tries to override any keys present in tenant env.
|
|
196
|
+
- Only overrides when the tenant value is present and not None.
|
|
197
|
+
"""
|
|
198
|
+
env = get_env_settings(tenant_name, env_config_path=env_config_path)
|
|
199
|
+
merged = dict(base)
|
|
200
|
+
keys_to_consider = keys if keys is not None else env.keys()
|
|
201
|
+
|
|
202
|
+
for k in keys_to_consider:
|
|
203
|
+
if k in env and env[k] is not None:
|
|
204
|
+
merged[k] = env[k]
|
|
205
|
+
return merged
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def tenant_setup(
|
|
209
|
+
service_url: Optional[str], tenant_name: str
|
|
210
|
+
) -> Tuple[Optional[str], Optional[str], Dict[str, Any]]:
|
|
152
211
|
# service_instance = ServiceInstance(
|
|
153
212
|
# service_url=service_url,
|
|
154
213
|
# tenant_name=tenant_name
|
|
@@ -159,21 +218,53 @@ def tenant_setup(service_url: str, tenant_name: str):
|
|
|
159
218
|
# else:
|
|
160
219
|
# tenant_token = service_instance._get_tenant_token(tenant_id)
|
|
161
220
|
|
|
162
|
-
auth_config_path =
|
|
163
|
-
|
|
221
|
+
auth_config_path = (
|
|
222
|
+
f"{os.path.expanduser('~')}/.cache/orchestrate/credentials.yaml"
|
|
223
|
+
)
|
|
224
|
+
env_config_path = (
|
|
225
|
+
f"{os.path.expanduser('~')}/.config/orchestrate/config.yaml"
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
try:
|
|
229
|
+
with open(auth_config_path, "r", encoding="utf-8") as f:
|
|
230
|
+
auth_config = yaml.safe_load(f) or {}
|
|
231
|
+
except FileNotFoundError:
|
|
232
|
+
auth_config = {}
|
|
233
|
+
|
|
234
|
+
try:
|
|
235
|
+
with open(env_config_path, "r", encoding="utf-8") as f:
|
|
236
|
+
env_config = yaml.safe_load(f) or {}
|
|
237
|
+
except FileNotFoundError:
|
|
238
|
+
env_config = {}
|
|
164
239
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
auth_config = yaml.safe_load(f)
|
|
168
|
-
# auth_config["auth"][tenant_name] = {"wxo_mcsp_token": tenant_token}
|
|
240
|
+
environments = env_config.setdefault("environments", {})
|
|
241
|
+
context = env_config.setdefault("context", {})
|
|
169
242
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
243
|
+
tenant_env = environments.setdefault(tenant_name, {})
|
|
244
|
+
|
|
245
|
+
if service_url and str(service_url).strip():
|
|
246
|
+
tenant_env["wxo_url"] = service_url
|
|
247
|
+
|
|
248
|
+
resolved_service_url = tenant_env.get("wxo_url")
|
|
249
|
+
|
|
250
|
+
context["active_environment"] = tenant_name
|
|
251
|
+
|
|
252
|
+
# Ensure parent directories exist so tests (which may run in clean envs)
|
|
253
|
+
# can write these files without raising FileNotFoundError.
|
|
254
|
+
auth_dir = os.path.dirname(auth_config_path)
|
|
255
|
+
env_dir = os.path.dirname(env_config_path)
|
|
256
|
+
os.makedirs(auth_dir, exist_ok=True)
|
|
257
|
+
os.makedirs(env_dir, exist_ok=True)
|
|
174
258
|
|
|
175
259
|
with open(auth_config_path, "w") as f:
|
|
176
260
|
yaml.dump(auth_config, f)
|
|
177
261
|
with open(env_config_path, "w") as f:
|
|
178
262
|
yaml.dump(env_config, f)
|
|
179
|
-
|
|
263
|
+
|
|
264
|
+
token = (
|
|
265
|
+
auth_config.get("auth", {}).get(tenant_name, {}).get("wxo_mcsp_token")
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
env_merged = get_env_settings(tenant_name, env_config_path=env_config_path)
|
|
269
|
+
|
|
270
|
+
return token, resolved_service_url, env_merged
|
|
@@ -1,35 +1,200 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.logging import RichHandler
|
|
6
|
+
|
|
4
7
|
from wxo_agentic_evaluation.arg_configs import ProviderConfig
|
|
8
|
+
from wxo_agentic_evaluation.service_provider.gateway_provider import (
|
|
9
|
+
GatewayProvider,
|
|
10
|
+
)
|
|
11
|
+
from wxo_agentic_evaluation.service_provider.model_proxy_provider import (
|
|
12
|
+
ModelProxyProvider,
|
|
13
|
+
)
|
|
14
|
+
from wxo_agentic_evaluation.service_provider.ollama_provider import (
|
|
15
|
+
OllamaProvider,
|
|
16
|
+
)
|
|
17
|
+
from wxo_agentic_evaluation.service_provider.referenceless_provider_wrapper import (
|
|
18
|
+
GatewayProviderLLMKitWrapper,
|
|
19
|
+
ModelProxyProviderLLMKitWrapper,
|
|
20
|
+
WatsonXLLMKitWrapper,
|
|
21
|
+
)
|
|
22
|
+
from wxo_agentic_evaluation.service_provider.watsonx_provider import (
|
|
23
|
+
WatsonXProvider,
|
|
24
|
+
)
|
|
5
25
|
|
|
6
|
-
|
|
26
|
+
try:
|
|
27
|
+
from wxo_agentic_evaluation.service_provider.portkey_provider import (
|
|
28
|
+
PortkeyProvider,
|
|
29
|
+
)
|
|
30
|
+
except:
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
USE_GATEWAY_MODEL_PROVIDER: bool = (
|
|
35
|
+
os.environ.get("USE_GATEWAY_MODEL_PROVIDER", "FALSE").upper() == "TRUE"
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
_logging_console = Console(stderr=True)
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_log_level_from_env():
|
|
45
|
+
|
|
46
|
+
level_env = os.getenv("WXO_EVALUATION_LOGLEVEL")
|
|
47
|
+
return level_env
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
LOGGING_ENABLED = get_log_level_from_env() is not None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def configure_logging_for_package_from_env(
|
|
54
|
+
package_name: str = "wxo_agentic_evaluation",
|
|
55
|
+
ensure_output: bool = True,
|
|
56
|
+
) -> None:
|
|
57
|
+
"""
|
|
58
|
+
Configure logging using the env var WXO_EVALUATION_LOGLEVEL - no logging if that's not set
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
level_env = get_log_level_from_env()
|
|
62
|
+
if not level_env:
|
|
63
|
+
return
|
|
64
|
+
|
|
65
|
+
level = None
|
|
66
|
+
upper = level_env.strip().upper()
|
|
67
|
+
if hasattr(logging, upper):
|
|
68
|
+
level = getattr(logging, upper, None)
|
|
69
|
+
|
|
70
|
+
pkg_logger = logging.getLogger(package_name)
|
|
71
|
+
pkg_logger.setLevel(level)
|
|
72
|
+
|
|
73
|
+
if ensure_output:
|
|
74
|
+
if not pkg_logger.handlers:
|
|
75
|
+
handler = RichHandler(
|
|
76
|
+
console=_logging_console,
|
|
77
|
+
rich_tracebacks=True,
|
|
78
|
+
show_time=False,
|
|
79
|
+
show_level=False,
|
|
80
|
+
show_path=False,
|
|
81
|
+
markup=True,
|
|
82
|
+
enable_link_path=True,
|
|
83
|
+
omit_repeated_times=True,
|
|
84
|
+
tracebacks_theme="github-dark",
|
|
85
|
+
)
|
|
86
|
+
handler.setFormatter(
|
|
87
|
+
logging.Formatter("%(levelname)s %(message)s")
|
|
88
|
+
)
|
|
89
|
+
handler.setLevel(logging.NOTSET)
|
|
90
|
+
pkg_logger.addHandler(handler)
|
|
91
|
+
pkg_logger.propagate = False
|
|
92
|
+
|
|
93
|
+
# Quiet common noisy debug libs
|
|
94
|
+
for name in (
|
|
95
|
+
"urllib3",
|
|
96
|
+
"urllib3.connectionpool",
|
|
97
|
+
"requests.packages.urllib3",
|
|
98
|
+
):
|
|
99
|
+
logging.getLogger(name).setLevel(logging.WARNING)
|
|
100
|
+
except:
|
|
101
|
+
logger.warning("Input log level %s not valid", level_env)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
configure_logging_for_package_from_env()
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _instantiate_provider(
|
|
108
|
+
config: ProviderConfig, is_referenceless_eval: bool = False, **kwargs
|
|
109
|
+
):
|
|
7
110
|
|
|
8
|
-
def _instantiate_provider(config: ProviderConfig, **kwargs):
|
|
9
111
|
if config.provider == "watsonx":
|
|
10
|
-
|
|
112
|
+
logger.info("Instantiate watsonx provider")
|
|
113
|
+
if is_referenceless_eval:
|
|
114
|
+
provider = WatsonXLLMKitWrapper
|
|
115
|
+
else:
|
|
116
|
+
provider = WatsonXProvider
|
|
117
|
+
return provider(
|
|
118
|
+
model_id=config.model_id,
|
|
119
|
+
embedding_model_id=config.embedding_model_id,
|
|
120
|
+
**kwargs,
|
|
121
|
+
)
|
|
11
122
|
elif config.provider == "ollama":
|
|
123
|
+
logger.info("Instantiate Ollama")
|
|
12
124
|
return OllamaProvider(model_id=config.model_id, **kwargs)
|
|
125
|
+
|
|
126
|
+
elif config.provider == "gateway":
|
|
127
|
+
logger.info("Instantiate gateway inference provider")
|
|
128
|
+
if is_referenceless_eval:
|
|
129
|
+
provider = GatewayProviderLLMKitWrapper
|
|
130
|
+
else:
|
|
131
|
+
provider = GatewayProvider
|
|
132
|
+
return provider(
|
|
133
|
+
model_id=config.model_id,
|
|
134
|
+
embedding_model_id=config.embedding_model_id,
|
|
135
|
+
**kwargs,
|
|
136
|
+
)
|
|
137
|
+
|
|
13
138
|
elif config.provider == "model_proxy":
|
|
14
|
-
|
|
139
|
+
logger.info("Instantiate model proxy provider")
|
|
140
|
+
if is_referenceless_eval:
|
|
141
|
+
provider = ModelProxyProviderLLMKitWrapper
|
|
142
|
+
else:
|
|
143
|
+
provider = ModelProxyProvider
|
|
144
|
+
|
|
145
|
+
return provider(
|
|
146
|
+
model_id=config.model_id,
|
|
147
|
+
embedding_model_id=config.embedding_model_id,
|
|
148
|
+
**kwargs,
|
|
149
|
+
)
|
|
150
|
+
|
|
15
151
|
else:
|
|
16
|
-
raise RuntimeError(
|
|
17
|
-
|
|
18
|
-
|
|
152
|
+
raise RuntimeError(
|
|
153
|
+
f"target provider is not supported {config.provider}"
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def get_provider(
|
|
158
|
+
config: ProviderConfig = None,
|
|
159
|
+
model_id: str = None,
|
|
160
|
+
embedding_model_id: str = None,
|
|
161
|
+
referenceless_eval: bool = False,
|
|
162
|
+
provider: str = None,
|
|
163
|
+
api_key: str = None,
|
|
164
|
+
use_portkey_provider: bool = False,
|
|
165
|
+
**kwargs,
|
|
166
|
+
):
|
|
167
|
+
|
|
168
|
+
if use_portkey_provider:
|
|
169
|
+
return PortkeyProvider(
|
|
170
|
+
provider=provider, model_id=model_id, api_key=api_key, **kwargs
|
|
171
|
+
)
|
|
172
|
+
|
|
19
173
|
if config:
|
|
20
174
|
return _instantiate_provider(config, **kwargs)
|
|
21
175
|
|
|
22
176
|
if not model_id:
|
|
23
177
|
raise ValueError("model_id must be provided if config is not supplied")
|
|
24
178
|
|
|
179
|
+
if USE_GATEWAY_MODEL_PROVIDER:
|
|
180
|
+
logger.info("[d b]Using gateway inference provider override")
|
|
181
|
+
config = ProviderConfig(provider="gateway", model_id=model_id)
|
|
182
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
183
|
+
|
|
25
184
|
if "WATSONX_APIKEY" in os.environ and "WATSONX_SPACE_ID" in os.environ:
|
|
26
|
-
|
|
27
|
-
|
|
185
|
+
logger.info("[d b]Using watsonx inference provider")
|
|
186
|
+
config = ProviderConfig(
|
|
187
|
+
provider="watsonx",
|
|
188
|
+
model_id=model_id,
|
|
189
|
+
embedding_model_id=embedding_model_id,
|
|
190
|
+
)
|
|
191
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
28
192
|
|
|
29
|
-
if "
|
|
193
|
+
if "WO_INSTANCE" in os.environ:
|
|
194
|
+
logger.info("[d b]Using model_proxy inference provider")
|
|
30
195
|
config = ProviderConfig(provider="model_proxy", model_id=model_id)
|
|
31
|
-
return _instantiate_provider(config, **kwargs)
|
|
196
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|
|
32
197
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
198
|
+
logger.info("[d b]Using gateway inference provider default")
|
|
199
|
+
config = ProviderConfig(provider="gateway", model_id=model_id)
|
|
200
|
+
return _instantiate_provider(config, referenceless_eval, **kwargs)
|