ibm-watsonx-orchestrate-evaluation-framework 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ibm-watsonx-orchestrate-evaluation-framework might be problematic. Click here for more details.
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info}/METADATA +2 -2
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info}/RECORD +5 -5
- wxo_agentic_evaluation/record_chat.py +5 -20
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info → ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ibm-watsonx-orchestrate-evaluation-framework
|
|
3
|
-
Version: 1.0.4
|
|
3
|
+
Version: 1.0.6
|
|
4
4
|
Summary: The WxO evaluation framework
|
|
5
5
|
Author-email: Haode Qi <Haode.Qi@ibm.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Requires-Python: <3.14,>=3.11
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Requires-Dist: rich~=13.9.4
|
|
10
|
-
Requires-Dist: pydantic
|
|
10
|
+
Requires-Dist: pydantic<3.0.0,>=2.10.3
|
|
11
11
|
Requires-Dist: pyyaml~=6.0.2
|
|
12
12
|
Requires-Dist: jinja2~=3.1.5
|
|
13
13
|
Requires-Dist: python-dotenv~=1.0.1
|
|
@@ -10,7 +10,7 @@ wxo_agentic_evaluation/llm_matching.py,sha256=l010exoMmsvTIAVHCm-Ok0diyeQogjCmem
|
|
|
10
10
|
wxo_agentic_evaluation/llm_rag_eval.py,sha256=vsNGz1cFE5QGdhnfrx-iJq1r6q8tSI9Ef1mzuhoHElg,1642
|
|
11
11
|
wxo_agentic_evaluation/llm_user.py,sha256=0zSsyEM7pYQtLcfbnu0gEIkosHDwntOZY84Ito6__SM,1407
|
|
12
12
|
wxo_agentic_evaluation/main.py,sha256=tRXVle2o1JhwJZOTpqdsOzBOpxPYxAH5ziZkbCmzfyU,11470
|
|
13
|
-
wxo_agentic_evaluation/record_chat.py,sha256=
|
|
13
|
+
wxo_agentic_evaluation/record_chat.py,sha256=IAKCZ6Bc4natHA4SyNtC4tjo-0MDglwBcY5AWvXSgR0,7317
|
|
14
14
|
wxo_agentic_evaluation/resource_map.py,sha256=-dIWQdpEpPeSCbDeYfRupG9KV1Q4NlHGb5KXywjkulM,1645
|
|
15
15
|
wxo_agentic_evaluation/service_instance.py,sha256=yt7XpwheaRRG8Ri4TFIS5G2p5mnCwvNgj6T7bDF5uTU,6494
|
|
16
16
|
wxo_agentic_evaluation/test_prompt.py,sha256=ksteXCs9iDQPMETc4Hb7JAXHhxz2r678U6-sgZJAO28,3924
|
|
@@ -50,7 +50,7 @@ wxo_agentic_evaluation/service_provider/provider.py,sha256=MsnRzLYAaQiU6y6xf6eId
|
|
|
50
50
|
wxo_agentic_evaluation/service_provider/watsonx_provider.py,sha256=iKVkWs4PRTM_S0TIdPgQ9NFQWPlDvcEvuHpQlIPzO10,6216
|
|
51
51
|
wxo_agentic_evaluation/utils/__init__.py,sha256=QMxk6hx1CDvCBLFh40WpPZmqFNJtDqwXP7S7cXD6NQE,145
|
|
52
52
|
wxo_agentic_evaluation/utils/utils.py,sha256=JYZQZ-OBy43gAWg9S7duJi9StRApGJATs2JUsW1l30M,6057
|
|
53
|
-
ibm_watsonx_orchestrate_evaluation_framework-1.0.
|
|
54
|
-
ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
55
|
-
ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
|
|
56
|
-
ibm_watsonx_orchestrate_evaluation_framework-1.0.4.dist-info/RECORD,,
|
|
53
|
+
ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info/METADATA,sha256=BqQELgtuSVS6tHNQ5nGkgfwPBiAFgTnvgZbWG3hjCgM,17674
|
|
54
|
+
ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
55
|
+
ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info/top_level.txt,sha256=2okpqtpxyqHoLyb2msio4pzqSg7yPSzwI7ekks96wYE,23
|
|
56
|
+
ibm_watsonx_orchestrate_evaluation_framework-1.0.6.dist-info/RECORD,,
|
|
@@ -128,7 +128,6 @@ def record_chats(config: ChatRecordingConfig):
|
|
|
128
128
|
while True:
|
|
129
129
|
all_runs = get_all_runs(wxo_client)
|
|
130
130
|
seen_threads = set()
|
|
131
|
-
|
|
132
131
|
# Process only new runs that started after our recording began
|
|
133
132
|
for run in all_runs:
|
|
134
133
|
thread_id = run.get("thread_id")
|
|
@@ -156,11 +155,8 @@ def record_chats(config: ChatRecordingConfig):
|
|
|
156
155
|
f"\n[green]INFO:[/green] New recording started at {started_at}"
|
|
157
156
|
)
|
|
158
157
|
rich.print(
|
|
159
|
-
f"[green]INFO:[/green]
|
|
158
|
+
f"[green]INFO:[/green] Annotations saved to: {os.path.join(config.output_dir, f'{thread_id}_annotated_data.json')}"
|
|
160
159
|
)
|
|
161
|
-
# rich.print(
|
|
162
|
-
# f"[green]INFO:[/green] Annotations saved to: {os.path.join(config.output_dir, f'{thread_id}_annotated_data.json')}"
|
|
163
|
-
# )
|
|
164
160
|
processed_threads.add(thread_id)
|
|
165
161
|
|
|
166
162
|
try:
|
|
@@ -177,23 +173,12 @@ def record_chats(config: ChatRecordingConfig):
|
|
|
177
173
|
agent_name, messages, config.keywords_generation_config
|
|
178
174
|
)
|
|
179
175
|
|
|
180
|
-
|
|
181
|
-
config.output_dir, f"{thread_id}
|
|
176
|
+
annotation_filename = os.path.join(
|
|
177
|
+
config.output_dir, f"{thread_id}_annotated_data.json"
|
|
182
178
|
)
|
|
183
179
|
|
|
184
|
-
with open(
|
|
185
|
-
json.dump(
|
|
186
|
-
[msg.model_dump() for msg in messages], f, indent=4
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
# TO-DO: we want some tracing but we also do not want to persist the file
|
|
190
|
-
# in the same folder.
|
|
191
|
-
# annotation_filename = os.path.join(
|
|
192
|
-
# config.output_dir, f"{thread_id}_annotated_data.json"
|
|
193
|
-
# )
|
|
194
|
-
|
|
195
|
-
# with open(annotation_filename, "w") as f:
|
|
196
|
-
# json.dump(annotated_data, f, indent=4)
|
|
180
|
+
with open(annotation_filename, "w") as f:
|
|
181
|
+
json.dump(annotated_data, f, indent=4)
|
|
197
182
|
except Exception as e:
|
|
198
183
|
rich.print(
|
|
199
184
|
f"[red]ERROR:[/red] Failed to process thread {thread_id}: {str(e)}"
|
|
File without changes
|