@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +170 -69
- package/bin/__tests__/callback-server.test.js +4 -1
- package/bin/cli.js +41 -164
- package/bin/commands/config.js +251 -0
- package/package.json +2 -1
- package/packages/doctor/__tests__/detect.test.js +2 -6
- package/packages/doctor/src/checks/local-memory.js +164 -196
- package/packages/doctor/src/detect.js +11 -3
- package/packages/memory/src/corpus/adapters.js +104 -0
- package/packages/memory/src/corpus/cli.js +72 -7
- package/packages/memory/src/corpus/index.js +1 -1
- package/packages/memory-engine/.env.example +13 -0
- package/packages/memory-engine/README.md +131 -0
- package/packages/memory-engine/bench/README.md +99 -0
- package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
- package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
- package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
- package/packages/memory-engine/compat/Dockerfile +11 -0
- package/packages/memory-engine/compat/server.py +680 -0
- package/packages/memory-engine/docker-compose.yml +243 -0
- package/packages/memory-engine/docs/MIGRATION.md +178 -0
- package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
- package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
- package/packages/memory-engine/engine/README.md +52 -0
- package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
- package/packages/memory-engine/engine/l6-document-store.py +1018 -0
- package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
- package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
- package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
- package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
- package/packages/memory-engine/engine/services/l4/server.py +235 -0
- package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
- package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
- package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
- package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
- package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
- package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
- package/packages/memory-engine/pme_memory/__init__.py +0 -0
- package/packages/memory-engine/pme_memory/__main__.py +129 -0
- package/packages/memory-engine/pme_memory/artifacts.py +95 -0
- package/packages/memory-engine/pme_memory/embed.py +74 -0
- package/packages/memory-engine/pme_memory/health.py +36 -0
- package/packages/memory-engine/pme_memory/hygiene.py +159 -0
- package/packages/memory-engine/pme_memory/indexer.py +200 -0
- package/packages/memory-engine/pme_memory/needs.py +55 -0
- package/packages/memory-engine/pme_memory/provenance.py +80 -0
- package/packages/memory-engine/pme_memory/scoring.py +168 -0
- package/packages/memory-engine/pme_memory/search.py +52 -0
- package/packages/memory-engine/pme_memory/store.py +86 -0
- package/packages/memory-engine/pme_memory/synthesis.py +114 -0
- package/packages/memory-engine/pyproject.toml +65 -0
- package/packages/memory-engine/scripts/kg-extractor.py +557 -0
- package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
- package/packages/memory-engine/tests/test_api_contract.sh +57 -0
|
@@ -0,0 +1,727 @@
|
|
|
1
|
+
{
|
|
2
|
+
"bench": "product-catalogue",
|
|
3
|
+
"stack": "pentatonic-memory",
|
|
4
|
+
"n_tasks": 18,
|
|
5
|
+
"n_correct": 12,
|
|
6
|
+
"accuracy": 0.6666666666666666,
|
|
7
|
+
"mean_score": 0.6666666666666666,
|
|
8
|
+
"p50_search_ms": 32.06061548553407,
|
|
9
|
+
"p95_search_ms": 41.31171901826747,
|
|
10
|
+
"total_tokens_in": 0,
|
|
11
|
+
"total_tokens_out": 0,
|
|
12
|
+
"total_usd": 0.0,
|
|
13
|
+
"by_tag": {
|
|
14
|
+
"factoid": {
|
|
15
|
+
"n": 10,
|
|
16
|
+
"mean_score": 0.9,
|
|
17
|
+
"accuracy": 0.9
|
|
18
|
+
},
|
|
19
|
+
"material": {
|
|
20
|
+
"n": 4,
|
|
21
|
+
"mean_score": 0.5,
|
|
22
|
+
"accuracy": 0.5
|
|
23
|
+
},
|
|
24
|
+
"spec": {
|
|
25
|
+
"n": 1,
|
|
26
|
+
"mean_score": 1.0,
|
|
27
|
+
"accuracy": 1.0
|
|
28
|
+
},
|
|
29
|
+
"warranty": {
|
|
30
|
+
"n": 1,
|
|
31
|
+
"mean_score": 1.0,
|
|
32
|
+
"accuracy": 1.0
|
|
33
|
+
},
|
|
34
|
+
"inventory": {
|
|
35
|
+
"n": 3,
|
|
36
|
+
"mean_score": 0.6666666666666666,
|
|
37
|
+
"accuracy": 0.6666666666666666
|
|
38
|
+
},
|
|
39
|
+
"compat": {
|
|
40
|
+
"n": 2,
|
|
41
|
+
"mean_score": 1.0,
|
|
42
|
+
"accuracy": 1.0
|
|
43
|
+
},
|
|
44
|
+
"multi-doc": {
|
|
45
|
+
"n": 3,
|
|
46
|
+
"mean_score": 0.6666666666666666,
|
|
47
|
+
"accuracy": 0.6666666666666666
|
|
48
|
+
},
|
|
49
|
+
"care": {
|
|
50
|
+
"n": 1,
|
|
51
|
+
"mean_score": 1.0,
|
|
52
|
+
"accuracy": 1.0
|
|
53
|
+
},
|
|
54
|
+
"policy": {
|
|
55
|
+
"n": 2,
|
|
56
|
+
"mean_score": 0.5,
|
|
57
|
+
"accuracy": 0.5
|
|
58
|
+
},
|
|
59
|
+
"pricing": {
|
|
60
|
+
"n": 2,
|
|
61
|
+
"mean_score": 1.0,
|
|
62
|
+
"accuracy": 1.0
|
|
63
|
+
},
|
|
64
|
+
"multi-hop": {
|
|
65
|
+
"n": 2,
|
|
66
|
+
"mean_score": 0.5,
|
|
67
|
+
"accuracy": 0.5
|
|
68
|
+
},
|
|
69
|
+
"math": {
|
|
70
|
+
"n": 1,
|
|
71
|
+
"mean_score": 0.0,
|
|
72
|
+
"accuracy": 0.0
|
|
73
|
+
},
|
|
74
|
+
"rubric": {
|
|
75
|
+
"n": 3,
|
|
76
|
+
"mean_score": 0.0,
|
|
77
|
+
"accuracy": 0.0
|
|
78
|
+
},
|
|
79
|
+
"recommendation": {
|
|
80
|
+
"n": 1,
|
|
81
|
+
"mean_score": 0.0,
|
|
82
|
+
"accuracy": 0.0
|
|
83
|
+
},
|
|
84
|
+
"certification": {
|
|
85
|
+
"n": 1,
|
|
86
|
+
"mean_score": 0.0,
|
|
87
|
+
"accuracy": 0.0
|
|
88
|
+
},
|
|
89
|
+
"entity": {
|
|
90
|
+
"n": 1,
|
|
91
|
+
"mean_score": 0.0,
|
|
92
|
+
"accuracy": 0.0
|
|
93
|
+
}
|
|
94
|
+
},
|
|
95
|
+
"extra": {
|
|
96
|
+
"ingest_ms": 8186.979395017261,
|
|
97
|
+
"grading": "substring",
|
|
98
|
+
"limit": 3,
|
|
99
|
+
"tokens": {
|
|
100
|
+
"corpus_tokens": 1845,
|
|
101
|
+
"query_tokens": 271,
|
|
102
|
+
"context_tokens": 2072,
|
|
103
|
+
"retrieval_tokens": 2343,
|
|
104
|
+
"naive_tokens": 33481,
|
|
105
|
+
"saved_tokens": 31138,
|
|
106
|
+
"reduction_pct": 0.9300200113497208,
|
|
107
|
+
"mean_retrieval_tokens_per_task": 130.16666666666666,
|
|
108
|
+
"tokenizer": "cl100k_base",
|
|
109
|
+
"per_task": {
|
|
110
|
+
"sku-atlas-material": {
|
|
111
|
+
"query": 11,
|
|
112
|
+
"context": 150,
|
|
113
|
+
"retrieval": 161,
|
|
114
|
+
"judge_in": 0,
|
|
115
|
+
"judge_out": 0,
|
|
116
|
+
"judge_latency_ms": 0.0
|
|
117
|
+
},
|
|
118
|
+
"sku-luna-capacity": {
|
|
119
|
+
"query": 8,
|
|
120
|
+
"context": 145,
|
|
121
|
+
"retrieval": 153,
|
|
122
|
+
"judge_in": 0,
|
|
123
|
+
"judge_out": 0,
|
|
124
|
+
"judge_latency_ms": 0.0
|
|
125
|
+
},
|
|
126
|
+
"sku-luna-warranty": {
|
|
127
|
+
"query": 9,
|
|
128
|
+
"context": 145,
|
|
129
|
+
"retrieval": 154,
|
|
130
|
+
"judge_in": 0,
|
|
131
|
+
"judge_out": 0,
|
|
132
|
+
"judge_latency_ms": 0.0
|
|
133
|
+
},
|
|
134
|
+
"sku-kite-harness-out-of-stock": {
|
|
135
|
+
"query": 17,
|
|
136
|
+
"context": 0,
|
|
137
|
+
"retrieval": 17,
|
|
138
|
+
"judge_in": 0,
|
|
139
|
+
"judge_out": 0,
|
|
140
|
+
"judge_latency_ms": 0.0
|
|
141
|
+
},
|
|
142
|
+
"sku-kite-harness-restock": {
|
|
143
|
+
"query": 17,
|
|
144
|
+
"context": 183,
|
|
145
|
+
"retrieval": 200,
|
|
146
|
+
"judge_in": 0,
|
|
147
|
+
"judge_out": 0,
|
|
148
|
+
"judge_latency_ms": 0.0
|
|
149
|
+
},
|
|
150
|
+
"sku-tee-discontinued-sizes": {
|
|
151
|
+
"query": 13,
|
|
152
|
+
"context": 161,
|
|
153
|
+
"retrieval": 174,
|
|
154
|
+
"judge_in": 0,
|
|
155
|
+
"judge_out": 0,
|
|
156
|
+
"judge_latency_ms": 0.0
|
|
157
|
+
},
|
|
158
|
+
"sku-sleeve-compat-14-mbp": {
|
|
159
|
+
"query": 16,
|
|
160
|
+
"context": 345,
|
|
161
|
+
"retrieval": 361,
|
|
162
|
+
"judge_in": 0,
|
|
163
|
+
"judge_out": 0,
|
|
164
|
+
"judge_latency_ms": 0.0
|
|
165
|
+
},
|
|
166
|
+
"sku-coffee-cup-microwave": {
|
|
167
|
+
"query": 8,
|
|
168
|
+
"context": 156,
|
|
169
|
+
"retrieval": 164,
|
|
170
|
+
"judge_in": 0,
|
|
171
|
+
"judge_out": 0,
|
|
172
|
+
"judge_latency_ms": 0.0
|
|
173
|
+
},
|
|
174
|
+
"sku-bin-liners-home-compost": {
|
|
175
|
+
"query": 11,
|
|
176
|
+
"context": 146,
|
|
177
|
+
"retrieval": 157,
|
|
178
|
+
"judge_in": 0,
|
|
179
|
+
"judge_out": 0,
|
|
180
|
+
"judge_latency_ms": 0.0
|
|
181
|
+
},
|
|
182
|
+
"policy-custom-tote-return": {
|
|
183
|
+
"query": 10,
|
|
184
|
+
"context": 83,
|
|
185
|
+
"retrieval": 93,
|
|
186
|
+
"judge_in": 0,
|
|
187
|
+
"judge_out": 0,
|
|
188
|
+
"judge_latency_ms": 0.0
|
|
189
|
+
},
|
|
190
|
+
"price-tee-subscription": {
|
|
191
|
+
"query": 14,
|
|
192
|
+
"context": 145,
|
|
193
|
+
"retrieval": 159,
|
|
194
|
+
"judge_in": 0,
|
|
195
|
+
"judge_out": 0,
|
|
196
|
+
"judge_latency_ms": 0.0
|
|
197
|
+
},
|
|
198
|
+
"price-sleeve-diff": {
|
|
199
|
+
"query": 19,
|
|
200
|
+
"context": 345,
|
|
201
|
+
"retrieval": 364,
|
|
202
|
+
"judge_in": 0,
|
|
203
|
+
"judge_out": 0,
|
|
204
|
+
"judge_latency_ms": 0.0
|
|
205
|
+
},
|
|
206
|
+
"multi-hop-kite-duotone": {
|
|
207
|
+
"query": 25,
|
|
208
|
+
"context": 68,
|
|
209
|
+
"retrieval": 93,
|
|
210
|
+
"judge_in": 0,
|
|
211
|
+
"judge_out": 0,
|
|
212
|
+
"judge_latency_ms": 0.0
|
|
213
|
+
},
|
|
214
|
+
"multi-hop-takeback-credit": {
|
|
215
|
+
"query": 23,
|
|
216
|
+
"context": 0,
|
|
217
|
+
"retrieval": 23,
|
|
218
|
+
"judge_in": 0,
|
|
219
|
+
"judge_out": 0,
|
|
220
|
+
"judge_latency_ms": 0.0
|
|
221
|
+
},
|
|
222
|
+
"rubric-recommend-kitesurf-bag": {
|
|
223
|
+
"query": 31,
|
|
224
|
+
"context": 0,
|
|
225
|
+
"retrieval": 31,
|
|
226
|
+
"judge_in": 341,
|
|
227
|
+
"judge_out": 38,
|
|
228
|
+
"judge_latency_ms": 874.3087880015373
|
|
229
|
+
},
|
|
230
|
+
"rubric-plastic-free-tee": {
|
|
231
|
+
"query": 11,
|
|
232
|
+
"context": 0,
|
|
233
|
+
"retrieval": 11,
|
|
234
|
+
"judge_in": 307,
|
|
235
|
+
"judge_out": 30,
|
|
236
|
+
"judge_latency_ms": 787.7367050051689
|
|
237
|
+
},
|
|
238
|
+
"rubric-cc-certified-items": {
|
|
239
|
+
"query": 14,
|
|
240
|
+
"context": 0,
|
|
241
|
+
"retrieval": 14,
|
|
242
|
+
"judge_in": 288,
|
|
243
|
+
"judge_out": 38,
|
|
244
|
+
"judge_latency_ms": 714.9526060223579
|
|
245
|
+
},
|
|
246
|
+
"entity-recycled-content-high": {
|
|
247
|
+
"query": 14,
|
|
248
|
+
"context": 0,
|
|
249
|
+
"retrieval": 14,
|
|
250
|
+
"judge_in": 0,
|
|
251
|
+
"judge_out": 0,
|
|
252
|
+
"judge_latency_ms": 0.0
|
|
253
|
+
}
|
|
254
|
+
},
|
|
255
|
+
"judge_tokens_in": 936,
|
|
256
|
+
"judge_tokens_out": 106,
|
|
257
|
+
"judge_calls": 3,
|
|
258
|
+
"judge_mean_latency_ms": 792.3326996763548
|
|
259
|
+
},
|
|
260
|
+
"cost_usd": {
|
|
261
|
+
"assumed_completion_tokens_per_task": 100,
|
|
262
|
+
"rates": {
|
|
263
|
+
"input_per_1k": 0.0025,
|
|
264
|
+
"output_per_1k": 0.01,
|
|
265
|
+
"model": "gpt-4o"
|
|
266
|
+
},
|
|
267
|
+
"retrieval_usd_in": 0.0058575,
|
|
268
|
+
"retrieval_usd_out": 0.018,
|
|
269
|
+
"retrieval_usd_total": 0.023857499999999997,
|
|
270
|
+
"naive_usd_total": 0.1017025,
|
|
271
|
+
"saved_usd": 0.077845,
|
|
272
|
+
"saved_usd_per_1k_tasks": 4.324722222222222
|
|
273
|
+
}
|
|
274
|
+
},
|
|
275
|
+
"task_results": [
|
|
276
|
+
{
|
|
277
|
+
"task_id": "sku-atlas-material",
|
|
278
|
+
"query": "What's the material composition of the Atlas Phone Shell?",
|
|
279
|
+
"answer": "# Atlas Phone Shell \u2014 SKU ATL-PHN-001\n\n**Brand:** Atlas Circular\n**Category:** Mobile accessories / Phone cases\n**Compatible with:** iPhone 15, iPhone 15 Pro\n\n**Material composition:**\n- 68% recycled ocean-bound polypropylene\n- 24% recycled polycarbonate (post-consumer)\n- 8% natural calcium carbonate\n\n**Price:** \u00a329.00 GBP (standard) / \u00a324.00 (subscription)\n**Inventory:** 1,240 units in stock at UK fulfilment (Worthing DC)\n**Take-back:** Yes \u2014 returnable for 50% credit after 18 months\n**Certification:** Cradle to Cradle Bronze, MADE-BY Class B",
|
|
280
|
+
"hits": [
|
|
281
|
+
{
|
|
282
|
+
"text": "# Atlas Phone Shell \u2014 SKU ATL-PHN-001\n\n**Brand:** Atlas Circular\n**Category:** Mobile accessories / Phone cases\n**Compatible with:** iPhone 15, iPhone 15 Pro\n\n**Material composition:**\n- 68% recycled ocean-bound polypropylene\n- 24% recycled polycarbonate (post-consumer)\n- 8% natural calcium carbonate\n\n**Price:** \u00a329.00 GBP (standard) / \u00a324.00 (subscription)\n**Inventory:** 1,240 units in stock at U",
|
|
283
|
+
"score": 0.7574853661453905,
|
|
284
|
+
"source": "pentatonic-memory",
|
|
285
|
+
"doc_id": "sku-atlas-phone-01"
|
|
286
|
+
}
|
|
287
|
+
],
|
|
288
|
+
"correct": true,
|
|
289
|
+
"score": 1.0,
|
|
290
|
+
"grading_notes": "all substrings matched",
|
|
291
|
+
"search_time_ms": 38.870015006978065,
|
|
292
|
+
"generation_time_ms": 0.0,
|
|
293
|
+
"tokens_in": 0,
|
|
294
|
+
"tokens_out": 0,
|
|
295
|
+
"retrieval_tokens": 161,
|
|
296
|
+
"query_tokens": 11,
|
|
297
|
+
"context_tokens": 150,
|
|
298
|
+
"judge_tokens_in": 0,
|
|
299
|
+
"judge_tokens_out": 0,
|
|
300
|
+
"judge_latency_ms": 0.0
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
"task_id": "sku-luna-capacity",
|
|
304
|
+
"query": "What capacity is the Luna water bottle?",
|
|
305
|
+
"answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
|
|
306
|
+
"hits": [
|
|
307
|
+
{
|
|
308
|
+
"text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
|
|
309
|
+
"score": 0.7714634221147277,
|
|
310
|
+
"source": "pentatonic-memory",
|
|
311
|
+
"doc_id": "sku-luna-bottle-42"
|
|
312
|
+
}
|
|
313
|
+
],
|
|
314
|
+
"correct": true,
|
|
315
|
+
"score": 1.0,
|
|
316
|
+
"grading_notes": "all substrings matched",
|
|
317
|
+
"search_time_ms": 28.36476400261745,
|
|
318
|
+
"generation_time_ms": 0.0,
|
|
319
|
+
"tokens_in": 0,
|
|
320
|
+
"tokens_out": 0,
|
|
321
|
+
"retrieval_tokens": 153,
|
|
322
|
+
"query_tokens": 8,
|
|
323
|
+
"context_tokens": 145,
|
|
324
|
+
"judge_tokens_in": 0,
|
|
325
|
+
"judge_tokens_out": 0,
|
|
326
|
+
"judge_latency_ms": 0.0
|
|
327
|
+
},
|
|
328
|
+
{
|
|
329
|
+
"task_id": "sku-luna-warranty",
|
|
330
|
+
"query": "What's the warranty on the Luna bottle?",
|
|
331
|
+
"answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
|
|
332
|
+
"hits": [
|
|
333
|
+
{
|
|
334
|
+
"text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
|
|
335
|
+
"score": 0.5845189118562412,
|
|
336
|
+
"source": "pentatonic-memory",
|
|
337
|
+
"doc_id": "sku-luna-bottle-42"
|
|
338
|
+
}
|
|
339
|
+
],
|
|
340
|
+
"correct": true,
|
|
341
|
+
"score": 1.0,
|
|
342
|
+
"grading_notes": "all substrings matched",
|
|
343
|
+
"search_time_ms": 29.115474986610934,
|
|
344
|
+
"generation_time_ms": 0.0,
|
|
345
|
+
"tokens_in": 0,
|
|
346
|
+
"tokens_out": 0,
|
|
347
|
+
"retrieval_tokens": 154,
|
|
348
|
+
"query_tokens": 9,
|
|
349
|
+
"context_tokens": 145,
|
|
350
|
+
"judge_tokens_in": 0,
|
|
351
|
+
"judge_tokens_out": 0,
|
|
352
|
+
"judge_latency_ms": 0.0
|
|
353
|
+
},
|
|
354
|
+
{
|
|
355
|
+
"task_id": "sku-kite-harness-out-of-stock",
|
|
356
|
+
"query": "Which size of the Nomad Kite Harness v3 is currently out of stock?",
|
|
357
|
+
"answer": "",
|
|
358
|
+
"hits": [],
|
|
359
|
+
"correct": false,
|
|
360
|
+
"score": 0.0,
|
|
361
|
+
"grading_notes": "missing 1/1: ['M']",
|
|
362
|
+
"search_time_ms": 39.99572299653664,
|
|
363
|
+
"generation_time_ms": 0.0,
|
|
364
|
+
"tokens_in": 0,
|
|
365
|
+
"tokens_out": 0,
|
|
366
|
+
"retrieval_tokens": 17,
|
|
367
|
+
"query_tokens": 17,
|
|
368
|
+
"context_tokens": 0,
|
|
369
|
+
"judge_tokens_in": 0,
|
|
370
|
+
"judge_tokens_out": 0,
|
|
371
|
+
"judge_latency_ms": 0.0
|
|
372
|
+
},
|
|
373
|
+
{
|
|
374
|
+
"task_id": "sku-kite-harness-restock",
|
|
375
|
+
"query": "When does the Nomad Kite Harness v3 restock for XS and S?",
|
|
376
|
+
"answer": "# Nomad Kite Harness v3 \u2014 SKU NMD-HRN-V3\n\n**Brand:** Nomad Sports\n**Category:** Watersports / Kite surfing\n**Size range:** XS (26-28\"), S (28-30\"), M (30-32\"), L (32-34\"), XL (34-36\")\n\n**Material composition:**\n- Outer shell: 92% recycled nylon 6,6 (ghost-net source)\n- Foam padding: EVA (non-recycled)\n- Hardware: 316 stainless steel spreader bar\n\n**Price:** \u00a3145.00 GBP\n**Inventory:** M=out of stock, L=6 units, XL=12 units, XS/S=restock 2026-05-15\n**Repair service:** \u00a325 flat fee, turnaround 14 days\n**Compatibility:** F-One bar systems, North bar systems (adapter sold separately)",
|
|
377
|
+
"hits": [
|
|
378
|
+
{
|
|
379
|
+
"text": "# Nomad Kite Harness v3 \u2014 SKU NMD-HRN-V3\n\n**Brand:** Nomad Sports\n**Category:** Watersports / Kite surfing\n**Size range:** XS (26-28\"), S (28-30\"), M (30-32\"), L (32-34\"), XL (34-36\")\n\n**Material composition:**\n- Outer shell: 92% recycled nylon 6,6 (ghost-net source)\n- Foam padding: EVA (non-recycled)\n- Hardware: 316 stainless steel spreader bar\n\n**Price:** \u00a3145.00 GBP\n**Inventory:** M=out of stoc",
|
|
380
|
+
"score": 0.7568954994182511,
|
|
381
|
+
"source": "pentatonic-memory",
|
|
382
|
+
"doc_id": "sku-kite-harness-v3"
|
|
383
|
+
}
|
|
384
|
+
],
|
|
385
|
+
"correct": true,
|
|
386
|
+
"score": 1.0,
|
|
387
|
+
"grading_notes": "all substrings matched",
|
|
388
|
+
"search_time_ms": 39.893368986668065,
|
|
389
|
+
"generation_time_ms": 0.0,
|
|
390
|
+
"tokens_in": 0,
|
|
391
|
+
"tokens_out": 0,
|
|
392
|
+
"retrieval_tokens": 200,
|
|
393
|
+
"query_tokens": 17,
|
|
394
|
+
"context_tokens": 183,
|
|
395
|
+
"judge_tokens_in": 0,
|
|
396
|
+
"judge_tokens_out": 0,
|
|
397
|
+
"judge_latency_ms": 0.0
|
|
398
|
+
},
|
|
399
|
+
{
|
|
400
|
+
"task_id": "sku-tee-discontinued-sizes",
|
|
401
|
+
"query": "Which sizes of the Pentatonic Crew Tee Black are discontinued?",
|
|
402
|
+
"answer": "# Pentatonic Crew Tee Black \u2014 SKU PTT-TEE-BLK\n\n**Brand:** Pentatonic\n**Category:** Apparel / T-shirts\n**Fit:** Unisex regular\n**Size range:** XS, S, M, L, XL, XXL\n\n**Material composition:**\n- 100% GOTS-certified organic cotton, 180 gsm\n- Dyeing: low-impact reactive dye, closed-loop water system\n\n**Price:** \u00a328.00 GBP\n**Inventory:** S=48, M=136, L=94, XL=22, XS=0 (discontinued), XXL=0 (discontinued)\n**Country of origin:** Portugal (CITEVE-certified mill)\n**Care:** Wash cold, line dry; no tumble dry",
|
|
403
|
+
"hits": [
|
|
404
|
+
{
|
|
405
|
+
"text": "# Pentatonic Crew Tee Black \u2014 SKU PTT-TEE-BLK\n\n**Brand:** Pentatonic\n**Category:** Apparel / T-shirts\n**Fit:** Unisex regular\n**Size range:** XS, S, M, L, XL, XXL\n\n**Material composition:**\n- 100% GOTS-certified organic cotton, 180 gsm\n- Dyeing: low-impact reactive dye, closed-loop water system\n\n**Price:** \u00a328.00 GBP\n**Inventory:** S=48, M=136, L=94, XL=22, XS=0 (discontinued), XXL=0 (discontinued",
|
|
406
|
+
"score": 0.765600191590099,
|
|
407
|
+
"source": "pentatonic-memory",
|
|
408
|
+
"doc_id": "sku-cotton-tee-black"
|
|
409
|
+
}
|
|
410
|
+
],
|
|
411
|
+
"correct": true,
|
|
412
|
+
"score": 1.0,
|
|
413
|
+
"grading_notes": "all substrings matched",
|
|
414
|
+
"search_time_ms": 35.890275990823284,
|
|
415
|
+
"generation_time_ms": 0.0,
|
|
416
|
+
"tokens_in": 0,
|
|
417
|
+
"tokens_out": 0,
|
|
418
|
+
"retrieval_tokens": 174,
|
|
419
|
+
"query_tokens": 13,
|
|
420
|
+
"context_tokens": 161,
|
|
421
|
+
"judge_tokens_in": 0,
|
|
422
|
+
"judge_tokens_out": 0,
|
|
423
|
+
"judge_latency_ms": 0.0
|
|
424
|
+
},
|
|
425
|
+
{
|
|
426
|
+
"task_id": "sku-sleeve-compat-14-mbp",
|
|
427
|
+
"query": "Does the Haven Laptop Sleeve 13\" fit a 14\" MacBook Pro?",
|
|
428
|
+
"answer": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n---\n# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 units UK, 156 units EU\n**Colours available:** Charcoal, Oat, Ochre\n**Laptop diagonal max:** 13.6\" (third-party dimension 31.5cm \u00d7 22.5cm)\n---\n# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)",
|
|
429
|
+
"hits": [
|
|
430
|
+
{
|
|
431
|
+
"text": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n",
|
|
432
|
+
"score": 0.8144855584508688,
|
|
433
|
+
"source": "pentatonic-memory",
|
|
434
|
+
"doc_id": "compat-laptop-sleeve-macbook-pro-14"
|
|
435
|
+
},
|
|
436
|
+
{
|
|
437
|
+
"text": "# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 un",
|
|
438
|
+
"score": 0.5219752131848099,
|
|
439
|
+
"source": "pentatonic-memory",
|
|
440
|
+
"doc_id": "sku-laptop-sleeve-13"
|
|
441
|
+
},
|
|
442
|
+
{
|
|
443
|
+
"text": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n",
|
|
444
|
+
"score": 0.5194211102311778,
|
|
445
|
+
"source": "pentatonic-memory",
|
|
446
|
+
"doc_id": "sku-laptop-sleeve-15"
|
|
447
|
+
}
|
|
448
|
+
],
|
|
449
|
+
"correct": true,
|
|
450
|
+
"score": 1.0,
|
|
451
|
+
"grading_notes": "all substrings matched",
|
|
452
|
+
"search_time_ms": 41.31171901826747,
|
|
453
|
+
"generation_time_ms": 0.0,
|
|
454
|
+
"tokens_in": 0,
|
|
455
|
+
"tokens_out": 0,
|
|
456
|
+
"retrieval_tokens": 361,
|
|
457
|
+
"query_tokens": 16,
|
|
458
|
+
"context_tokens": 345,
|
|
459
|
+
"judge_tokens_in": 0,
|
|
460
|
+
"judge_tokens_out": 0,
|
|
461
|
+
"judge_latency_ms": 0.0
|
|
462
|
+
},
|
|
463
|
+
{
|
|
464
|
+
"task_id": "sku-coffee-cup-microwave",
|
|
465
|
+
"query": "Can I microwave the Loop coffee cup?",
|
|
466
|
+
"answer": "# Loop Reusable Coffee Cup 8oz \u2014 SKU LP-CUP-008\n\n**Brand:** Loop\n**Category:** Drinkware / Coffee\n**Capacity:** 8oz (240ml)\n\n**Material composition:**\n- Body: 100% plant-based PLA (corn-derived)\n- Silicone sleeve: food-grade silicone (not bio-based)\n- Lid: BPA-free polypropylene (recycled post-industrial)\n\n**Price:** \u00a312.50 GBP\n**Inventory:** 2,104 units UK, 1,876 units EU\n**Dishwasher safe:** Top rack only\n**Lifespan:** ~2 years regular use, then return for closed-loop composting\n**Note:** Not microwave-safe due to PLA degradation above 60\u00b0C.",
|
|
467
|
+
"hits": [
|
|
468
|
+
{
|
|
469
|
+
"text": "# Loop Reusable Coffee Cup 8oz \u2014 SKU LP-CUP-008\n\n**Brand:** Loop\n**Category:** Drinkware / Coffee\n**Capacity:** 8oz (240ml)\n\n**Material composition:**\n- Body: 100% plant-based PLA (corn-derived)\n- Silicone sleeve: food-grade silicone (not bio-based)\n- Lid: BPA-free polypropylene (recycled post-industrial)\n\n**Price:** \u00a312.50 GBP\n**Inventory:** 2,104 units UK, 1,876 units EU\n**Dishwasher safe:** Top",
|
|
470
|
+
"score": 0.7307098382695573,
|
|
471
|
+
"source": "pentatonic-memory",
|
|
472
|
+
"doc_id": "sku-coffee-cup-reusable"
|
|
473
|
+
}
|
|
474
|
+
],
|
|
475
|
+
"correct": true,
|
|
476
|
+
"score": 1.0,
|
|
477
|
+
"grading_notes": "all substrings matched",
|
|
478
|
+
"search_time_ms": 26.578250981401652,
|
|
479
|
+
"generation_time_ms": 0.0,
|
|
480
|
+
"tokens_in": 0,
|
|
481
|
+
"tokens_out": 0,
|
|
482
|
+
"retrieval_tokens": 164,
|
|
483
|
+
"query_tokens": 8,
|
|
484
|
+
"context_tokens": 156,
|
|
485
|
+
"judge_tokens_in": 0,
|
|
486
|
+
"judge_tokens_out": 0,
|
|
487
|
+
"judge_latency_ms": 0.0
|
|
488
|
+
},
|
|
489
|
+
{
|
|
490
|
+
"task_id": "sku-bin-liners-home-compost",
|
|
491
|
+
"query": "Are the Root bio bin liners home-compostable?",
|
|
492
|
+
"answer": "# Bio Bin Liners 30L (roll of 20) \u2014 SKU BBL-030-020\n\n**Brand:** Root\n**Category:** Home / Waste\n**Size:** 30 litres\n\n**Material composition:**\n- 100% PBAT + corn starch biopolymer\n- EN13432 certified industrially compostable\n- NOT home-compostable (requires industrial composting temperatures)\n\n**Price:** \u00a34.20 GBP per roll\n**Inventory:** 6,420 rolls UK, 2,100 rolls EU\n**Subscription:** Monthly auto-delivery available (\u00a33.60/roll)\n**Council scheme compatibility:** Check with your local authority \u2014 accepted in most UK food-waste streams.",
|
|
493
|
+
"hits": [
|
|
494
|
+
{
|
|
495
|
+
"text": "# Bio Bin Liners 30L (roll of 20) \u2014 SKU BBL-030-020\n\n**Brand:** Root\n**Category:** Home / Waste\n**Size:** 30 litres\n\n**Material composition:**\n- 100% PBAT + corn starch biopolymer\n- EN13432 certified industrially compostable\n- NOT home-compostable (requires industrial composting temperatures)\n\n**Price:** \u00a34.20 GBP per roll\n**Inventory:** 6,420 rolls UK, 2,100 rolls EU\n**Subscription:** Monthly aut",
|
|
496
|
+
"score": 0.5844119891045675,
|
|
497
|
+
"source": "pentatonic-memory",
|
|
498
|
+
"doc_id": "sku-biodegradable-trash-bags"
|
|
499
|
+
}
|
|
500
|
+
],
|
|
501
|
+
"correct": true,
|
|
502
|
+
"score": 1.0,
|
|
503
|
+
"grading_notes": "all substrings matched",
|
|
504
|
+
"search_time_ms": 28.265644010389224,
|
|
505
|
+
"generation_time_ms": 0.0,
|
|
506
|
+
"tokens_in": 0,
|
|
507
|
+
"tokens_out": 0,
|
|
508
|
+
"retrieval_tokens": 157,
|
|
509
|
+
"query_tokens": 11,
|
|
510
|
+
"context_tokens": 146,
|
|
511
|
+
"judge_tokens_in": 0,
|
|
512
|
+
"judge_tokens_out": 0,
|
|
513
|
+
"judge_latency_ms": 0.0
|
|
514
|
+
},
|
|
515
|
+
{
|
|
516
|
+
"task_id": "policy-custom-tote-return",
|
|
517
|
+
"query": "Can I return a custom-printed tote bag?",
|
|
518
|
+
"answer": "# Returns policy\n\nAll unused items returnable within 30 days of delivery for full refund.\nItems returned after 30 days but under 90 days: store credit only.\nTake-back programme is separate \u2014 applies to any lifetime-warrantied item at end of useful life.\nExceptions: custom-printed items (e.g. custom tote bags) and consumables (e.g. bin liners) are final sale.",
|
|
519
|
+
"hits": [
|
|
520
|
+
{
|
|
521
|
+
"text": "# Returns policy\n\nAll unused items returnable within 30 days of delivery for full refund.\nItems returned after 30 days but under 90 days: store credit only.\nTake-back programme is separate \u2014 applies to any lifetime-warrantied item at end of useful life.\nExceptions: custom-printed items (e.g. custom tote bags) and consumables (e.g. bin liners) are final sale.\n",
|
|
522
|
+
"score": 0.7470334159820934,
|
|
523
|
+
"source": "pentatonic-memory",
|
|
524
|
+
"doc_id": "policy-returns-30day"
|
|
525
|
+
}
|
|
526
|
+
],
|
|
527
|
+
"correct": true,
|
|
528
|
+
"score": 1.0,
|
|
529
|
+
"grading_notes": "all substrings matched",
|
|
530
|
+
"search_time_ms": 29.92295500007458,
|
|
531
|
+
"generation_time_ms": 0.0,
|
|
532
|
+
"tokens_in": 0,
|
|
533
|
+
"tokens_out": 0,
|
|
534
|
+
"retrieval_tokens": 93,
|
|
535
|
+
"query_tokens": 10,
|
|
536
|
+
"context_tokens": 83,
|
|
537
|
+
"judge_tokens_in": 0,
|
|
538
|
+
"judge_tokens_out": 0,
|
|
539
|
+
"judge_latency_ms": 0.0
|
|
540
|
+
},
|
|
541
|
+
{
|
|
542
|
+
"task_id": "price-tee-subscription",
|
|
543
|
+
"query": "What's the subscription price for the Luna water bottle compared to standard?",
|
|
544
|
+
"answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
|
|
545
|
+
"hits": [
|
|
546
|
+
{
|
|
547
|
+
"text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
|
|
548
|
+
"score": 0.5494470082366621,
|
|
549
|
+
"source": "pentatonic-memory",
|
|
550
|
+
"doc_id": "sku-luna-bottle-42"
|
|
551
|
+
}
|
|
552
|
+
],
|
|
553
|
+
"correct": true,
|
|
554
|
+
"score": 1.0,
|
|
555
|
+
"grading_notes": "all substrings matched",
|
|
556
|
+
"search_time_ms": 30.340191995492205,
|
|
557
|
+
"generation_time_ms": 0.0,
|
|
558
|
+
"tokens_in": 0,
|
|
559
|
+
"tokens_out": 0,
|
|
560
|
+
"retrieval_tokens": 159,
|
|
561
|
+
"query_tokens": 14,
|
|
562
|
+
"context_tokens": 145,
|
|
563
|
+
"judge_tokens_in": 0,
|
|
564
|
+
"judge_tokens_out": 0,
|
|
565
|
+
"judge_latency_ms": 0.0
|
|
566
|
+
},
|
|
567
|
+
{
|
|
568
|
+
"task_id": "price-sleeve-diff",
|
|
569
|
+
"query": "How much more does the 15\" Haven Laptop Sleeve cost than the 13\" variant?",
|
|
570
|
+
"answer": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n---\n# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n---\n# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 units UK, 156 units EU\n**Colours available:** Charcoal, Oat, Ochre\n**Laptop diagonal max:** 13.6\" (third-party dimension 31.5cm \u00d7 22.5cm)",
|
|
571
|
+
"hits": [
|
|
572
|
+
{
|
|
573
|
+
"text": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n",
|
|
574
|
+
"score": 0.5661065748698271,
|
|
575
|
+
"source": "pentatonic-memory",
|
|
576
|
+
"doc_id": "sku-laptop-sleeve-15"
|
|
577
|
+
},
|
|
578
|
+
{
|
|
579
|
+
"text": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n",
|
|
580
|
+
"score": 0.5523781557442005,
|
|
581
|
+
"source": "pentatonic-memory",
|
|
582
|
+
"doc_id": "compat-laptop-sleeve-macbook-pro-14"
|
|
583
|
+
},
|
|
584
|
+
{
|
|
585
|
+
"text": "# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 un",
|
|
586
|
+
"score": 0.5420726412178005,
|
|
587
|
+
"source": "pentatonic-memory",
|
|
588
|
+
"doc_id": "sku-laptop-sleeve-13"
|
|
589
|
+
}
|
|
590
|
+
],
|
|
591
|
+
"correct": true,
|
|
592
|
+
"score": 1.0,
|
|
593
|
+
"grading_notes": "all substrings matched",
|
|
594
|
+
"search_time_ms": 36.61788301542401,
|
|
595
|
+
"generation_time_ms": 0.0,
|
|
596
|
+
"tokens_in": 0,
|
|
597
|
+
"tokens_out": 0,
|
|
598
|
+
"retrieval_tokens": 364,
|
|
599
|
+
"query_tokens": 19,
|
|
600
|
+
"context_tokens": 345,
|
|
601
|
+
"judge_tokens_in": 0,
|
|
602
|
+
"judge_tokens_out": 0,
|
|
603
|
+
"judge_latency_ms": 0.0
|
|
604
|
+
},
|
|
605
|
+
{
|
|
606
|
+
"task_id": "multi-hop-kite-duotone",
|
|
607
|
+
"query": "I have a Duotone bar \u2014 will the Nomad Kite Harness v3 work with it out of the box?",
|
|
608
|
+
"answer": "# Compatibility note\n\nThe Nomad Kite Harness v3 (NMD-HRN-V3) ships with a standard spreader bar compatible with F-One and North bar systems. For Duotone or Core bar systems, the Duotone/Core adapter kit (NMD-ADPT-DC, \u00a3 18) is required.",
|
|
609
|
+
"hits": [
|
|
610
|
+
{
|
|
611
|
+
"text": "# Compatibility note\n\nThe Nomad Kite Harness v3 (NMD-HRN-V3) ships with a standard spreader bar compatible with F-One and North bar systems. For Duotone or Core bar systems, the Duotone/Core adapter kit (NMD-ADPT-DC, \u00a3 18) is required.\n",
|
|
612
|
+
"score": 0.5458617633302318,
|
|
613
|
+
"source": "pentatonic-memory",
|
|
614
|
+
"doc_id": "compat-kite-harness-bar"
|
|
615
|
+
}
|
|
616
|
+
],
|
|
617
|
+
"correct": true,
|
|
618
|
+
"score": 1.0,
|
|
619
|
+
"grading_notes": "all substrings matched",
|
|
620
|
+
"search_time_ms": 32.8681189857889,
|
|
621
|
+
"generation_time_ms": 0.0,
|
|
622
|
+
"tokens_in": 0,
|
|
623
|
+
"tokens_out": 0,
|
|
624
|
+
"retrieval_tokens": 93,
|
|
625
|
+
"query_tokens": 25,
|
|
626
|
+
"context_tokens": 68,
|
|
627
|
+
"judge_tokens_in": 0,
|
|
628
|
+
"judge_tokens_out": 0,
|
|
629
|
+
"judge_latency_ms": 0.0
|
|
630
|
+
},
|
|
631
|
+
{
|
|
632
|
+
"task_id": "multi-hop-takeback-credit",
|
|
633
|
+
"query": "If I return an Atlas Phone Shell via take-back after 2 years, how much store credit do I get?",
|
|
634
|
+
"answer": "",
|
|
635
|
+
"hits": [],
|
|
636
|
+
"correct": false,
|
|
637
|
+
"score": 0.0,
|
|
638
|
+
"grading_notes": "missing 2/2: ['14.50', '50%']",
|
|
639
|
+
"search_time_ms": 35.04025097936392,
|
|
640
|
+
"generation_time_ms": 0.0,
|
|
641
|
+
"tokens_in": 0,
|
|
642
|
+
"tokens_out": 0,
|
|
643
|
+
"retrieval_tokens": 23,
|
|
644
|
+
"query_tokens": 23,
|
|
645
|
+
"context_tokens": 0,
|
|
646
|
+
"judge_tokens_in": 0,
|
|
647
|
+
"judge_tokens_out": 0,
|
|
648
|
+
"judge_latency_ms": 0.0
|
|
649
|
+
},
|
|
650
|
+
{
|
|
651
|
+
"task_id": "rubric-recommend-kitesurf-bag",
|
|
652
|
+
"query": "I'm heading to Worthing for a kite session and need to carry my 15\" MacBook and some kit. What would you recommend from the catalogue?",
|
|
653
|
+
"answer": "",
|
|
654
|
+
"hits": [],
|
|
655
|
+
"correct": false,
|
|
656
|
+
"score": 0.0,
|
|
657
|
+
"grading_notes": "judge=0.00 \u2014 The agent's answer does not mention a laptop sleeve or any bag for carrying kit, and instead recommends a phone case.",
|
|
658
|
+
"search_time_ms": 36.593386001186445,
|
|
659
|
+
"generation_time_ms": 0.0,
|
|
660
|
+
"tokens_in": 0,
|
|
661
|
+
"tokens_out": 0,
|
|
662
|
+
"retrieval_tokens": 31,
|
|
663
|
+
"query_tokens": 31,
|
|
664
|
+
"context_tokens": 0,
|
|
665
|
+
"judge_tokens_in": 341,
|
|
666
|
+
"judge_tokens_out": 38,
|
|
667
|
+
"judge_latency_ms": 874.3087880015373
|
|
668
|
+
},
|
|
669
|
+
{
|
|
670
|
+
"task_id": "rubric-plastic-free-tee",
|
|
671
|
+
"query": "Which tees in the catalogue are completely plastic-free?",
|
|
672
|
+
"answer": "",
|
|
673
|
+
"hits": [],
|
|
674
|
+
"correct": false,
|
|
675
|
+
"score": 0.0,
|
|
676
|
+
"grading_notes": "judge=0.00 \u2014 The agent's answer does not mention any plastic-free tees in the catalogue.",
|
|
677
|
+
"search_time_ms": 31.253111985279247,
|
|
678
|
+
"generation_time_ms": 0.0,
|
|
679
|
+
"tokens_in": 0,
|
|
680
|
+
"tokens_out": 0,
|
|
681
|
+
"retrieval_tokens": 11,
|
|
682
|
+
"query_tokens": 11,
|
|
683
|
+
"context_tokens": 0,
|
|
684
|
+
"judge_tokens_in": 307,
|
|
685
|
+
"judge_tokens_out": 30,
|
|
686
|
+
"judge_latency_ms": 787.7367050051689
|
|
687
|
+
},
|
|
688
|
+
{
|
|
689
|
+
"task_id": "rubric-cc-certified-items",
|
|
690
|
+
"query": "Do any items in the catalogue have Cradle to Cradle certification?",
|
|
691
|
+
"answer": "",
|
|
692
|
+
"hits": [],
|
|
693
|
+
"correct": false,
|
|
694
|
+
"score": 0.0,
|
|
695
|
+
"grading_notes": "judge=0.00 \u2014 The agent's answer does not mention any item with Cradle to Cradle certification, let alone the Atlas Phone Shell.",
|
|
696
|
+
"search_time_ms": 29.518647002987564,
|
|
697
|
+
"generation_time_ms": 0.0,
|
|
698
|
+
"tokens_in": 0,
|
|
699
|
+
"tokens_out": 0,
|
|
700
|
+
"retrieval_tokens": 14,
|
|
701
|
+
"query_tokens": 14,
|
|
702
|
+
"context_tokens": 0,
|
|
703
|
+
"judge_tokens_in": 288,
|
|
704
|
+
"judge_tokens_out": 38,
|
|
705
|
+
"judge_latency_ms": 714.9526060223579
|
|
706
|
+
},
|
|
707
|
+
{
|
|
708
|
+
"task_id": "entity-recycled-content-high",
|
|
709
|
+
"query": "Which items in the catalogue use at least 50% recycled material?",
|
|
710
|
+
"answer": "",
|
|
711
|
+
"hits": [],
|
|
712
|
+
"correct": false,
|
|
713
|
+
"score": 0.0,
|
|
714
|
+
"grading_notes": "no expected_substrings set",
|
|
715
|
+
"search_time_ms": 30.805075977696106,
|
|
716
|
+
"generation_time_ms": 0.0,
|
|
717
|
+
"tokens_in": 0,
|
|
718
|
+
"tokens_out": 0,
|
|
719
|
+
"retrieval_tokens": 14,
|
|
720
|
+
"query_tokens": 14,
|
|
721
|
+
"context_tokens": 0,
|
|
722
|
+
"judge_tokens_in": 0,
|
|
723
|
+
"judge_tokens_out": 0,
|
|
724
|
+
"judge_latency_ms": 0.0
|
|
725
|
+
}
|
|
726
|
+
]
|
|
727
|
+
}
|