@pentatonic-ai/ai-agent-sdk 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +3 -3
  2. package/bin/cli.js +1 -1
  3. package/bin/commands/config.js +1 -1
  4. package/dist/index.cjs +1 -1
  5. package/dist/index.js +1 -1
  6. package/package.json +2 -2
  7. package/packages/doctor/src/checks/local-memory.js +2 -2
  8. package/packages/memory/README.md +2 -2
  9. package/packages/memory/openclaw-plugin/README.md +2 -2
  10. package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
  11. package/packages/memory/src/server.js +2 -2
  12. package/packages/memory-engine-v2/.env.example +30 -0
  13. package/packages/memory-engine-v2/README.md +125 -0
  14. package/packages/memory-engine-v2/compat/Dockerfile +11 -0
  15. package/packages/memory-engine-v2/compat/requirements.txt +6 -0
  16. package/packages/memory-engine-v2/compat/server.py +1047 -0
  17. package/packages/memory-engine-v2/docker-compose.aws.yml +78 -0
  18. package/packages/memory-engine-v2/docker-compose.yml +206 -0
  19. package/packages/memory-engine-v2/extractor-async/Dockerfile +14 -0
  20. package/packages/memory-engine-v2/extractor-async/confidence.py +62 -0
  21. package/packages/memory-engine-v2/extractor-async/noise_filter.py +144 -0
  22. package/packages/memory-engine-v2/extractor-async/requirements.txt +2 -0
  23. package/packages/memory-engine-v2/extractor-async/test_confidence.py +76 -0
  24. package/packages/memory-engine-v2/extractor-async/test_noise_filter.py +177 -0
  25. package/packages/memory-engine-v2/extractor-async/worker.py +797 -0
  26. package/packages/memory-engine-v2/extractor-sync/Dockerfile +11 -0
  27. package/packages/memory-engine-v2/extractor-sync/requirements.txt +4 -0
  28. package/packages/memory-engine-v2/extractor-sync/server.py +424 -0
  29. package/packages/memory-engine-v2/org-model/migrations/001_init.sql +390 -0
  30. package/packages/memory-engine-v2/tests/e2e_smoke.py +356 -0
  31. package/packages/memory-engine-v2/tests/fixtures/generate_synthetic_corpus.py +758 -0
  32. package/packages/memory-engine/.env.example +0 -13
  33. package/packages/memory-engine/MIGRATION.md +0 -219
  34. package/packages/memory-engine/README.md +0 -145
  35. package/packages/memory-engine/bench/README.md +0 -99
  36. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +0 -1115
  37. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +0 -819
  38. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +0 -1278
  39. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +0 -1018
  40. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +0 -1038
  41. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +0 -961
  42. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +0 -1115
  43. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +0 -819
  44. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +0 -1278
  45. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +0 -1018
  46. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +0 -1038
  47. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +0 -937
  48. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +0 -1115
  49. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +0 -819
  50. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +0 -1278
  51. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +0 -1018
  52. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +0 -1038
  53. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +0 -961
  54. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +0 -1115
  55. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +0 -819
  56. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +0 -1278
  57. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +0 -1018
  58. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +0 -1038
  59. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +0 -883
  60. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +0 -1115
  61. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +0 -819
  62. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +0 -1278
  63. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +0 -1018
  64. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +0 -1038
  65. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +0 -937
  66. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +0 -1115
  67. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +0 -1115
  68. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +0 -819
  69. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +0 -542
  70. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +0 -1278
  71. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +0 -894
  72. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +0 -1018
  73. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +0 -680
  74. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +0 -1038
  75. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +0 -693
  76. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +0 -961
  77. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +0 -727
  78. package/packages/memory-engine/compat/Dockerfile +0 -22
  79. package/packages/memory-engine/compat/server.py +0 -1255
  80. package/packages/memory-engine/docker-compose.test.yml +0 -59
  81. package/packages/memory-engine/docker-compose.yml +0 -255
  82. package/packages/memory-engine/engine/README.md +0 -52
  83. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +0 -1543
  84. package/packages/memory-engine/engine/l5-comms-layer.py +0 -663
  85. package/packages/memory-engine/engine/l6-document-store.py +0 -1018
  86. package/packages/memory-engine/engine/services/_shared/__init__.py +0 -1
  87. package/packages/memory-engine/engine/services/_shared/embed_provider.py +0 -562
  88. package/packages/memory-engine/engine/services/l2/Dockerfile +0 -50
  89. package/packages/memory-engine/engine/services/l2/init_databases.py +0 -81
  90. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +0 -2721
  91. package/packages/memory-engine/engine/services/l5/Dockerfile +0 -11
  92. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +0 -808
  93. package/packages/memory-engine/engine/services/l6/Dockerfile +0 -30
  94. package/packages/memory-engine/engine/services/l6/l6-document-store.py +0 -1221
  95. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +0 -28
  96. package/packages/memory-engine/engine/services/nv-embed/server.py +0 -152
  97. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  98. package/packages/memory-engine/pme_memory/__main__.py +0 -129
  99. package/packages/memory-engine/pme_memory/artifacts.py +0 -95
  100. package/packages/memory-engine/pme_memory/embed.py +0 -74
  101. package/packages/memory-engine/pme_memory/health.py +0 -36
  102. package/packages/memory-engine/pme_memory/hygiene.py +0 -159
  103. package/packages/memory-engine/pme_memory/indexer.py +0 -200
  104. package/packages/memory-engine/pme_memory/needs.py +0 -55
  105. package/packages/memory-engine/pme_memory/provenance.py +0 -80
  106. package/packages/memory-engine/pme_memory/scoring.py +0 -168
  107. package/packages/memory-engine/pme_memory/search.py +0 -52
  108. package/packages/memory-engine/pme_memory/store.py +0 -86
  109. package/packages/memory-engine/pme_memory/synthesis.py +0 -114
  110. package/packages/memory-engine/pyproject.toml +0 -65
  111. package/packages/memory-engine/scripts/kg-extractor.py +0 -557
  112. package/packages/memory-engine/scripts/kg-preflexor-v2.py +0 -738
  113. package/packages/memory-engine/scripts/wipe-legacy-l3-entities.py +0 -128
  114. package/packages/memory-engine/tests/e2e_arena.sh +0 -259
  115. package/packages/memory-engine/tests/embed_stub/Dockerfile +0 -13
  116. package/packages/memory-engine/tests/embed_stub/server.py +0 -80
  117. package/packages/memory-engine/tests/test_aggregate.py +0 -333
  118. package/packages/memory-engine/tests/test_api_contract.sh +0 -57
  119. package/packages/memory-engine/tests/test_arena_safety.py +0 -232
  120. package/packages/memory-engine/tests/test_channel_stat_reader.py +0 -437
  121. package/packages/memory-engine/tests/test_channel_stat_rollups.py +0 -308
  122. package/packages/memory-engine/tests/test_compat_nv_embed_probe.py +0 -48
  123. package/packages/memory-engine/tests/test_embed_provider.py +0 -693
  124. package/packages/memory-engine/tests/test_l2_qmd_vec_search.py +0 -280
  125. package/packages/memory-engine/tests/test_l3_arena_isolation.py +0 -412
  126. package/packages/memory-engine/tests/test_l6_module_load.py +0 -84
  127. package/packages/memory-engine/tests/test_people_list_reader.py +0 -432
@@ -1,727 +0,0 @@
1
- {
2
- "bench": "product-catalogue",
3
- "stack": "pentatonic-memory",
4
- "n_tasks": 18,
5
- "n_correct": 12,
6
- "accuracy": 0.6666666666666666,
7
- "mean_score": 0.6666666666666666,
8
- "p50_search_ms": 32.06061548553407,
9
- "p95_search_ms": 41.31171901826747,
10
- "total_tokens_in": 0,
11
- "total_tokens_out": 0,
12
- "total_usd": 0.0,
13
- "by_tag": {
14
- "factoid": {
15
- "n": 10,
16
- "mean_score": 0.9,
17
- "accuracy": 0.9
18
- },
19
- "material": {
20
- "n": 4,
21
- "mean_score": 0.5,
22
- "accuracy": 0.5
23
- },
24
- "spec": {
25
- "n": 1,
26
- "mean_score": 1.0,
27
- "accuracy": 1.0
28
- },
29
- "warranty": {
30
- "n": 1,
31
- "mean_score": 1.0,
32
- "accuracy": 1.0
33
- },
34
- "inventory": {
35
- "n": 3,
36
- "mean_score": 0.6666666666666666,
37
- "accuracy": 0.6666666666666666
38
- },
39
- "compat": {
40
- "n": 2,
41
- "mean_score": 1.0,
42
- "accuracy": 1.0
43
- },
44
- "multi-doc": {
45
- "n": 3,
46
- "mean_score": 0.6666666666666666,
47
- "accuracy": 0.6666666666666666
48
- },
49
- "care": {
50
- "n": 1,
51
- "mean_score": 1.0,
52
- "accuracy": 1.0
53
- },
54
- "policy": {
55
- "n": 2,
56
- "mean_score": 0.5,
57
- "accuracy": 0.5
58
- },
59
- "pricing": {
60
- "n": 2,
61
- "mean_score": 1.0,
62
- "accuracy": 1.0
63
- },
64
- "multi-hop": {
65
- "n": 2,
66
- "mean_score": 0.5,
67
- "accuracy": 0.5
68
- },
69
- "math": {
70
- "n": 1,
71
- "mean_score": 0.0,
72
- "accuracy": 0.0
73
- },
74
- "rubric": {
75
- "n": 3,
76
- "mean_score": 0.0,
77
- "accuracy": 0.0
78
- },
79
- "recommendation": {
80
- "n": 1,
81
- "mean_score": 0.0,
82
- "accuracy": 0.0
83
- },
84
- "certification": {
85
- "n": 1,
86
- "mean_score": 0.0,
87
- "accuracy": 0.0
88
- },
89
- "entity": {
90
- "n": 1,
91
- "mean_score": 0.0,
92
- "accuracy": 0.0
93
- }
94
- },
95
- "extra": {
96
- "ingest_ms": 8186.979395017261,
97
- "grading": "substring",
98
- "limit": 3,
99
- "tokens": {
100
- "corpus_tokens": 1845,
101
- "query_tokens": 271,
102
- "context_tokens": 2072,
103
- "retrieval_tokens": 2343,
104
- "naive_tokens": 33481,
105
- "saved_tokens": 31138,
106
- "reduction_pct": 0.9300200113497208,
107
- "mean_retrieval_tokens_per_task": 130.16666666666666,
108
- "tokenizer": "cl100k_base",
109
- "per_task": {
110
- "sku-atlas-material": {
111
- "query": 11,
112
- "context": 150,
113
- "retrieval": 161,
114
- "judge_in": 0,
115
- "judge_out": 0,
116
- "judge_latency_ms": 0.0
117
- },
118
- "sku-luna-capacity": {
119
- "query": 8,
120
- "context": 145,
121
- "retrieval": 153,
122
- "judge_in": 0,
123
- "judge_out": 0,
124
- "judge_latency_ms": 0.0
125
- },
126
- "sku-luna-warranty": {
127
- "query": 9,
128
- "context": 145,
129
- "retrieval": 154,
130
- "judge_in": 0,
131
- "judge_out": 0,
132
- "judge_latency_ms": 0.0
133
- },
134
- "sku-kite-harness-out-of-stock": {
135
- "query": 17,
136
- "context": 0,
137
- "retrieval": 17,
138
- "judge_in": 0,
139
- "judge_out": 0,
140
- "judge_latency_ms": 0.0
141
- },
142
- "sku-kite-harness-restock": {
143
- "query": 17,
144
- "context": 183,
145
- "retrieval": 200,
146
- "judge_in": 0,
147
- "judge_out": 0,
148
- "judge_latency_ms": 0.0
149
- },
150
- "sku-tee-discontinued-sizes": {
151
- "query": 13,
152
- "context": 161,
153
- "retrieval": 174,
154
- "judge_in": 0,
155
- "judge_out": 0,
156
- "judge_latency_ms": 0.0
157
- },
158
- "sku-sleeve-compat-14-mbp": {
159
- "query": 16,
160
- "context": 345,
161
- "retrieval": 361,
162
- "judge_in": 0,
163
- "judge_out": 0,
164
- "judge_latency_ms": 0.0
165
- },
166
- "sku-coffee-cup-microwave": {
167
- "query": 8,
168
- "context": 156,
169
- "retrieval": 164,
170
- "judge_in": 0,
171
- "judge_out": 0,
172
- "judge_latency_ms": 0.0
173
- },
174
- "sku-bin-liners-home-compost": {
175
- "query": 11,
176
- "context": 146,
177
- "retrieval": 157,
178
- "judge_in": 0,
179
- "judge_out": 0,
180
- "judge_latency_ms": 0.0
181
- },
182
- "policy-custom-tote-return": {
183
- "query": 10,
184
- "context": 83,
185
- "retrieval": 93,
186
- "judge_in": 0,
187
- "judge_out": 0,
188
- "judge_latency_ms": 0.0
189
- },
190
- "price-tee-subscription": {
191
- "query": 14,
192
- "context": 145,
193
- "retrieval": 159,
194
- "judge_in": 0,
195
- "judge_out": 0,
196
- "judge_latency_ms": 0.0
197
- },
198
- "price-sleeve-diff": {
199
- "query": 19,
200
- "context": 345,
201
- "retrieval": 364,
202
- "judge_in": 0,
203
- "judge_out": 0,
204
- "judge_latency_ms": 0.0
205
- },
206
- "multi-hop-kite-duotone": {
207
- "query": 25,
208
- "context": 68,
209
- "retrieval": 93,
210
- "judge_in": 0,
211
- "judge_out": 0,
212
- "judge_latency_ms": 0.0
213
- },
214
- "multi-hop-takeback-credit": {
215
- "query": 23,
216
- "context": 0,
217
- "retrieval": 23,
218
- "judge_in": 0,
219
- "judge_out": 0,
220
- "judge_latency_ms": 0.0
221
- },
222
- "rubric-recommend-kitesurf-bag": {
223
- "query": 31,
224
- "context": 0,
225
- "retrieval": 31,
226
- "judge_in": 341,
227
- "judge_out": 38,
228
- "judge_latency_ms": 874.3087880015373
229
- },
230
- "rubric-plastic-free-tee": {
231
- "query": 11,
232
- "context": 0,
233
- "retrieval": 11,
234
- "judge_in": 307,
235
- "judge_out": 30,
236
- "judge_latency_ms": 787.7367050051689
237
- },
238
- "rubric-cc-certified-items": {
239
- "query": 14,
240
- "context": 0,
241
- "retrieval": 14,
242
- "judge_in": 288,
243
- "judge_out": 38,
244
- "judge_latency_ms": 714.9526060223579
245
- },
246
- "entity-recycled-content-high": {
247
- "query": 14,
248
- "context": 0,
249
- "retrieval": 14,
250
- "judge_in": 0,
251
- "judge_out": 0,
252
- "judge_latency_ms": 0.0
253
- }
254
- },
255
- "judge_tokens_in": 936,
256
- "judge_tokens_out": 106,
257
- "judge_calls": 3,
258
- "judge_mean_latency_ms": 792.3326996763548
259
- },
260
- "cost_usd": {
261
- "assumed_completion_tokens_per_task": 100,
262
- "rates": {
263
- "input_per_1k": 0.0025,
264
- "output_per_1k": 0.01,
265
- "model": "gpt-4o"
266
- },
267
- "retrieval_usd_in": 0.0058575,
268
- "retrieval_usd_out": 0.018,
269
- "retrieval_usd_total": 0.023857499999999997,
270
- "naive_usd_total": 0.1017025,
271
- "saved_usd": 0.077845,
272
- "saved_usd_per_1k_tasks": 4.324722222222222
273
- }
274
- },
275
- "task_results": [
276
- {
277
- "task_id": "sku-atlas-material",
278
- "query": "What's the material composition of the Atlas Phone Shell?",
279
- "answer": "# Atlas Phone Shell \u2014 SKU ATL-PHN-001\n\n**Brand:** Atlas Circular\n**Category:** Mobile accessories / Phone cases\n**Compatible with:** iPhone 15, iPhone 15 Pro\n\n**Material composition:**\n- 68% recycled ocean-bound polypropylene\n- 24% recycled polycarbonate (post-consumer)\n- 8% natural calcium carbonate\n\n**Price:** \u00a329.00 GBP (standard) / \u00a324.00 (subscription)\n**Inventory:** 1,240 units in stock at UK fulfilment (Worthing DC)\n**Take-back:** Yes \u2014 returnable for 50% credit after 18 months\n**Certification:** Cradle to Cradle Bronze, MADE-BY Class B",
280
- "hits": [
281
- {
282
- "text": "# Atlas Phone Shell \u2014 SKU ATL-PHN-001\n\n**Brand:** Atlas Circular\n**Category:** Mobile accessories / Phone cases\n**Compatible with:** iPhone 15, iPhone 15 Pro\n\n**Material composition:**\n- 68% recycled ocean-bound polypropylene\n- 24% recycled polycarbonate (post-consumer)\n- 8% natural calcium carbonate\n\n**Price:** \u00a329.00 GBP (standard) / \u00a324.00 (subscription)\n**Inventory:** 1,240 units in stock at U",
283
- "score": 0.7574853661453905,
284
- "source": "pentatonic-memory",
285
- "doc_id": "sku-atlas-phone-01"
286
- }
287
- ],
288
- "correct": true,
289
- "score": 1.0,
290
- "grading_notes": "all substrings matched",
291
- "search_time_ms": 38.870015006978065,
292
- "generation_time_ms": 0.0,
293
- "tokens_in": 0,
294
- "tokens_out": 0,
295
- "retrieval_tokens": 161,
296
- "query_tokens": 11,
297
- "context_tokens": 150,
298
- "judge_tokens_in": 0,
299
- "judge_tokens_out": 0,
300
- "judge_latency_ms": 0.0
301
- },
302
- {
303
- "task_id": "sku-luna-capacity",
304
- "query": "What capacity is the Luna water bottle?",
305
- "answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
306
- "hits": [
307
- {
308
- "text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
309
- "score": 0.7714634221147277,
310
- "source": "pentatonic-memory",
311
- "doc_id": "sku-luna-bottle-42"
312
- }
313
- ],
314
- "correct": true,
315
- "score": 1.0,
316
- "grading_notes": "all substrings matched",
317
- "search_time_ms": 28.36476400261745,
318
- "generation_time_ms": 0.0,
319
- "tokens_in": 0,
320
- "tokens_out": 0,
321
- "retrieval_tokens": 153,
322
- "query_tokens": 8,
323
- "context_tokens": 145,
324
- "judge_tokens_in": 0,
325
- "judge_tokens_out": 0,
326
- "judge_latency_ms": 0.0
327
- },
328
- {
329
- "task_id": "sku-luna-warranty",
330
- "query": "What's the warranty on the Luna bottle?",
331
- "answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
332
- "hits": [
333
- {
334
- "text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
335
- "score": 0.5845189118562412,
336
- "source": "pentatonic-memory",
337
- "doc_id": "sku-luna-bottle-42"
338
- }
339
- ],
340
- "correct": true,
341
- "score": 1.0,
342
- "grading_notes": "all substrings matched",
343
- "search_time_ms": 29.115474986610934,
344
- "generation_time_ms": 0.0,
345
- "tokens_in": 0,
346
- "tokens_out": 0,
347
- "retrieval_tokens": 154,
348
- "query_tokens": 9,
349
- "context_tokens": 145,
350
- "judge_tokens_in": 0,
351
- "judge_tokens_out": 0,
352
- "judge_latency_ms": 0.0
353
- },
354
- {
355
- "task_id": "sku-kite-harness-out-of-stock",
356
- "query": "Which size of the Nomad Kite Harness v3 is currently out of stock?",
357
- "answer": "",
358
- "hits": [],
359
- "correct": false,
360
- "score": 0.0,
361
- "grading_notes": "missing 1/1: ['M']",
362
- "search_time_ms": 39.99572299653664,
363
- "generation_time_ms": 0.0,
364
- "tokens_in": 0,
365
- "tokens_out": 0,
366
- "retrieval_tokens": 17,
367
- "query_tokens": 17,
368
- "context_tokens": 0,
369
- "judge_tokens_in": 0,
370
- "judge_tokens_out": 0,
371
- "judge_latency_ms": 0.0
372
- },
373
- {
374
- "task_id": "sku-kite-harness-restock",
375
- "query": "When does the Nomad Kite Harness v3 restock for XS and S?",
376
- "answer": "# Nomad Kite Harness v3 \u2014 SKU NMD-HRN-V3\n\n**Brand:** Nomad Sports\n**Category:** Watersports / Kite surfing\n**Size range:** XS (26-28\"), S (28-30\"), M (30-32\"), L (32-34\"), XL (34-36\")\n\n**Material composition:**\n- Outer shell: 92% recycled nylon 6,6 (ghost-net source)\n- Foam padding: EVA (non-recycled)\n- Hardware: 316 stainless steel spreader bar\n\n**Price:** \u00a3145.00 GBP\n**Inventory:** M=out of stock, L=6 units, XL=12 units, XS/S=restock 2026-05-15\n**Repair service:** \u00a325 flat fee, turnaround 14 days\n**Compatibility:** F-One bar systems, North bar systems (adapter sold separately)",
377
- "hits": [
378
- {
379
- "text": "# Nomad Kite Harness v3 \u2014 SKU NMD-HRN-V3\n\n**Brand:** Nomad Sports\n**Category:** Watersports / Kite surfing\n**Size range:** XS (26-28\"), S (28-30\"), M (30-32\"), L (32-34\"), XL (34-36\")\n\n**Material composition:**\n- Outer shell: 92% recycled nylon 6,6 (ghost-net source)\n- Foam padding: EVA (non-recycled)\n- Hardware: 316 stainless steel spreader bar\n\n**Price:** \u00a3145.00 GBP\n**Inventory:** M=out of stoc",
380
- "score": 0.7568954994182511,
381
- "source": "pentatonic-memory",
382
- "doc_id": "sku-kite-harness-v3"
383
- }
384
- ],
385
- "correct": true,
386
- "score": 1.0,
387
- "grading_notes": "all substrings matched",
388
- "search_time_ms": 39.893368986668065,
389
- "generation_time_ms": 0.0,
390
- "tokens_in": 0,
391
- "tokens_out": 0,
392
- "retrieval_tokens": 200,
393
- "query_tokens": 17,
394
- "context_tokens": 183,
395
- "judge_tokens_in": 0,
396
- "judge_tokens_out": 0,
397
- "judge_latency_ms": 0.0
398
- },
399
- {
400
- "task_id": "sku-tee-discontinued-sizes",
401
- "query": "Which sizes of the Pentatonic Crew Tee Black are discontinued?",
402
- "answer": "# Pentatonic Crew Tee Black \u2014 SKU PTT-TEE-BLK\n\n**Brand:** Pentatonic\n**Category:** Apparel / T-shirts\n**Fit:** Unisex regular\n**Size range:** XS, S, M, L, XL, XXL\n\n**Material composition:**\n- 100% GOTS-certified organic cotton, 180 gsm\n- Dyeing: low-impact reactive dye, closed-loop water system\n\n**Price:** \u00a328.00 GBP\n**Inventory:** S=48, M=136, L=94, XL=22, XS=0 (discontinued), XXL=0 (discontinued)\n**Country of origin:** Portugal (CITEVE-certified mill)\n**Care:** Wash cold, line dry; no tumble dry",
403
- "hits": [
404
- {
405
- "text": "# Pentatonic Crew Tee Black \u2014 SKU PTT-TEE-BLK\n\n**Brand:** Pentatonic\n**Category:** Apparel / T-shirts\n**Fit:** Unisex regular\n**Size range:** XS, S, M, L, XL, XXL\n\n**Material composition:**\n- 100% GOTS-certified organic cotton, 180 gsm\n- Dyeing: low-impact reactive dye, closed-loop water system\n\n**Price:** \u00a328.00 GBP\n**Inventory:** S=48, M=136, L=94, XL=22, XS=0 (discontinued), XXL=0 (discontinued",
406
- "score": 0.765600191590099,
407
- "source": "pentatonic-memory",
408
- "doc_id": "sku-cotton-tee-black"
409
- }
410
- ],
411
- "correct": true,
412
- "score": 1.0,
413
- "grading_notes": "all substrings matched",
414
- "search_time_ms": 35.890275990823284,
415
- "generation_time_ms": 0.0,
416
- "tokens_in": 0,
417
- "tokens_out": 0,
418
- "retrieval_tokens": 174,
419
- "query_tokens": 13,
420
- "context_tokens": 161,
421
- "judge_tokens_in": 0,
422
- "judge_tokens_out": 0,
423
- "judge_latency_ms": 0.0
424
- },
425
- {
426
- "task_id": "sku-sleeve-compat-14-mbp",
427
- "query": "Does the Haven Laptop Sleeve 13\" fit a 14\" MacBook Pro?",
428
- "answer": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n---\n# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 units UK, 156 units EU\n**Colours available:** Charcoal, Oat, Ochre\n**Laptop diagonal max:** 13.6\" (third-party dimension 31.5cm \u00d7 22.5cm)\n---\n# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)",
429
- "hits": [
430
- {
431
- "text": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n",
432
- "score": 0.8144855584508688,
433
- "source": "pentatonic-memory",
434
- "doc_id": "compat-laptop-sleeve-macbook-pro-14"
435
- },
436
- {
437
- "text": "# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 un",
438
- "score": 0.5219752131848099,
439
- "source": "pentatonic-memory",
440
- "doc_id": "sku-laptop-sleeve-13"
441
- },
442
- {
443
- "text": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n",
444
- "score": 0.5194211102311778,
445
- "source": "pentatonic-memory",
446
- "doc_id": "sku-laptop-sleeve-15"
447
- }
448
- ],
449
- "correct": true,
450
- "score": 1.0,
451
- "grading_notes": "all substrings matched",
452
- "search_time_ms": 41.31171901826747,
453
- "generation_time_ms": 0.0,
454
- "tokens_in": 0,
455
- "tokens_out": 0,
456
- "retrieval_tokens": 361,
457
- "query_tokens": 16,
458
- "context_tokens": 345,
459
- "judge_tokens_in": 0,
460
- "judge_tokens_out": 0,
461
- "judge_latency_ms": 0.0
462
- },
463
- {
464
- "task_id": "sku-coffee-cup-microwave",
465
- "query": "Can I microwave the Loop coffee cup?",
466
- "answer": "# Loop Reusable Coffee Cup 8oz \u2014 SKU LP-CUP-008\n\n**Brand:** Loop\n**Category:** Drinkware / Coffee\n**Capacity:** 8oz (240ml)\n\n**Material composition:**\n- Body: 100% plant-based PLA (corn-derived)\n- Silicone sleeve: food-grade silicone (not bio-based)\n- Lid: BPA-free polypropylene (recycled post-industrial)\n\n**Price:** \u00a312.50 GBP\n**Inventory:** 2,104 units UK, 1,876 units EU\n**Dishwasher safe:** Top rack only\n**Lifespan:** ~2 years regular use, then return for closed-loop composting\n**Note:** Not microwave-safe due to PLA degradation above 60\u00b0C.",
467
- "hits": [
468
- {
469
- "text": "# Loop Reusable Coffee Cup 8oz \u2014 SKU LP-CUP-008\n\n**Brand:** Loop\n**Category:** Drinkware / Coffee\n**Capacity:** 8oz (240ml)\n\n**Material composition:**\n- Body: 100% plant-based PLA (corn-derived)\n- Silicone sleeve: food-grade silicone (not bio-based)\n- Lid: BPA-free polypropylene (recycled post-industrial)\n\n**Price:** \u00a312.50 GBP\n**Inventory:** 2,104 units UK, 1,876 units EU\n**Dishwasher safe:** Top",
470
- "score": 0.7307098382695573,
471
- "source": "pentatonic-memory",
472
- "doc_id": "sku-coffee-cup-reusable"
473
- }
474
- ],
475
- "correct": true,
476
- "score": 1.0,
477
- "grading_notes": "all substrings matched",
478
- "search_time_ms": 26.578250981401652,
479
- "generation_time_ms": 0.0,
480
- "tokens_in": 0,
481
- "tokens_out": 0,
482
- "retrieval_tokens": 164,
483
- "query_tokens": 8,
484
- "context_tokens": 156,
485
- "judge_tokens_in": 0,
486
- "judge_tokens_out": 0,
487
- "judge_latency_ms": 0.0
488
- },
489
- {
490
- "task_id": "sku-bin-liners-home-compost",
491
- "query": "Are the Root bio bin liners home-compostable?",
492
- "answer": "# Bio Bin Liners 30L (roll of 20) \u2014 SKU BBL-030-020\n\n**Brand:** Root\n**Category:** Home / Waste\n**Size:** 30 litres\n\n**Material composition:**\n- 100% PBAT + corn starch biopolymer\n- EN13432 certified industrially compostable\n- NOT home-compostable (requires industrial composting temperatures)\n\n**Price:** \u00a34.20 GBP per roll\n**Inventory:** 6,420 rolls UK, 2,100 rolls EU\n**Subscription:** Monthly auto-delivery available (\u00a33.60/roll)\n**Council scheme compatibility:** Check with your local authority \u2014 accepted in most UK food-waste streams.",
493
- "hits": [
494
- {
495
- "text": "# Bio Bin Liners 30L (roll of 20) \u2014 SKU BBL-030-020\n\n**Brand:** Root\n**Category:** Home / Waste\n**Size:** 30 litres\n\n**Material composition:**\n- 100% PBAT + corn starch biopolymer\n- EN13432 certified industrially compostable\n- NOT home-compostable (requires industrial composting temperatures)\n\n**Price:** \u00a34.20 GBP per roll\n**Inventory:** 6,420 rolls UK, 2,100 rolls EU\n**Subscription:** Monthly aut",
496
- "score": 0.5844119891045675,
497
- "source": "pentatonic-memory",
498
- "doc_id": "sku-biodegradable-trash-bags"
499
- }
500
- ],
501
- "correct": true,
502
- "score": 1.0,
503
- "grading_notes": "all substrings matched",
504
- "search_time_ms": 28.265644010389224,
505
- "generation_time_ms": 0.0,
506
- "tokens_in": 0,
507
- "tokens_out": 0,
508
- "retrieval_tokens": 157,
509
- "query_tokens": 11,
510
- "context_tokens": 146,
511
- "judge_tokens_in": 0,
512
- "judge_tokens_out": 0,
513
- "judge_latency_ms": 0.0
514
- },
515
- {
516
- "task_id": "policy-custom-tote-return",
517
- "query": "Can I return a custom-printed tote bag?",
518
- "answer": "# Returns policy\n\nAll unused items returnable within 30 days of delivery for full refund.\nItems returned after 30 days but under 90 days: store credit only.\nTake-back programme is separate \u2014 applies to any lifetime-warrantied item at end of useful life.\nExceptions: custom-printed items (e.g. custom tote bags) and consumables (e.g. bin liners) are final sale.",
519
- "hits": [
520
- {
521
- "text": "# Returns policy\n\nAll unused items returnable within 30 days of delivery for full refund.\nItems returned after 30 days but under 90 days: store credit only.\nTake-back programme is separate \u2014 applies to any lifetime-warrantied item at end of useful life.\nExceptions: custom-printed items (e.g. custom tote bags) and consumables (e.g. bin liners) are final sale.\n",
522
- "score": 0.7470334159820934,
523
- "source": "pentatonic-memory",
524
- "doc_id": "policy-returns-30day"
525
- }
526
- ],
527
- "correct": true,
528
- "score": 1.0,
529
- "grading_notes": "all substrings matched",
530
- "search_time_ms": 29.92295500007458,
531
- "generation_time_ms": 0.0,
532
- "tokens_in": 0,
533
- "tokens_out": 0,
534
- "retrieval_tokens": 93,
535
- "query_tokens": 10,
536
- "context_tokens": 83,
537
- "judge_tokens_in": 0,
538
- "judge_tokens_out": 0,
539
- "judge_latency_ms": 0.0
540
- },
541
- {
542
- "task_id": "price-tee-subscription",
543
- "query": "What's the subscription price for the Luna water bottle compared to standard?",
544
- "answer": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta\n**Replacement parts:** Lid (\u00a33.50), Silicone seal (\u00a30.90)\n**Warranty:** Lifetime on body, 2 years on lid",
545
- "hits": [
546
- {
547
- "text": "# Luna Water Bottle 500ml \u2014 SKU LUN-BTL-042\n\n**Brand:** Luna\n**Category:** Drinkware / Bottles\n**Capacity:** 500ml\n\n**Material composition:**\n- 100% food-grade 304 stainless steel (65% recycled content)\n- Lid: bio-based silicone (sugarcane derivative)\n\n**Price:** \u00a318.50 GBP\n**Inventory:** 87 units in stock (UK), 412 units (EU DC Eindhoven)\n**Colours available:** Matte black, Sage green, Terracotta",
548
- "score": 0.5494470082366621,
549
- "source": "pentatonic-memory",
550
- "doc_id": "sku-luna-bottle-42"
551
- }
552
- ],
553
- "correct": true,
554
- "score": 1.0,
555
- "grading_notes": "all substrings matched",
556
- "search_time_ms": 30.340191995492205,
557
- "generation_time_ms": 0.0,
558
- "tokens_in": 0,
559
- "tokens_out": 0,
560
- "retrieval_tokens": 159,
561
- "query_tokens": 14,
562
- "context_tokens": 145,
563
- "judge_tokens_in": 0,
564
- "judge_tokens_out": 0,
565
- "judge_latency_ms": 0.0
566
- },
567
- {
568
- "task_id": "price-sleeve-diff",
569
- "query": "How much more does the 15\" Haven Laptop Sleeve cost than the 13\" variant?",
570
- "answer": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n---\n# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n---\n# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 units UK, 156 units EU\n**Colours available:** Charcoal, Oat, Ochre\n**Laptop diagonal max:** 13.6\" (third-party dimension 31.5cm \u00d7 22.5cm)",
571
- "hits": [
572
- {
573
- "text": "# Haven Laptop Sleeve 15\" \u2014 SKU HVN-SLV-15\n\nAs HVN-SLV-13 but for 15\"-16\" laptops. Same materials, same construction.\n\n**Fits:** 15\" MacBook Air, 16\" MacBook Pro, 15.6\" Dell XPS, 15\" ThinkPad\n**Price:** \u00a358.00 GBP (\u00a310 more than 13\" variant)\n**Inventory:** 89 units UK, 54 units EU\n**Colours available:** Charcoal only (Oat and Ochre sold through)\n",
574
- "score": 0.5661065748698271,
575
- "source": "pentatonic-memory",
576
- "doc_id": "sku-laptop-sleeve-15"
577
- },
578
- {
579
- "text": "# Compatibility note\n\nThe Haven Laptop Sleeve 13\" (HVN-SLV-13) does NOT fit the 14\" MacBook Pro \u2014 third-party dimensions exceed 31.5cm. Use the 15\" variant (HVN-SLV-15).\n",
580
- "score": 0.5523781557442005,
581
- "source": "pentatonic-memory",
582
- "doc_id": "compat-laptop-sleeve-macbook-pro-14"
583
- },
584
- {
585
- "text": "# Haven Laptop Sleeve 13\" \u2014 SKU HVN-SLV-13\n\n**Brand:** Haven\n**Category:** Bags / Laptop protection\n**Fits:** 13\" MacBook Air M2/M3, 13.6\" MacBook Air, 13\" Dell XPS, 13\" ThinkPad\n\n**Material composition:**\n- Outer: 100% recycled PET felt (from 12 plastic bottles per unit)\n- Inner lining: 100% post-consumer recycled polyester\n- YKK recycled-polyester zip\n\n**Price:** \u00a348.00 GBP\n**Inventory:** 238 un",
586
- "score": 0.5420726412178005,
587
- "source": "pentatonic-memory",
588
- "doc_id": "sku-laptop-sleeve-13"
589
- }
590
- ],
591
- "correct": true,
592
- "score": 1.0,
593
- "grading_notes": "all substrings matched",
594
- "search_time_ms": 36.61788301542401,
595
- "generation_time_ms": 0.0,
596
- "tokens_in": 0,
597
- "tokens_out": 0,
598
- "retrieval_tokens": 364,
599
- "query_tokens": 19,
600
- "context_tokens": 345,
601
- "judge_tokens_in": 0,
602
- "judge_tokens_out": 0,
603
- "judge_latency_ms": 0.0
604
- },
605
- {
606
- "task_id": "multi-hop-kite-duotone",
607
- "query": "I have a Duotone bar \u2014 will the Nomad Kite Harness v3 work with it out of the box?",
608
- "answer": "# Compatibility note\n\nThe Nomad Kite Harness v3 (NMD-HRN-V3) ships with a standard spreader bar compatible with F-One and North bar systems. For Duotone or Core bar systems, the Duotone/Core adapter kit (NMD-ADPT-DC, \u00a3 18) is required.",
609
- "hits": [
610
- {
611
- "text": "# Compatibility note\n\nThe Nomad Kite Harness v3 (NMD-HRN-V3) ships with a standard spreader bar compatible with F-One and North bar systems. For Duotone or Core bar systems, the Duotone/Core adapter kit (NMD-ADPT-DC, \u00a3 18) is required.\n",
612
- "score": 0.5458617633302318,
613
- "source": "pentatonic-memory",
614
- "doc_id": "compat-kite-harness-bar"
615
- }
616
- ],
617
- "correct": true,
618
- "score": 1.0,
619
- "grading_notes": "all substrings matched",
620
- "search_time_ms": 32.8681189857889,
621
- "generation_time_ms": 0.0,
622
- "tokens_in": 0,
623
- "tokens_out": 0,
624
- "retrieval_tokens": 93,
625
- "query_tokens": 25,
626
- "context_tokens": 68,
627
- "judge_tokens_in": 0,
628
- "judge_tokens_out": 0,
629
- "judge_latency_ms": 0.0
630
- },
631
- {
632
- "task_id": "multi-hop-takeback-credit",
633
- "query": "If I return an Atlas Phone Shell via take-back after 2 years, how much store credit do I get?",
634
- "answer": "",
635
- "hits": [],
636
- "correct": false,
637
- "score": 0.0,
638
- "grading_notes": "missing 2/2: ['14.50', '50%']",
639
- "search_time_ms": 35.04025097936392,
640
- "generation_time_ms": 0.0,
641
- "tokens_in": 0,
642
- "tokens_out": 0,
643
- "retrieval_tokens": 23,
644
- "query_tokens": 23,
645
- "context_tokens": 0,
646
- "judge_tokens_in": 0,
647
- "judge_tokens_out": 0,
648
- "judge_latency_ms": 0.0
649
- },
650
- {
651
- "task_id": "rubric-recommend-kitesurf-bag",
652
- "query": "I'm heading to Worthing for a kite session and need to carry my 15\" MacBook and some kit. What would you recommend from the catalogue?",
653
- "answer": "",
654
- "hits": [],
655
- "correct": false,
656
- "score": 0.0,
657
- "grading_notes": "judge=0.00 \u2014 The agent's answer does not mention a laptop sleeve or any bag for carrying kit, and instead recommends a phone case.",
658
- "search_time_ms": 36.593386001186445,
659
- "generation_time_ms": 0.0,
660
- "tokens_in": 0,
661
- "tokens_out": 0,
662
- "retrieval_tokens": 31,
663
- "query_tokens": 31,
664
- "context_tokens": 0,
665
- "judge_tokens_in": 341,
666
- "judge_tokens_out": 38,
667
- "judge_latency_ms": 874.3087880015373
668
- },
669
- {
670
- "task_id": "rubric-plastic-free-tee",
671
- "query": "Which tees in the catalogue are completely plastic-free?",
672
- "answer": "",
673
- "hits": [],
674
- "correct": false,
675
- "score": 0.0,
676
- "grading_notes": "judge=0.00 \u2014 The agent's answer does not mention any plastic-free tees in the catalogue.",
677
- "search_time_ms": 31.253111985279247,
678
- "generation_time_ms": 0.0,
679
- "tokens_in": 0,
680
- "tokens_out": 0,
681
- "retrieval_tokens": 11,
682
- "query_tokens": 11,
683
- "context_tokens": 0,
684
- "judge_tokens_in": 307,
685
- "judge_tokens_out": 30,
686
- "judge_latency_ms": 787.7367050051689
687
- },
688
- {
689
- "task_id": "rubric-cc-certified-items",
690
- "query": "Do any items in the catalogue have Cradle to Cradle certification?",
691
- "answer": "",
692
- "hits": [],
693
- "correct": false,
694
- "score": 0.0,
695
- "grading_notes": "judge=0.00 \u2014 The agent's answer does not mention any item with Cradle to Cradle certification, let alone the Atlas Phone Shell.",
696
- "search_time_ms": 29.518647002987564,
697
- "generation_time_ms": 0.0,
698
- "tokens_in": 0,
699
- "tokens_out": 0,
700
- "retrieval_tokens": 14,
701
- "query_tokens": 14,
702
- "context_tokens": 0,
703
- "judge_tokens_in": 288,
704
- "judge_tokens_out": 38,
705
- "judge_latency_ms": 714.9526060223579
706
- },
707
- {
708
- "task_id": "entity-recycled-content-high",
709
- "query": "Which items in the catalogue use at least 50% recycled material?",
710
- "answer": "",
711
- "hits": [],
712
- "correct": false,
713
- "score": 0.0,
714
- "grading_notes": "no expected_substrings set",
715
- "search_time_ms": 30.805075977696106,
716
- "generation_time_ms": 0.0,
717
- "tokens_in": 0,
718
- "tokens_out": 0,
719
- "retrieval_tokens": 14,
720
- "query_tokens": 14,
721
- "context_tokens": 0,
722
- "judge_tokens_in": 0,
723
- "judge_tokens_out": 0,
724
- "judge_latency_ms": 0.0
725
- }
726
- ]
727
- }