@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1278 @@
1
+ {
2
+ "bench": "circular-economy",
3
+ "stack": "pentatonic-baseline",
4
+ "n_tasks": 25,
5
+ "n_correct": 19,
6
+ "accuracy": 0.76,
7
+ "mean_score": 0.8640000000000001,
8
+ "p50_search_ms": 1189.4344199972693,
9
+ "p95_search_ms": 1468.0619304970605,
10
+ "total_tokens_in": 0,
11
+ "total_tokens_out": 0,
12
+ "total_usd": 0.0,
13
+ "by_tag": {
14
+ "factoid": {
15
+ "n": 14,
16
+ "mean_score": 0.9642857142857143,
17
+ "accuracy": 0.9285714285714286
18
+ },
19
+ "material": {
20
+ "n": 8,
21
+ "mean_score": 0.85,
22
+ "accuracy": 0.75
23
+ },
24
+ "takeback": {
25
+ "n": 6,
26
+ "mean_score": 0.9166666666666666,
27
+ "accuracy": 0.8333333333333334
28
+ },
29
+ "lifecycle": {
30
+ "n": 2,
31
+ "mean_score": 0.5,
32
+ "accuracy": 0.5
33
+ },
34
+ "multi-fact": {
35
+ "n": 1,
36
+ "mean_score": 1.0,
37
+ "accuracy": 1.0
38
+ },
39
+ "policy": {
40
+ "n": 3,
41
+ "mean_score": 1.0,
42
+ "accuracy": 1.0
43
+ },
44
+ "certification": {
45
+ "n": 4,
46
+ "mean_score": 0.875,
47
+ "accuracy": 0.75
48
+ },
49
+ "multi-doc": {
50
+ "n": 2,
51
+ "mean_score": 0.5,
52
+ "accuracy": 0.5
53
+ },
54
+ "regulation": {
55
+ "n": 5,
56
+ "mean_score": 0.96,
57
+ "accuracy": 0.8
58
+ },
59
+ "concept": {
60
+ "n": 1,
61
+ "mean_score": 1.0,
62
+ "accuracy": 1.0
63
+ },
64
+ "rubric": {
65
+ "n": 3,
66
+ "mean_score": 0.7000000000000001,
67
+ "accuracy": 0.0
68
+ },
69
+ "honesty": {
70
+ "n": 1,
71
+ "mean_score": 0.8,
72
+ "accuracy": 0.0
73
+ },
74
+ "multi-hop": {
75
+ "n": 2,
76
+ "mean_score": 0.65,
77
+ "accuracy": 0.0
78
+ },
79
+ "entity": {
80
+ "n": 1,
81
+ "mean_score": 0.0,
82
+ "accuracy": 0.0
83
+ },
84
+ "negative": {
85
+ "n": 1,
86
+ "mean_score": 0.0,
87
+ "accuracy": 0.0
88
+ }
89
+ },
90
+ "extra": {
91
+ "ingest_ms": 35908.34956098115,
92
+ "grading": "substring",
93
+ "limit": 3,
94
+ "tokens": {
95
+ "corpus_tokens": 1459,
96
+ "query_tokens": 359,
97
+ "context_tokens": 5942,
98
+ "retrieval_tokens": 6301,
99
+ "naive_tokens": 36834,
100
+ "saved_tokens": 30533,
101
+ "reduction_pct": 0.8289352228918934,
102
+ "mean_retrieval_tokens_per_task": 252.04,
103
+ "tokenizer": "cl100k_base",
104
+ "per_task": {
105
+ "atlas-material-source": {
106
+ "query": 15,
107
+ "context": 259,
108
+ "retrieval": 274,
109
+ "judge_in": 0,
110
+ "judge_out": 0,
111
+ "judge_latency_ms": 0.0
112
+ },
113
+ "atlas-takeback-credit": {
114
+ "query": 15,
115
+ "context": 230,
116
+ "retrieval": 245,
117
+ "judge_in": 0,
118
+ "judge_out": 0,
119
+ "judge_latency_ms": 0.0
120
+ },
121
+ "atlas-closed-loop": {
122
+ "query": 8,
123
+ "context": 230,
124
+ "retrieval": 238,
125
+ "judge_in": 0,
126
+ "judge_out": 0,
127
+ "judge_latency_ms": 0.0
128
+ },
129
+ "luna-takeback-split": {
130
+ "query": 16,
131
+ "context": 265,
132
+ "retrieval": 281,
133
+ "judge_in": 0,
134
+ "judge_out": 0,
135
+ "judge_latency_ms": 0.0
136
+ },
137
+ "luna-silicone-fate": {
138
+ "query": 13,
139
+ "context": 265,
140
+ "retrieval": 278,
141
+ "judge_in": 0,
142
+ "judge_out": 0,
143
+ "judge_latency_ms": 0.0
144
+ },
145
+ "pla-home-compost": {
146
+ "query": 10,
147
+ "context": 252,
148
+ "retrieval": 262,
149
+ "judge_in": 0,
150
+ "judge_out": 0,
151
+ "judge_latency_ms": 0.0
152
+ },
153
+ "pla-hot-drinks": {
154
+ "query": 10,
155
+ "context": 252,
156
+ "retrieval": 262,
157
+ "judge_in": 0,
158
+ "judge_out": 0,
159
+ "judge_latency_ms": 0.0
160
+ },
161
+ "pbat-local-authority": {
162
+ "query": 13,
163
+ "context": 246,
164
+ "retrieval": 259,
165
+ "judge_in": 0,
166
+ "judge_out": 0,
167
+ "judge_latency_ms": 0.0
168
+ },
169
+ "ghost-net-source": {
170
+ "query": 15,
171
+ "context": 237,
172
+ "retrieval": 252,
173
+ "judge_in": 0,
174
+ "judge_out": 0,
175
+ "judge_latency_ms": 0.0
176
+ },
177
+ "kite-harness-foam-recovery": {
178
+ "query": 18,
179
+ "context": 256,
180
+ "retrieval": 274,
181
+ "judge_in": 0,
182
+ "judge_out": 0,
183
+ "judge_latency_ms": 0.0
184
+ },
185
+ "haven-sleeve-bottles-15": {
186
+ "query": 17,
187
+ "context": 211,
188
+ "retrieval": 228,
189
+ "judge_in": 0,
190
+ "judge_out": 0,
191
+ "judge_latency_ms": 0.0
192
+ },
193
+ "cert-c2c-tiers": {
194
+ "query": 12,
195
+ "context": 217,
196
+ "retrieval": 229,
197
+ "judge_in": 0,
198
+ "judge_out": 0,
199
+ "judge_latency_ms": 0.0
200
+ },
201
+ "cert-c2c-our-products": {
202
+ "query": 14,
203
+ "context": 218,
204
+ "retrieval": 232,
205
+ "judge_in": 0,
206
+ "judge_out": 0,
207
+ "judge_latency_ms": 0.0
208
+ },
209
+ "cert-grs-threshold": {
210
+ "query": 12,
211
+ "context": 233,
212
+ "retrieval": 245,
213
+ "judge_in": 0,
214
+ "judge_out": 0,
215
+ "judge_latency_ms": 0.0
216
+ },
217
+ "cert-en13432-temp": {
218
+ "query": 11,
219
+ "context": 244,
220
+ "retrieval": 255,
221
+ "judge_in": 0,
222
+ "judge_out": 0,
223
+ "judge_latency_ms": 0.0
224
+ },
225
+ "reg-uk-epr-scope": {
226
+ "query": 13,
227
+ "context": 231,
228
+ "retrieval": 244,
229
+ "judge_in": 0,
230
+ "judge_out": 0,
231
+ "judge_latency_ms": 0.0
232
+ },
233
+ "reg-espr-dpp": {
234
+ "query": 13,
235
+ "context": 223,
236
+ "retrieval": 236,
237
+ "judge_in": 0,
238
+ "judge_out": 0,
239
+ "judge_latency_ms": 0.0
240
+ },
241
+ "reg-ca-sb54-deadline": {
242
+ "query": 14,
243
+ "context": 226,
244
+ "retrieval": 240,
245
+ "judge_in": 0,
246
+ "judge_out": 0,
247
+ "judge_latency_ms": 0.0
248
+ },
249
+ "reg-lithium-return": {
250
+ "query": 13,
251
+ "context": 242,
252
+ "retrieval": 255,
253
+ "judge_in": 0,
254
+ "judge_out": 0,
255
+ "judge_latency_ms": 0.0
256
+ },
257
+ "closed-loop-threshold": {
258
+ "query": 16,
259
+ "context": 241,
260
+ "retrieval": 257,
261
+ "judge_in": 0,
262
+ "judge_out": 0,
263
+ "judge_latency_ms": 0.0
264
+ },
265
+ "rubric-customer-greenwash-claim": {
266
+ "query": 27,
267
+ "context": 237,
268
+ "retrieval": 264,
269
+ "judge_in": 592,
270
+ "judge_out": 70,
271
+ "judge_latency_ms": 1744.0665280222893
272
+ },
273
+ "rubric-full-takeback-story-atlas": {
274
+ "query": 17,
275
+ "context": 231,
276
+ "retrieval": 248,
277
+ "judge_in": 559,
278
+ "judge_out": 46,
279
+ "judge_latency_ms": 1309.8052870035172
280
+ },
281
+ "rubric-regulatory-scope-briefing": {
282
+ "query": 22,
283
+ "context": 227,
284
+ "retrieval": 249,
285
+ "judge_in": 578,
286
+ "judge_out": 62,
287
+ "judge_latency_ms": 1220.718718022108
288
+ },
289
+ "entity-closed-loop-skus": {
290
+ "query": 12,
291
+ "context": 234,
292
+ "retrieval": 246,
293
+ "judge_in": 0,
294
+ "judge_out": 0,
295
+ "judge_latency_ms": 0.0
296
+ },
297
+ "entity-home-compostable": {
298
+ "query": 13,
299
+ "context": 235,
300
+ "retrieval": 248,
301
+ "judge_in": 0,
302
+ "judge_out": 0,
303
+ "judge_latency_ms": 0.0
304
+ }
305
+ },
306
+ "judge_tokens_in": 1729,
307
+ "judge_tokens_out": 178,
308
+ "judge_calls": 3,
309
+ "judge_mean_latency_ms": 1424.8635110159714
310
+ },
311
+ "cost_usd": {
312
+ "assumed_completion_tokens_per_task": 100,
313
+ "rates": {
314
+ "input_per_1k": 0.0025,
315
+ "output_per_1k": 0.01,
316
+ "model": "gpt-4o"
317
+ },
318
+ "retrieval_usd_in": 0.0157525,
319
+ "retrieval_usd_out": 0.025,
320
+ "retrieval_usd_total": 0.0407525,
321
+ "naive_usd_total": 0.11708500000000002,
322
+ "saved_usd": 0.07633250000000003,
323
+ "saved_usd_per_1k_tasks": 3.053300000000001
324
+ }
325
+ },
326
+ "task_results": [
327
+ {
328
+ "task_id": "atlas-material-source",
329
+ "query": "Where does the recycled polypropylene in the Atlas Phone Shell come from?",
330
+ "answer": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.\n---\nTake-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
331
+ "hits": [
332
+ {
333
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
334
+ "score": 0.7501,
335
+ "source": "pentatonic-baseline",
336
+ "doc_id": "material-rpp-ocean"
337
+ },
338
+ {
339
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
340
+ "score": 0.7338,
341
+ "source": "pentatonic-baseline",
342
+ "doc_id": "takeback-atlas-phone"
343
+ },
344
+ {
345
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
346
+ "score": 0.75,
347
+ "source": "pentatonic-baseline:L0_workspace_bm25",
348
+ "doc_id": "material-rpp-ocean"
349
+ }
350
+ ],
351
+ "correct": true,
352
+ "score": 1.0,
353
+ "grading_notes": "all substrings matched",
354
+ "search_time_ms": 1189.4344199972693,
355
+ "generation_time_ms": 0.0,
356
+ "tokens_in": 0,
357
+ "tokens_out": 0,
358
+ "retrieval_tokens": 274,
359
+ "query_tokens": 15,
360
+ "context_tokens": 259,
361
+ "judge_tokens_in": 0,
362
+ "judge_tokens_out": 0,
363
+ "judge_latency_ms": 0.0
364
+ },
365
+ {
366
+ "task_id": "atlas-takeback-credit",
367
+ "query": "What credit do I get for returning an Atlas Phone Shell via take-back?",
368
+ "answer": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.\n---\nTake-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.\n---\nCradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
369
+ "hits": [
370
+ {
371
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
372
+ "score": 0.8061,
373
+ "source": "pentatonic-baseline",
374
+ "doc_id": "takeback-atlas-phone"
375
+ },
376
+ {
377
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
378
+ "score": 0.7402,
379
+ "source": "pentatonic-baseline:L0_workspace_bm25",
380
+ "doc_id": "takeback-atlas-phone"
381
+ },
382
+ {
383
+ "text": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
384
+ "score": 0.7185987373076197,
385
+ "source": "pentatonic-baseline",
386
+ "doc_id": "cert-cradle-to-cradle"
387
+ }
388
+ ],
389
+ "correct": true,
390
+ "score": 1.0,
391
+ "grading_notes": "all substrings matched",
392
+ "search_time_ms": 1332.2982710087672,
393
+ "generation_time_ms": 0.0,
394
+ "tokens_in": 0,
395
+ "tokens_out": 0,
396
+ "retrieval_tokens": 245,
397
+ "query_tokens": 15,
398
+ "context_tokens": 230,
399
+ "judge_tokens_in": 0,
400
+ "judge_tokens_out": 0,
401
+ "judge_latency_ms": 0.0
402
+ },
403
+ {
404
+ "task_id": "atlas-closed-loop",
405
+ "query": "Is the Atlas Phone Shell closed-loop?",
406
+ "answer": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.\n---\nCradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).\n---\nTake-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
407
+ "hits": [
408
+ {
409
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
410
+ "score": 0.7505,
411
+ "source": "pentatonic-baseline",
412
+ "doc_id": "takeback-atlas-phone"
413
+ },
414
+ {
415
+ "text": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
416
+ "score": 0.7434836216375554,
417
+ "source": "pentatonic-baseline",
418
+ "doc_id": "cert-cradle-to-cradle"
419
+ },
420
+ {
421
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
422
+ "score": 0.7151,
423
+ "source": "pentatonic-baseline:L0_workspace_bm25",
424
+ "doc_id": "takeback-atlas-phone"
425
+ }
426
+ ],
427
+ "correct": true,
428
+ "score": 1.0,
429
+ "grading_notes": "all substrings matched",
430
+ "search_time_ms": 1228.249842999503,
431
+ "generation_time_ms": 0.0,
432
+ "tokens_in": 0,
433
+ "tokens_out": 0,
434
+ "retrieval_tokens": 238,
435
+ "query_tokens": 8,
436
+ "context_tokens": 230,
437
+ "judge_tokens_in": 0,
438
+ "judge_tokens_out": 0,
439
+ "judge_latency_ms": 0.0
440
+ },
441
+ {
442
+ "task_id": "luna-takeback-split",
443
+ "query": "How is the take-back credit split between body and lid for the Luna bottle?",
444
+ "answer": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.\n---\nTake-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.\n---\nMaterial: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
445
+ "hits": [
446
+ {
447
+ "text": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.",
448
+ "score": 0.7598,
449
+ "source": "pentatonic-baseline",
450
+ "doc_id": "takeback-luna-bottle"
451
+ },
452
+ {
453
+ "text": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.",
454
+ "score": 0.7081,
455
+ "source": "pentatonic-baseline:L0_workspace_bm25",
456
+ "doc_id": "takeback-luna-bottle"
457
+ },
458
+ {
459
+ "text": "Material: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
460
+ "score": 0.5894,
461
+ "source": "pentatonic-baseline",
462
+ "doc_id": "material-steel-304-recycled"
463
+ }
464
+ ],
465
+ "correct": true,
466
+ "score": 1.0,
467
+ "grading_notes": "all substrings matched",
468
+ "search_time_ms": 1182.5325590034481,
469
+ "generation_time_ms": 0.0,
470
+ "tokens_in": 0,
471
+ "tokens_out": 0,
472
+ "retrieval_tokens": 281,
473
+ "query_tokens": 16,
474
+ "context_tokens": 265,
475
+ "judge_tokens_in": 0,
476
+ "judge_tokens_out": 0,
477
+ "judge_latency_ms": 0.0
478
+ },
479
+ {
480
+ "task_id": "luna-silicone-fate",
481
+ "query": "What happens to the Luna bottle's silicone lid after take-back?",
482
+ "answer": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.\n---\nTake-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.\n---\nMaterial: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
483
+ "hits": [
484
+ {
485
+ "text": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.",
486
+ "score": 0.8214,
487
+ "source": "pentatonic-baseline",
488
+ "doc_id": "takeback-luna-bottle"
489
+ },
490
+ {
491
+ "text": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.",
492
+ "score": 0.7472,
493
+ "source": "pentatonic-baseline:L0_workspace_bm25",
494
+ "doc_id": "takeback-luna-bottle"
495
+ },
496
+ {
497
+ "text": "Material: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
498
+ "score": 0.6001,
499
+ "source": "pentatonic-baseline",
500
+ "doc_id": "material-steel-304-recycled"
501
+ }
502
+ ],
503
+ "correct": true,
504
+ "score": 1.0,
505
+ "grading_notes": "all substrings matched",
506
+ "search_time_ms": 1168.442648020573,
507
+ "generation_time_ms": 0.0,
508
+ "tokens_in": 0,
509
+ "tokens_out": 0,
510
+ "retrieval_tokens": 278,
511
+ "query_tokens": 13,
512
+ "context_tokens": 265,
513
+ "judge_tokens_in": 0,
514
+ "judge_tokens_out": 0,
515
+ "judge_latency_ms": 0.0
516
+ },
517
+ {
518
+ "task_id": "pla-home-compost",
519
+ "query": "Can I compost a Loop coffee cup at home?",
520
+ "answer": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nMaterial: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
521
+ "hits": [
522
+ {
523
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
524
+ "score": 0.7127,
525
+ "source": "pentatonic-baseline",
526
+ "doc_id": "material-pla-plant"
527
+ },
528
+ {
529
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
530
+ "score": 0.6785,
531
+ "source": "pentatonic-baseline:L0_workspace_bm25",
532
+ "doc_id": "material-pla-plant"
533
+ },
534
+ {
535
+ "text": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
536
+ "score": 0.6294,
537
+ "source": "pentatonic-baseline",
538
+ "doc_id": "cert-en13432"
539
+ }
540
+ ],
541
+ "correct": true,
542
+ "score": 1.0,
543
+ "grading_notes": "all substrings matched",
544
+ "search_time_ms": 1159.5256220025476,
545
+ "generation_time_ms": 0.0,
546
+ "tokens_in": 0,
547
+ "tokens_out": 0,
548
+ "retrieval_tokens": 262,
549
+ "query_tokens": 10,
550
+ "context_tokens": 252,
551
+ "judge_tokens_in": 0,
552
+ "judge_tokens_out": 0,
553
+ "judge_latency_ms": 0.0
554
+ },
555
+ {
556
+ "task_id": "pla-hot-drinks",
557
+ "query": "Can I put hot coffee in a Loop cup?",
558
+ "answer": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nMaterial: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
559
+ "hits": [
560
+ {
561
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
562
+ "score": 0.6735,
563
+ "source": "pentatonic-baseline",
564
+ "doc_id": "material-pla-plant"
565
+ },
566
+ {
567
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
568
+ "score": 0.6694,
569
+ "source": "pentatonic-baseline:L0_workspace_bm25",
570
+ "doc_id": "material-pla-plant"
571
+ },
572
+ {
573
+ "text": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
574
+ "score": 0.5239,
575
+ "source": "pentatonic-baseline",
576
+ "doc_id": "cert-en13432"
577
+ }
578
+ ],
579
+ "correct": true,
580
+ "score": 1.0,
581
+ "grading_notes": "all substrings matched",
582
+ "search_time_ms": 1197.3502149921842,
583
+ "generation_time_ms": 0.0,
584
+ "tokens_in": 0,
585
+ "tokens_out": 0,
586
+ "retrieval_tokens": 262,
587
+ "query_tokens": 10,
588
+ "context_tokens": 252,
589
+ "judge_tokens_in": 0,
590
+ "judge_tokens_out": 0,
591
+ "judge_latency_ms": 0.0
592
+ },
593
+ {
594
+ "task_id": "pbat-local-authority",
595
+ "query": "Are Root bin liners accepted in UK council food-waste streams?",
596
+ "answer": "Material: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.\n---\nMaterial: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.\n---\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
597
+ "hits": [
598
+ {
599
+ "text": "Material: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.",
600
+ "score": 0.7602,
601
+ "source": "pentatonic-baseline",
602
+ "doc_id": "material-pbat-starch"
603
+ },
604
+ {
605
+ "text": "Material: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.",
606
+ "score": 0.7112,
607
+ "source": "pentatonic-baseline:L0_workspace_bm25",
608
+ "doc_id": "material-pbat-starch"
609
+ },
610
+ {
611
+ "text": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
612
+ "score": 0.5845,
613
+ "source": "pentatonic-baseline:L0_workspace_bm25",
614
+ "doc_id": "cert-en13432"
615
+ }
616
+ ],
617
+ "correct": true,
618
+ "score": 1.0,
619
+ "grading_notes": "all substrings matched",
620
+ "search_time_ms": 1273.854022991145,
621
+ "generation_time_ms": 0.0,
622
+ "tokens_in": 0,
623
+ "tokens_out": 0,
624
+ "retrieval_tokens": 259,
625
+ "query_tokens": 13,
626
+ "context_tokens": 246,
627
+ "judge_tokens_in": 0,
628
+ "judge_tokens_out": 0,
629
+ "judge_latency_ms": 0.0
630
+ },
631
+ {
632
+ "task_id": "ghost-net-source",
633
+ "query": "Which NGO supplies the ghost-net nylon for the Nomad Kite Harness?",
634
+ "answer": "Material: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.\n---\nTake-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).\n---\nMaterial: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.",
635
+ "hits": [
636
+ {
637
+ "text": "Material: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.",
638
+ "score": 0.8084,
639
+ "source": "pentatonic-baseline",
640
+ "doc_id": "material-nylon-ghost-net"
641
+ },
642
+ {
643
+ "text": "Take-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).",
644
+ "score": 0.7996419051187662,
645
+ "source": "pentatonic-baseline",
646
+ "doc_id": "takeback-kite-harness"
647
+ },
648
+ {
649
+ "text": "Material: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.",
650
+ "score": 0.75,
651
+ "source": "pentatonic-baseline:L0_workspace_bm25",
652
+ "doc_id": "material-nylon-ghost-net"
653
+ }
654
+ ],
655
+ "correct": true,
656
+ "score": 1.0,
657
+ "grading_notes": "all substrings matched",
658
+ "search_time_ms": 1266.3231230108067,
659
+ "generation_time_ms": 0.0,
660
+ "tokens_in": 0,
661
+ "tokens_out": 0,
662
+ "retrieval_tokens": 252,
663
+ "query_tokens": 15,
664
+ "context_tokens": 237,
665
+ "judge_tokens_in": 0,
666
+ "judge_tokens_out": 0,
667
+ "judge_latency_ms": 0.0
668
+ },
669
+ {
670
+ "task_id": "kite-harness-foam-recovery",
671
+ "query": "Is the EVA foam padding in the Nomad Kite Harness recovered via take-back?",
672
+ "answer": "Take-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).\n---\nMaterial: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.\n---\nTake-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).",
673
+ "hits": [
674
+ {
675
+ "text": "Take-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).",
676
+ "score": 0.8261,
677
+ "source": "pentatonic-baseline",
678
+ "doc_id": "takeback-kite-harness"
679
+ },
680
+ {
681
+ "text": "Material: recycled nylon 6,6 from ghost nets. Feedstock: abandoned fishing nets retrieved by partner NGO (Healthy Seas, primarily Adriatic and Aegean waters). 92% recycled content, balance is processing aids. Tensile strength equivalent to virgin nylon 6,6. Used in: Nomad Kite Harness v3 outer shell.",
682
+ "score": 0.7599677601046569,
683
+ "source": "pentatonic-baseline",
684
+ "doc_id": "material-nylon-ghost-net"
685
+ },
686
+ {
687
+ "text": "Take-back programme \u2014 Nomad Kite Harness v3 (NMD-HRN-V3). Accepted condition: worn but repairable or end-of-life. Credit: 25% of original price. Alternative: repair-not-replace via the Nomad repair service (\u00a325 flat fee). Recovery route: ghost-net nylon is mechanically recycled back into new harness shells \u2014 closed-loop. EVA foam padding is NOT recovered (currently sent to energy-from-waste).",
688
+ "score": 0.75,
689
+ "source": "pentatonic-baseline:L0_workspace_bm25",
690
+ "doc_id": "takeback-kite-harness"
691
+ }
692
+ ],
693
+ "correct": true,
694
+ "score": 1.0,
695
+ "grading_notes": "all substrings matched",
696
+ "search_time_ms": 1179.8871149949264,
697
+ "generation_time_ms": 0.0,
698
+ "tokens_in": 0,
699
+ "tokens_out": 0,
700
+ "retrieval_tokens": 274,
701
+ "query_tokens": 18,
702
+ "context_tokens": 256,
703
+ "judge_tokens_in": 0,
704
+ "judge_tokens_out": 0,
705
+ "judge_latency_ms": 0.0
706
+ },
707
+ {
708
+ "task_id": "haven-sleeve-bottles-15",
709
+ "query": "Roughly how many plastic bottles go into a 15\" Haven Laptop Sleeve?",
710
+ "answer": "Material: 100% recycled PET felt. Feedstock: post-consumer PET bottles (UK kerbside collection). Approx 12 bottles per 13\" laptop sleeve, 18 bottles per 15\" laptop sleeve. No virgin polyester added. Recyclable at end-of-life via the Haven take-back programme.\n---\nTake-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).\n---\nGRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
711
+ "hits": [
712
+ {
713
+ "text": "Material: 100% recycled PET felt. Feedstock: post-consumer PET bottles (UK kerbside collection). Approx 12 bottles per 13\" laptop sleeve, 18 bottles per 15\" laptop sleeve. No virgin polyester added. Recyclable at end-of-life via the Haven take-back programme.",
714
+ "score": 0.8046,
715
+ "source": "pentatonic-baseline",
716
+ "doc_id": "material-pet-felt-recycled"
717
+ },
718
+ {
719
+ "text": "Take-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
720
+ "score": 0.6629,
721
+ "source": "pentatonic-baseline",
722
+ "doc_id": "takeback-haven-sleeve"
723
+ },
724
+ {
725
+ "text": "GRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
726
+ "score": 0.6726304124671972,
727
+ "source": "pentatonic-baseline",
728
+ "doc_id": "cert-grs"
729
+ }
730
+ ],
731
+ "correct": true,
732
+ "score": 1.0,
733
+ "grading_notes": "all substrings matched",
734
+ "search_time_ms": 1208.4916000021622,
735
+ "generation_time_ms": 0.0,
736
+ "tokens_in": 0,
737
+ "tokens_out": 0,
738
+ "retrieval_tokens": 228,
739
+ "query_tokens": 17,
740
+ "context_tokens": 211,
741
+ "judge_tokens_in": 0,
742
+ "judge_tokens_out": 0,
743
+ "judge_latency_ms": 0.0
744
+ },
745
+ {
746
+ "task_id": "cert-c2c-tiers",
747
+ "query": "What are the tiers of Cradle to Cradle certification?",
748
+ "answer": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).\n---\n<!-- doc_id: cert-cradle-to-cradle -->\nCradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
749
+ "hits": [
750
+ {
751
+ "text": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
752
+ "score": 0.72,
753
+ "source": "pentatonic-baseline:L0_workspace_bm25",
754
+ "doc_id": "cert-cradle-to-cradle"
755
+ },
756
+ {
757
+ "text": "<!-- doc_id: cert-cradle-to-cradle -->\nCradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
758
+ "score": 0.5109,
759
+ "source": "pentatonic-baseline:L6_documents",
760
+ "doc_id": "cert-cradle-to-cradle"
761
+ },
762
+ {
763
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
764
+ "score": 0.4349,
765
+ "source": "pentatonic-baseline:L0_workspace_bm25",
766
+ "doc_id": "material-rpp-ocean"
767
+ }
768
+ ],
769
+ "correct": true,
770
+ "score": 1.0,
771
+ "grading_notes": "all substrings matched",
772
+ "search_time_ms": 1161.90087000723,
773
+ "generation_time_ms": 0.0,
774
+ "tokens_in": 0,
775
+ "tokens_out": 0,
776
+ "retrieval_tokens": 229,
777
+ "query_tokens": 12,
778
+ "context_tokens": 217,
779
+ "judge_tokens_in": 0,
780
+ "judge_tokens_out": 0,
781
+ "judge_latency_ms": 0.0
782
+ },
783
+ {
784
+ "task_id": "cert-c2c-our-products",
785
+ "query": "Which product in our catalogue holds a Cradle to Cradle certification?",
786
+ "answer": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.\n---\nGRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
787
+ "hits": [
788
+ {
789
+ "text": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
790
+ "score": 0.682,
791
+ "source": "pentatonic-baseline:L0_workspace_bm25",
792
+ "doc_id": "cert-cradle-to-cradle"
793
+ },
794
+ {
795
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
796
+ "score": 0.5567,
797
+ "source": "pentatonic-baseline:L0_workspace_bm25",
798
+ "doc_id": "material-rpp-ocean"
799
+ },
800
+ {
801
+ "text": "GRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
802
+ "score": 0.5204,
803
+ "source": "pentatonic-baseline:L0_workspace_bm25",
804
+ "doc_id": "cert-grs"
805
+ }
806
+ ],
807
+ "correct": true,
808
+ "score": 1.0,
809
+ "grading_notes": "all substrings matched",
810
+ "search_time_ms": 1213.7703699991107,
811
+ "generation_time_ms": 0.0,
812
+ "tokens_in": 0,
813
+ "tokens_out": 0,
814
+ "retrieval_tokens": 232,
815
+ "query_tokens": 14,
816
+ "context_tokens": 218,
817
+ "judge_tokens_in": 0,
818
+ "judge_tokens_out": 0,
819
+ "judge_latency_ms": 0.0
820
+ },
821
+ {
822
+ "task_id": "cert-grs-threshold",
823
+ "query": "What's the minimum recycled content for the GRS claim?",
824
+ "answer": "GRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.\n---\nMaterial: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
825
+ "hits": [
826
+ {
827
+ "text": "GRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
828
+ "score": 0.7337,
829
+ "source": "pentatonic-baseline:L0_workspace_bm25",
830
+ "doc_id": "cert-grs"
831
+ },
832
+ {
833
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
834
+ "score": 0.5884,
835
+ "source": "pentatonic-baseline:L0_workspace_bm25",
836
+ "doc_id": "material-rpp-ocean"
837
+ },
838
+ {
839
+ "text": "Material: 304 stainless steel with 65% recycled content. Source: mix of post-consumer scrap (mostly end-of-life appliances) and pre-consumer mill scrap. EPD (Environmental Product Declaration) available on request. Used in: Luna Water Bottle body, Nomad Kite Harness v3 spreader bar (316 variant with 55% recycled).",
840
+ "score": 0.4743,
841
+ "source": "pentatonic-baseline:L0_workspace_bm25",
842
+ "doc_id": "material-steel-304-recycled"
843
+ }
844
+ ],
845
+ "correct": true,
846
+ "score": 1.0,
847
+ "grading_notes": "all substrings matched",
848
+ "search_time_ms": 1206.5554400032852,
849
+ "generation_time_ms": 0.0,
850
+ "tokens_in": 0,
851
+ "tokens_out": 0,
852
+ "retrieval_tokens": 245,
853
+ "query_tokens": 12,
854
+ "context_tokens": 233,
855
+ "judge_tokens_in": 0,
856
+ "judge_tokens_out": 0,
857
+ "judge_latency_ms": 0.0
858
+ },
859
+ {
860
+ "task_id": "cert-en13432-temp",
861
+ "query": "What industrial composting temperature does EN13432 require?",
862
+ "answer": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).\n---\nMaterial: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.\n---\n<!-- doc_id: cert-en13432 -->\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
863
+ "hits": [
864
+ {
865
+ "text": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
866
+ "score": 0.7287,
867
+ "source": "pentatonic-baseline:L0_workspace_bm25",
868
+ "doc_id": "cert-en13432"
869
+ },
870
+ {
871
+ "text": "Material: PBAT (polybutylene adipate terephthalate) + corn starch biopolymer. Industrially compostable (EN13432). Home-compostability: NO \u2014 requires industrial temperatures. Used in: Root Bio Bin Liners. Regulatory note: accepted in most UK local-authority food-waste streams that accept certified compostable bags, but some authorities reject all bag types \u2014 check locally.",
872
+ "score": 0.5635,
873
+ "source": "pentatonic-baseline:L0_workspace_bm25",
874
+ "doc_id": "material-pbat-starch"
875
+ },
876
+ {
877
+ "text": "<!-- doc_id: cert-en13432 -->\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
878
+ "score": 0.5314,
879
+ "source": "pentatonic-baseline:L6_documents",
880
+ "doc_id": "cert-en13432"
881
+ }
882
+ ],
883
+ "correct": false,
884
+ "score": 0.5,
885
+ "grading_notes": "missing 1/2: ['60']",
886
+ "search_time_ms": 1180.057218996808,
887
+ "generation_time_ms": 0.0,
888
+ "tokens_in": 0,
889
+ "tokens_out": 0,
890
+ "retrieval_tokens": 255,
891
+ "query_tokens": 11,
892
+ "context_tokens": 244,
893
+ "judge_tokens_in": 0,
894
+ "judge_tokens_out": 0,
895
+ "judge_latency_ms": 0.0
896
+ },
897
+ {
898
+ "task_id": "reg-uk-epr-scope",
899
+ "query": "Is Pentatonic in scope for UK EPR packaging reporting?",
900
+ "answer": "UK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.\n---\nUK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.\n---\nCalifornia SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.",
901
+ "hits": [
902
+ {
903
+ "text": "UK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.",
904
+ "score": 0.8374,
905
+ "source": "pentatonic-baseline",
906
+ "doc_id": "reg-uk-epr"
907
+ },
908
+ {
909
+ "text": "UK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.",
910
+ "score": 0.761,
911
+ "source": "pentatonic-baseline:L0_workspace_bm25",
912
+ "doc_id": "reg-uk-epr"
913
+ },
914
+ {
915
+ "text": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.",
916
+ "score": 0.694,
917
+ "source": "pentatonic-baseline",
918
+ "doc_id": "reg-ca-sb54"
919
+ }
920
+ ],
921
+ "correct": true,
922
+ "score": 1.0,
923
+ "grading_notes": "all substrings matched",
924
+ "search_time_ms": 1163.0040339950938,
925
+ "generation_time_ms": 0.0,
926
+ "tokens_in": 0,
927
+ "tokens_out": 0,
928
+ "retrieval_tokens": 244,
929
+ "query_tokens": 13,
930
+ "context_tokens": 231,
931
+ "judge_tokens_in": 0,
932
+ "judge_tokens_out": 0,
933
+ "judge_latency_ms": 0.0
934
+ },
935
+ {
936
+ "task_id": "reg-espr-dpp",
937
+ "query": "What are Digital Product Passports and when do they start applying?",
938
+ "answer": "EU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.\n---\nEU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.\n---\nProduct lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
939
+ "hits": [
940
+ {
941
+ "text": "EU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.",
942
+ "score": 0.7411,
943
+ "source": "pentatonic-baseline",
944
+ "doc_id": "reg-eu-espr"
945
+ },
946
+ {
947
+ "text": "EU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.",
948
+ "score": 0.7302,
949
+ "source": "pentatonic-baseline:L0_workspace_bm25",
950
+ "doc_id": "reg-eu-espr"
951
+ },
952
+ {
953
+ "text": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
954
+ "score": 0.6698362926720982,
955
+ "source": "pentatonic-baseline",
956
+ "doc_id": "lifecycle-stages"
957
+ }
958
+ ],
959
+ "correct": true,
960
+ "score": 1.0,
961
+ "grading_notes": "all substrings matched",
962
+ "search_time_ms": 1186.5456989908125,
963
+ "generation_time_ms": 0.0,
964
+ "tokens_in": 0,
965
+ "tokens_out": 0,
966
+ "retrieval_tokens": 236,
967
+ "query_tokens": 13,
968
+ "context_tokens": 223,
969
+ "judge_tokens_in": 0,
970
+ "judge_tokens_out": 0,
971
+ "judge_latency_ms": 0.0
972
+ },
973
+ {
974
+ "task_id": "reg-ca-sb54-deadline",
975
+ "query": "By when must packaging sold in California be recyclable or compostable?",
976
+ "answer": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.\n---\nCalifornia SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.\n---\nUK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.",
977
+ "hits": [
978
+ {
979
+ "text": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.",
980
+ "score": 0.7632,
981
+ "source": "pentatonic-baseline",
982
+ "doc_id": "reg-ca-sb54"
983
+ },
984
+ {
985
+ "text": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.",
986
+ "score": 0.7131,
987
+ "source": "pentatonic-baseline:L0_workspace_bm25",
988
+ "doc_id": "reg-ca-sb54"
989
+ },
990
+ {
991
+ "text": "UK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.",
992
+ "score": 0.5861,
993
+ "source": "pentatonic-baseline:L0_workspace_bm25",
994
+ "doc_id": "reg-uk-epr"
995
+ }
996
+ ],
997
+ "correct": true,
998
+ "score": 1.0,
999
+ "grading_notes": "all substrings matched",
1000
+ "search_time_ms": 1135.151679016417,
1001
+ "generation_time_ms": 0.0,
1002
+ "tokens_in": 0,
1003
+ "tokens_out": 0,
1004
+ "retrieval_tokens": 240,
1005
+ "query_tokens": 14,
1006
+ "context_tokens": 226,
1007
+ "judge_tokens_in": 0,
1008
+ "judge_tokens_out": 0,
1009
+ "judge_latency_ms": 0.0
1010
+ },
1011
+ {
1012
+ "task_id": "reg-lithium-return",
1013
+ "query": "Can lithium-containing devices be returned through our standard take-back label?",
1014
+ "answer": "Lithium cell regulation: devices containing lithium cells are subject to UN 3480/3481 transport rules and cannot be returned via standard take-back labels. Pentatonic's policy: take-back for devices with lithium cells requires a dedicated labelled pack and courier pickup (not drop-in). Current catalogue impact: none (we do not sell lithium-containing products).\n---\nTake-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.\n---\nProduct lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1015
+ "hits": [
1016
+ {
1017
+ "text": "Lithium cell regulation: devices containing lithium cells are subject to UN 3480/3481 transport rules and cannot be returned via standard take-back labels. Pentatonic's policy: take-back for devices with lithium cells requires a dedicated labelled pack and courier pickup (not drop-in). Current catalogue impact: none (we do not sell lithium-containing products).",
1018
+ "score": 0.7245,
1019
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1020
+ "doc_id": "reg-hazardous-lithium"
1021
+ },
1022
+ {
1023
+ "text": "Take-back programme \u2014 Luna Water Bottle 500ml (LUN-BTL-042). Accepted condition: any. Credit: 40% of original price as store credit (body) + 10% (lid, if returned intact). Process: drop-off at any UK Pentatonic pop-up, or mail-in via takeback label. Recovery route: steel body enters industrial recycling (not closed-loop); silicone lid is down-cycled into playground surfacing granulate.",
1024
+ "score": 0.5493,
1025
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1026
+ "doc_id": "takeback-luna-bottle"
1027
+ },
1028
+ {
1029
+ "text": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1030
+ "score": 0.473,
1031
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1032
+ "doc_id": "lifecycle-stages"
1033
+ }
1034
+ ],
1035
+ "correct": true,
1036
+ "score": 1.0,
1037
+ "grading_notes": "all substrings matched",
1038
+ "search_time_ms": 1228.5156400175765,
1039
+ "generation_time_ms": 0.0,
1040
+ "tokens_in": 0,
1041
+ "tokens_out": 0,
1042
+ "retrieval_tokens": 255,
1043
+ "query_tokens": 13,
1044
+ "context_tokens": 242,
1045
+ "judge_tokens_in": 0,
1046
+ "judge_tokens_out": 0,
1047
+ "judge_latency_ms": 0.0
1048
+ },
1049
+ {
1050
+ "task_id": "closed-loop-threshold",
1051
+ "query": "At what mass-recovery percentage is a SKU considered closed-loop in our system?",
1052
+ "answer": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.\n---\n<!-- doc_id: lifecycle-stages -->\nProduct lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.\n---\n<!-- doc_id: takeback-haven-sleeve -->\nTake-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1053
+ "hits": [
1054
+ {
1055
+ "text": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1056
+ "score": 0.6638,
1057
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1058
+ "doc_id": "lifecycle-stages"
1059
+ },
1060
+ {
1061
+ "text": "<!-- doc_id: lifecycle-stages -->\nProduct lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1062
+ "score": 0.4517,
1063
+ "source": "pentatonic-baseline:L6_documents",
1064
+ "doc_id": "lifecycle-stages"
1065
+ },
1066
+ {
1067
+ "text": "<!-- doc_id: takeback-haven-sleeve -->\nTake-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1068
+ "score": 0.3831,
1069
+ "source": "pentatonic-baseline:L6_documents",
1070
+ "doc_id": "takeback-haven-sleeve"
1071
+ }
1072
+ ],
1073
+ "correct": true,
1074
+ "score": 1.0,
1075
+ "grading_notes": "all substrings matched",
1076
+ "search_time_ms": 1245.218866999494,
1077
+ "generation_time_ms": 0.0,
1078
+ "tokens_in": 0,
1079
+ "tokens_out": 0,
1080
+ "retrieval_tokens": 257,
1081
+ "query_tokens": 16,
1082
+ "context_tokens": 241,
1083
+ "judge_tokens_in": 0,
1084
+ "judge_tokens_out": 0,
1085
+ "judge_latency_ms": 0.0
1086
+ },
1087
+ {
1088
+ "task_id": "rubric-customer-greenwash-claim",
1089
+ "query": "A customer asks: 'Are your Loop cups just the same as plastic cups but with a greener label?' Write an honest response.",
1090
+ "answer": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nEN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).\n---\nTake-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1091
+ "hits": [
1092
+ {
1093
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
1094
+ "score": 0.6405,
1095
+ "source": "pentatonic-baseline",
1096
+ "doc_id": "material-pla-plant"
1097
+ },
1098
+ {
1099
+ "text": "EN13432: European standard for industrially compostable packaging. Requires disintegration within 12 weeks and biodegradation within 6 months under industrial composting conditions (typically 58\u00b0C \u00b1 2\u00b0C). Does not imply home-compostability. Materials meeting EN13432: PLA-CORN (Loop Cup), PBAT-STARCH (Root Bin Liners).",
1100
+ "score": 0.522,
1101
+ "source": "pentatonic-baseline",
1102
+ "doc_id": "cert-en13432"
1103
+ },
1104
+ {
1105
+ "text": "Take-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1106
+ "score": 0.4793,
1107
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1108
+ "doc_id": "takeback-haven-sleeve"
1109
+ }
1110
+ ],
1111
+ "correct": false,
1112
+ "score": 0.8,
1113
+ "grading_notes": "judge=0.80 \u2014 The answer clearly states that Loop cups are PLA, acknowledges the industrial compostability but not home-compostability",
1114
+ "search_time_ms": 1216.4374990097713,
1115
+ "generation_time_ms": 0.0,
1116
+ "tokens_in": 0,
1117
+ "tokens_out": 0,
1118
+ "retrieval_tokens": 264,
1119
+ "query_tokens": 27,
1120
+ "context_tokens": 237,
1121
+ "judge_tokens_in": 592,
1122
+ "judge_tokens_out": 70,
1123
+ "judge_latency_ms": 1744.0665280222893
1124
+ },
1125
+ {
1126
+ "task_id": "rubric-full-takeback-story-atlas",
1127
+ "query": "Walk me through the complete material journey of an Atlas Phone Shell returned via take-back.",
1128
+ "answer": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.\n---\nCradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
1129
+ "hits": [
1130
+ {
1131
+ "text": "Take-back programme \u2014 Atlas Phone Shell (ATL-PHN-001). Accepted condition: any, including broken. Credit: 50% of original purchase price as store credit. Process: free returns label at pentatonic.com/takeback/ATL-PHN-001. Recovery route: shell is mechanically shredded, rPP-OB fraction recovered and fed back into the next production run. Closed-loop: YES.",
1132
+ "score": 0.791,
1133
+ "source": "pentatonic-baseline",
1134
+ "doc_id": "takeback-atlas-phone"
1135
+ },
1136
+ {
1137
+ "text": "Cradle to Cradle Certified: multi-attribute product certification covering material health, material reutilisation, renewable energy, water stewardship, and social fairness. Tiers (lowest to highest): Bronze, Silver, Gold, Platinum. Our holders: Atlas Phone Shell (Bronze).",
1138
+ "score": 0.7375950367135626,
1139
+ "source": "pentatonic-baseline",
1140
+ "doc_id": "cert-cradle-to-cradle"
1141
+ },
1142
+ {
1143
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
1144
+ "score": 0.7170887489939137,
1145
+ "source": "pentatonic-baseline",
1146
+ "doc_id": "material-rpp-ocean"
1147
+ }
1148
+ ],
1149
+ "correct": false,
1150
+ "score": 0.5,
1151
+ "grading_notes": "judge=0.50 \u2014 The answer covers stages 2-3 (50% store credit issued and shell is mechanically shredded) but lacks information on the r",
1152
+ "search_time_ms": 1118.3548660192173,
1153
+ "generation_time_ms": 0.0,
1154
+ "tokens_in": 0,
1155
+ "tokens_out": 0,
1156
+ "retrieval_tokens": 248,
1157
+ "query_tokens": 17,
1158
+ "context_tokens": 231,
1159
+ "judge_tokens_in": 559,
1160
+ "judge_tokens_out": 46,
1161
+ "judge_latency_ms": 1309.8052870035172
1162
+ },
1163
+ {
1164
+ "task_id": "rubric-regulatory-scope-briefing",
1165
+ "query": "Give me a short regulatory briefing on what applies to Pentatonic in the UK, EU, and California.",
1166
+ "answer": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.\n---\nUK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.\n---\nEU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.",
1167
+ "hits": [
1168
+ {
1169
+ "text": "California SB 54 (Plastic Pollution Prevention and Packaging Producer Responsibility Act, 2022): requires all packaging sold in California to be recyclable or compostable by 2032, with 65% recycling rate and 25% source reduction. PRO (Producer Responsibility Organisation) fees apply. Pentatonic: IN SCOPE for US-bound shipments to California.",
1170
+ "score": 0.6596,
1171
+ "source": "pentatonic-baseline",
1172
+ "doc_id": "reg-ca-sb54"
1173
+ },
1174
+ {
1175
+ "text": "UK Extended Producer Responsibility (EPR) for packaging: from 2025, packaging producers must report and pay fees based on the weight and recyclability of packaging placed on the UK market. Modulated fees favour recyclable formats. Reporting threshold: \u00a31M turnover AND 25 tonnes of packaging/year. Pentatonic: IN SCOPE. Our filings: due every 6 months.",
1176
+ "score": 0.6335,
1177
+ "source": "pentatonic-baseline",
1178
+ "doc_id": "reg-uk-epr"
1179
+ },
1180
+ {
1181
+ "text": "EU Ecodesign for Sustainable Products Regulation (ESPR): framework regulation (2024) introducing Digital Product Passports (DPPs), ecodesign requirements, and destruction-of-unsold-goods restrictions. First delegated acts expected 2026 for textiles, electronics, batteries. Pentatonic: PREPARING DPPs for textile and electronics accessory lines.",
1182
+ "score": 0.5981,
1183
+ "source": "pentatonic-baseline",
1184
+ "doc_id": "reg-eu-espr"
1185
+ }
1186
+ ],
1187
+ "correct": false,
1188
+ "score": 0.8,
1189
+ "grading_notes": "judge=0.80 \u2014 The answer covers all three jurisdictions with at least one accurate detail each, but misses the exact reporting frequen",
1190
+ "search_time_ms": 1141.4888369909022,
1191
+ "generation_time_ms": 0.0,
1192
+ "tokens_in": 0,
1193
+ "tokens_out": 0,
1194
+ "retrieval_tokens": 249,
1195
+ "query_tokens": 22,
1196
+ "context_tokens": 227,
1197
+ "judge_tokens_in": 578,
1198
+ "judge_tokens_out": 62,
1199
+ "judge_latency_ms": 1220.718718022108
1200
+ },
1201
+ {
1202
+ "task_id": "entity-closed-loop-skus",
1203
+ "query": "Which SKUs in our catalogue achieve closed-loop material recovery?",
1204
+ "answer": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.\n---\nMaterial: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.\n---\nTake-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1205
+ "hits": [
1206
+ {
1207
+ "text": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1208
+ "score": 0.5676,
1209
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1210
+ "doc_id": "lifecycle-stages"
1211
+ },
1212
+ {
1213
+ "text": "Material: recycled ocean-bound polypropylene (rPP-OB). Feedstock: polypropylene waste collected within 50km of coastlines before it enters waterways. Certification: Global Recycled Standard (GRS) and OceanCycle. Typical recycled content: 95-99% (process additives make up the balance). Applications in our catalogue: Atlas Phone Shell (68% of shell mass), Luna bottle accessory clip.",
1214
+ "score": 0.5338,
1215
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1216
+ "doc_id": "material-rpp-ocean"
1217
+ },
1218
+ {
1219
+ "text": "Take-back programme \u2014 Haven Laptop Sleeves (HVN-SLV-13, HVN-SLV-15). Accepted condition: any. Credit: 30% of original price. Recovery route: rPET felt is shredded and reprocessed into non-woven interior linings for bag products. Closed-loop: PARTIALLY (same material, different product form).",
1220
+ "score": 0.5301,
1221
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1222
+ "doc_id": "takeback-haven-sleeve"
1223
+ }
1224
+ ],
1225
+ "correct": false,
1226
+ "score": 0.0,
1227
+ "grading_notes": "no expected_substrings set",
1228
+ "search_time_ms": 1105.5154380155727,
1229
+ "generation_time_ms": 0.0,
1230
+ "tokens_in": 0,
1231
+ "tokens_out": 0,
1232
+ "retrieval_tokens": 246,
1233
+ "query_tokens": 12,
1234
+ "context_tokens": 234,
1235
+ "judge_tokens_in": 0,
1236
+ "judge_tokens_out": 0,
1237
+ "judge_latency_ms": 0.0
1238
+ },
1239
+ {
1240
+ "task_id": "entity-home-compostable",
1241
+ "query": "List any materials in our catalogue that are home-compostable.",
1242
+ "answer": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.\n---\nGRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).\n---\nProduct lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1243
+ "hits": [
1244
+ {
1245
+ "text": "Material: PLA (polylactic acid). Bioplastic derived from corn starch (US Midwest feedstock for our supply). Industrially compostable under EN13432 (60\u00b0C, 60% humidity, 12 weeks). NOT home-compostable and NOT recyclable through standard plastic streams. Degradation threshold: 60\u00b0C \u2014 avoid hot liquids for direct-contact applications. Used in: Loop Reusable Coffee Cup body.",
1246
+ "score": 0.5764,
1247
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1248
+ "doc_id": "material-pla-plant"
1249
+ },
1250
+ {
1251
+ "text": "GRS (Global Recycled Standard): third-party verification of recycled content, chain of custody, social and environmental practices, and chemical restrictions. Minimum 20% recycled content for a product to bear the GRS claim. Materials using GRS in our catalogue: rPP-OB (Atlas Phone Shell), rPET-FELT (Haven Sleeves).",
1252
+ "score": 0.5347,
1253
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1254
+ "doc_id": "cert-grs"
1255
+ },
1256
+ {
1257
+ "text": "Product lifecycle stages in our system: Design \u2192 Materials sourcing \u2192 Manufacturing \u2192 Distribution \u2192 Use \u2192 End-of-use \u2192 Take-back \u2192 Recovery \u2192 (back to Materials sourcing or to Disposal). Each stage emits TES events the memory stack can index. Lifecycle closure: a SKU is 'closed-loop' when >80% of its mass re-enters Materials sourcing.",
1258
+ "score": 0.4869,
1259
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1260
+ "doc_id": "lifecycle-stages"
1261
+ }
1262
+ ],
1263
+ "correct": false,
1264
+ "score": 0.0,
1265
+ "grading_notes": "missing 1/3: ['none']; forbidden substring(s) present: ['PLA']",
1266
+ "search_time_ms": 1526.2463559920434,
1267
+ "generation_time_ms": 0.0,
1268
+ "tokens_in": 0,
1269
+ "tokens_out": 0,
1270
+ "retrieval_tokens": 248,
1271
+ "query_tokens": 13,
1272
+ "context_tokens": 235,
1273
+ "judge_tokens_in": 0,
1274
+ "judge_tokens_out": 0,
1275
+ "judge_latency_ms": 0.0
1276
+ }
1277
+ ]
1278
+ }