simplicio-prompt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +173 -0
- package/YOOL_TUPLE_HAMT.md +1149 -0
- package/adopters.md +24 -0
- package/benchmarks/generate_prompt_benchmark_pdf.py +355 -0
- package/benchmarks/generate_v2_benchmark_pdf.py +302 -0
- package/benchmarks/prompt_vs_normal.py +431 -0
- package/benchmarks/prompt_vs_normal_benchmark.pdf +124 -0
- package/benchmarks/prompt_vs_normal_results.md +148 -0
- package/benchmarks/v2_safe_speed_benchmark.pdf +118 -0
- package/benchmarks/v2_safe_speed_benchmark.py +626 -0
- package/benchmarks/v2_safe_speed_results.json +446 -0
- package/benchmarks/v2_safe_speed_results.md +96 -0
- package/docs/assets/simplicio-prompt-hero.png +0 -0
- package/docs/assets/yool-v2-safe-speed-infographic-en.png +0 -0
- package/docs/assets/yool-v2-safe-speed-infographic-pt.png +0 -0
- package/examples/node/build-catalog.mjs +70 -0
- package/examples/python/minimal_bus.py +134 -0
- package/examples/python/receipts.py +152 -0
- package/guardrails/cpu_throttle.py +119 -0
- package/guardrails/disk_gc.py +212 -0
- package/kernel/README.md +82 -0
- package/kernel/yool_tuple_kernel.py +1109 -0
- package/kernel-implementation-request.md +38 -0
- package/package.json +40 -0
- package/prompts/agent-runtime-execution-prompt.md +119 -0
- package/prompts/legacy-tuple-space-engine-prompt.md +36 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Yool Safe-Speed Benchmark V2",
|
|
3
|
+
"run_date": "2026-05-21",
|
|
4
|
+
"environment": {
|
|
5
|
+
"python": "3.14.3",
|
|
6
|
+
"repository": "wesleysimplicio/simplicio-prompt",
|
|
7
|
+
"branch": "codex/lane-concurrency-runtime",
|
|
8
|
+
"v1_definition": "high-throughput runtime with fixed lane ceiling and safe-speed controls disabled",
|
|
9
|
+
"v2_definition": "V1 plus cache, adaptive lanes, backoff, circuit breaker, batching, context compression, local routing, and idempotent speculation"
|
|
10
|
+
},
|
|
11
|
+
"results": [
|
|
12
|
+
{
|
|
13
|
+
"scenario": "scale_representation",
|
|
14
|
+
"profile": "normal instruction",
|
|
15
|
+
"wall_ms": 174.48919999878854,
|
|
16
|
+
"tasks": 131072,
|
|
17
|
+
"peak_kb": 28751.21875,
|
|
18
|
+
"provider_calls": 0,
|
|
19
|
+
"cache_hits": 0,
|
|
20
|
+
"blocked_calls": 0,
|
|
21
|
+
"total_agents": 131072,
|
|
22
|
+
"virtual_agents": 0,
|
|
23
|
+
"tokens": 0,
|
|
24
|
+
"notes": "flat list materialization",
|
|
25
|
+
"throughput_tasks_s": 751175.4309201373
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"scenario": "scale_representation",
|
|
29
|
+
"profile": "V1 high-throughput",
|
|
30
|
+
"wall_ms": 0.07569999434053898,
|
|
31
|
+
"tasks": 1048576,
|
|
32
|
+
"peak_kb": 7.69921875,
|
|
33
|
+
"provider_calls": 0,
|
|
34
|
+
"cache_hits": 0,
|
|
35
|
+
"blocked_calls": 0,
|
|
36
|
+
"total_agents": 1048577,
|
|
37
|
+
"virtual_agents": 1048576,
|
|
38
|
+
"tokens": 0,
|
|
39
|
+
"notes": "lazy batch_spawn depth=4, branching=32",
|
|
40
|
+
"throughput_tasks_s": 13851731550.770605
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"scenario": "scale_representation",
|
|
44
|
+
"profile": "V2 safe-speed",
|
|
45
|
+
"wall_ms": 0.05169998621568084,
|
|
46
|
+
"tasks": 1048576,
|
|
47
|
+
"peak_kb": 7.31640625,
|
|
48
|
+
"provider_calls": 0,
|
|
49
|
+
"cache_hits": 0,
|
|
50
|
+
"blocked_calls": 0,
|
|
51
|
+
"total_agents": 1048577,
|
|
52
|
+
"virtual_agents": 1048576,
|
|
53
|
+
"tokens": 0,
|
|
54
|
+
"notes": "lazy batch_spawn depth=4, branching=32",
|
|
55
|
+
"throughput_tasks_s": 20281939643.5731
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"scenario": "active_execution",
|
|
59
|
+
"profile": "normal instruction",
|
|
60
|
+
"wall_ms": 5615.248199959751,
|
|
61
|
+
"tasks": 1024,
|
|
62
|
+
"peak_kb": 0.6484375,
|
|
63
|
+
"provider_calls": 1024,
|
|
64
|
+
"cache_hits": 0,
|
|
65
|
+
"blocked_calls": 0,
|
|
66
|
+
"total_agents": 1024,
|
|
67
|
+
"virtual_agents": 0,
|
|
68
|
+
"tokens": 0,
|
|
69
|
+
"notes": "sequential execution",
|
|
70
|
+
"throughput_tasks_s": 182.36059449826988
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"scenario": "active_execution",
|
|
74
|
+
"profile": "V1 high-throughput",
|
|
75
|
+
"wall_ms": 237.90680000092834,
|
|
76
|
+
"tasks": 1024,
|
|
77
|
+
"peak_kb": 3408.0009765625,
|
|
78
|
+
"provider_calls": 1024,
|
|
79
|
+
"cache_hits": 0,
|
|
80
|
+
"blocked_calls": 0,
|
|
81
|
+
"total_agents": 0,
|
|
82
|
+
"virtual_agents": 0,
|
|
83
|
+
"tokens": 0,
|
|
84
|
+
"notes": "lane_concurrency=32, max_lane_concurrency=32",
|
|
85
|
+
"throughput_tasks_s": 4304.20652119235
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
"scenario": "active_execution",
|
|
89
|
+
"profile": "V2 safe-speed",
|
|
90
|
+
"wall_ms": 215.9010999603197,
|
|
91
|
+
"tasks": 1024,
|
|
92
|
+
"peak_kb": 3105.7685546875,
|
|
93
|
+
"provider_calls": 1024,
|
|
94
|
+
"cache_hits": 0,
|
|
95
|
+
"blocked_calls": 0,
|
|
96
|
+
"total_agents": 0,
|
|
97
|
+
"virtual_agents": 0,
|
|
98
|
+
"tokens": 0,
|
|
99
|
+
"notes": "lane_concurrency=32, max_lane_concurrency=64",
|
|
100
|
+
"throughput_tasks_s": 4742.912380660405
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"scenario": "cache_dedupe",
|
|
104
|
+
"profile": "normal instruction",
|
|
105
|
+
"wall_ms": 417.67520003486425,
|
|
106
|
+
"tasks": 256,
|
|
107
|
+
"peak_kb": 5.3759765625,
|
|
108
|
+
"provider_calls": 256,
|
|
109
|
+
"cache_hits": 0,
|
|
110
|
+
"blocked_calls": 0,
|
|
111
|
+
"total_agents": 0,
|
|
112
|
+
"virtual_agents": 0,
|
|
113
|
+
"tokens": 0,
|
|
114
|
+
"notes": "64 unique inputs repeated across 256 tasks",
|
|
115
|
+
"throughput_tasks_s": 612.9164479447933
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"scenario": "cache_dedupe",
|
|
119
|
+
"profile": "V1 high-throughput",
|
|
120
|
+
"wall_ms": 429.37830003211275,
|
|
121
|
+
"tasks": 256,
|
|
122
|
+
"peak_kb": 5.2470703125,
|
|
123
|
+
"provider_calls": 256,
|
|
124
|
+
"cache_hits": 0,
|
|
125
|
+
"blocked_calls": 0,
|
|
126
|
+
"total_agents": 0,
|
|
127
|
+
"virtual_agents": 0,
|
|
128
|
+
"tokens": 0,
|
|
129
|
+
"notes": "64 unique inputs repeated across 256 tasks",
|
|
130
|
+
"throughput_tasks_s": 596.2108471267738
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"scenario": "cache_dedupe",
|
|
134
|
+
"profile": "V2 safe-speed",
|
|
135
|
+
"wall_ms": 125.83869998343289,
|
|
136
|
+
"tasks": 256,
|
|
137
|
+
"peak_kb": 43.0400390625,
|
|
138
|
+
"provider_calls": 64,
|
|
139
|
+
"cache_hits": 192,
|
|
140
|
+
"blocked_calls": 0,
|
|
141
|
+
"total_agents": 0,
|
|
142
|
+
"virtual_agents": 0,
|
|
143
|
+
"tokens": 0,
|
|
144
|
+
"notes": "64 unique inputs repeated across 256 tasks",
|
|
145
|
+
"throughput_tasks_s": 2034.3503233401434
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"scenario": "small_task_batching",
|
|
149
|
+
"profile": "normal instruction",
|
|
150
|
+
"wall_ms": 729.9779999884777,
|
|
151
|
+
"tasks": 512,
|
|
152
|
+
"peak_kb": 0.515625,
|
|
153
|
+
"provider_calls": 512,
|
|
154
|
+
"cache_hits": 0,
|
|
155
|
+
"blocked_calls": 0,
|
|
156
|
+
"total_agents": 0,
|
|
157
|
+
"virtual_agents": 0,
|
|
158
|
+
"tokens": 0,
|
|
159
|
+
"notes": "one provider-sized call per small task",
|
|
160
|
+
"throughput_tasks_s": 701.3910008357535
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"scenario": "small_task_batching",
|
|
164
|
+
"profile": "V1 high-throughput",
|
|
165
|
+
"wall_ms": 730.3357000346296,
|
|
166
|
+
"tasks": 512,
|
|
167
|
+
"peak_kb": 0.5078125,
|
|
168
|
+
"provider_calls": 512,
|
|
169
|
+
"cache_hits": 0,
|
|
170
|
+
"blocked_calls": 0,
|
|
171
|
+
"total_agents": 0,
|
|
172
|
+
"virtual_agents": 0,
|
|
173
|
+
"tokens": 0,
|
|
174
|
+
"notes": "one provider-sized call per small task",
|
|
175
|
+
"throughput_tasks_s": 701.047477174843
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
"scenario": "small_task_batching",
|
|
179
|
+
"profile": "V2 safe-speed",
|
|
180
|
+
"wall_ms": 20.444200024940073,
|
|
181
|
+
"tasks": 512,
|
|
182
|
+
"peak_kb": 540.5927734375,
|
|
183
|
+
"provider_calls": 16,
|
|
184
|
+
"cache_hits": 0,
|
|
185
|
+
"blocked_calls": 0,
|
|
186
|
+
"total_agents": 0,
|
|
187
|
+
"virtual_agents": 0,
|
|
188
|
+
"tokens": 0,
|
|
189
|
+
"notes": "batch_size=32",
|
|
190
|
+
"throughput_tasks_s": 25043.77766679089
|
|
191
|
+
},
|
|
192
|
+
{
|
|
193
|
+
"scenario": "provider_failure_control",
|
|
194
|
+
"profile": "normal instruction",
|
|
195
|
+
"wall_ms": 0.020899984519928694,
|
|
196
|
+
"tasks": 64,
|
|
197
|
+
"peak_kb": 0.6015625,
|
|
198
|
+
"provider_calls": 192,
|
|
199
|
+
"cache_hits": 0,
|
|
200
|
+
"blocked_calls": 0,
|
|
201
|
+
"total_agents": 0,
|
|
202
|
+
"virtual_agents": 0,
|
|
203
|
+
"tokens": 0,
|
|
204
|
+
"notes": "no provider circuit breaker",
|
|
205
|
+
"throughput_tasks_s": 3062203.22502987
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"scenario": "provider_failure_control",
|
|
209
|
+
"profile": "V1 high-throughput",
|
|
210
|
+
"wall_ms": 0.008500006515532732,
|
|
211
|
+
"tasks": 64,
|
|
212
|
+
"peak_kb": 0.5625,
|
|
213
|
+
"provider_calls": 192,
|
|
214
|
+
"cache_hits": 0,
|
|
215
|
+
"blocked_calls": 0,
|
|
216
|
+
"total_agents": 0,
|
|
217
|
+
"virtual_agents": 0,
|
|
218
|
+
"tokens": 0,
|
|
219
|
+
"notes": "no provider circuit breaker",
|
|
220
|
+
"throughput_tasks_s": 7529405.993165741
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
"scenario": "provider_failure_control",
|
|
224
|
+
"profile": "V2 safe-speed",
|
|
225
|
+
"wall_ms": 0.5077999667264521,
|
|
226
|
+
"tasks": 64,
|
|
227
|
+
"peak_kb": 5.4736328125,
|
|
228
|
+
"provider_calls": 3,
|
|
229
|
+
"cache_hits": 0,
|
|
230
|
+
"blocked_calls": 63,
|
|
231
|
+
"total_agents": 0,
|
|
232
|
+
"virtual_agents": 0,
|
|
233
|
+
"tokens": 0,
|
|
234
|
+
"notes": "breaker opens after 3 provider failures",
|
|
235
|
+
"throughput_tasks_s": 126033.87986135158
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
"scenario": "context_compression",
|
|
239
|
+
"profile": "normal instruction",
|
|
240
|
+
"wall_ms": 0.0990999978967011,
|
|
241
|
+
"tasks": 1,
|
|
242
|
+
"peak_kb": 53.1337890625,
|
|
243
|
+
"provider_calls": 0,
|
|
244
|
+
"cache_hits": 0,
|
|
245
|
+
"blocked_calls": 0,
|
|
246
|
+
"total_agents": 0,
|
|
247
|
+
"virtual_agents": 0,
|
|
248
|
+
"tokens": 5016,
|
|
249
|
+
"notes": "20000 char context",
|
|
250
|
+
"throughput_tasks_s": 10090.817570373416
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
"scenario": "context_compression",
|
|
254
|
+
"profile": "V1 high-throughput",
|
|
255
|
+
"wall_ms": 0.07440004264935851,
|
|
256
|
+
"tasks": 1,
|
|
257
|
+
"peak_kb": 53.0869140625,
|
|
258
|
+
"provider_calls": 0,
|
|
259
|
+
"cache_hits": 0,
|
|
260
|
+
"blocked_calls": 0,
|
|
261
|
+
"total_agents": 0,
|
|
262
|
+
"virtual_agents": 0,
|
|
263
|
+
"tokens": 5016,
|
|
264
|
+
"notes": "20000 char context",
|
|
265
|
+
"throughput_tasks_s": 13440.85251016482
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
"scenario": "context_compression",
|
|
269
|
+
"profile": "V2 safe-speed",
|
|
270
|
+
"wall_ms": 0.15959999291226268,
|
|
271
|
+
"tasks": 1,
|
|
272
|
+
"peak_kb": 27.6318359375,
|
|
273
|
+
"provider_calls": 0,
|
|
274
|
+
"cache_hits": 0,
|
|
275
|
+
"blocked_calls": 0,
|
|
276
|
+
"total_agents": 0,
|
|
277
|
+
"virtual_agents": 0,
|
|
278
|
+
"tokens": 1188,
|
|
279
|
+
"notes": "20000 char context",
|
|
280
|
+
"throughput_tasks_s": 6265.664438655286
|
|
281
|
+
}
|
|
282
|
+
],
|
|
283
|
+
"comparisons": [
|
|
284
|
+
{
|
|
285
|
+
"scenario": "scale_representation",
|
|
286
|
+
"baseline": "normal instruction",
|
|
287
|
+
"improved": "V2 safe-speed",
|
|
288
|
+
"metric": "wall_ms",
|
|
289
|
+
"baseline_value": 174.48919999878854,
|
|
290
|
+
"improved_value": 0.05169998621568084,
|
|
291
|
+
"ratio": 3375.0337818439334,
|
|
292
|
+
"percent": 99.97037066694327
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
"scenario": "active_execution",
|
|
296
|
+
"baseline": "normal instruction",
|
|
297
|
+
"improved": "V1 high-throughput",
|
|
298
|
+
"metric": "wall_ms",
|
|
299
|
+
"baseline_value": 5615.248199959751,
|
|
300
|
+
"improved_value": 237.90680000092834,
|
|
301
|
+
"ratio": 23.60272257849645,
|
|
302
|
+
"percent": 95.76320063639157
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
"scenario": "active_execution",
|
|
306
|
+
"baseline": "normal instruction",
|
|
307
|
+
"improved": "V2 safe-speed",
|
|
308
|
+
"metric": "wall_ms",
|
|
309
|
+
"baseline_value": 5615.248199959751,
|
|
310
|
+
"improved_value": 215.9010999603197,
|
|
311
|
+
"ratio": 26.008427937568513,
|
|
312
|
+
"percent": 96.15509248617244
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
"scenario": "active_execution",
|
|
316
|
+
"baseline": "V1 high-throughput",
|
|
317
|
+
"improved": "V2 safe-speed",
|
|
318
|
+
"metric": "wall_ms",
|
|
319
|
+
"baseline_value": 237.90680000092834,
|
|
320
|
+
"improved_value": 215.9010999603197,
|
|
321
|
+
"ratio": 1.1019249093434589,
|
|
322
|
+
"percent": 9.249714611151415
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
"scenario": "cache_dedupe",
|
|
326
|
+
"baseline": "normal instruction",
|
|
327
|
+
"improved": "V2 safe-speed",
|
|
328
|
+
"metric": "wall_ms",
|
|
329
|
+
"baseline_value": 417.67520003486425,
|
|
330
|
+
"improved_value": 125.83869998343289,
|
|
331
|
+
"ratio": 3.3191315556331453,
|
|
332
|
+
"percent": 69.87163710631398
|
|
333
|
+
},
|
|
334
|
+
{
|
|
335
|
+
"scenario": "cache_dedupe",
|
|
336
|
+
"baseline": "normal instruction",
|
|
337
|
+
"improved": "V2 safe-speed",
|
|
338
|
+
"metric": "provider_calls",
|
|
339
|
+
"baseline_value": 256,
|
|
340
|
+
"improved_value": 64,
|
|
341
|
+
"ratio": 4.0,
|
|
342
|
+
"percent": 75.0
|
|
343
|
+
},
|
|
344
|
+
{
|
|
345
|
+
"scenario": "cache_dedupe",
|
|
346
|
+
"baseline": "V1 high-throughput",
|
|
347
|
+
"improved": "V2 safe-speed",
|
|
348
|
+
"metric": "wall_ms",
|
|
349
|
+
"baseline_value": 429.37830003211275,
|
|
350
|
+
"improved_value": 125.83869998343289,
|
|
351
|
+
"ratio": 3.4121323574436317,
|
|
352
|
+
"percent": 70.69281331310373
|
|
353
|
+
},
|
|
354
|
+
{
|
|
355
|
+
"scenario": "cache_dedupe",
|
|
356
|
+
"baseline": "V1 high-throughput",
|
|
357
|
+
"improved": "V2 safe-speed",
|
|
358
|
+
"metric": "provider_calls",
|
|
359
|
+
"baseline_value": 256,
|
|
360
|
+
"improved_value": 64,
|
|
361
|
+
"ratio": 4.0,
|
|
362
|
+
"percent": 75.0
|
|
363
|
+
},
|
|
364
|
+
{
|
|
365
|
+
"scenario": "small_task_batching",
|
|
366
|
+
"baseline": "normal instruction",
|
|
367
|
+
"improved": "V2 safe-speed",
|
|
368
|
+
"metric": "wall_ms",
|
|
369
|
+
"baseline_value": 729.9779999884777,
|
|
370
|
+
"improved_value": 20.444200024940073,
|
|
371
|
+
"ratio": 35.705872526093984,
|
|
372
|
+
"percent": 97.19934025062909
|
|
373
|
+
},
|
|
374
|
+
{
|
|
375
|
+
"scenario": "small_task_batching",
|
|
376
|
+
"baseline": "normal instruction",
|
|
377
|
+
"improved": "V2 safe-speed",
|
|
378
|
+
"metric": "provider_calls",
|
|
379
|
+
"baseline_value": 512,
|
|
380
|
+
"improved_value": 16,
|
|
381
|
+
"ratio": 32.0,
|
|
382
|
+
"percent": 96.875
|
|
383
|
+
},
|
|
384
|
+
{
|
|
385
|
+
"scenario": "small_task_batching",
|
|
386
|
+
"baseline": "V1 high-throughput",
|
|
387
|
+
"improved": "V2 safe-speed",
|
|
388
|
+
"metric": "wall_ms",
|
|
389
|
+
"baseline_value": 730.3357000346296,
|
|
390
|
+
"improved_value": 20.444200024940073,
|
|
391
|
+
"ratio": 35.72336893317841,
|
|
392
|
+
"percent": 97.20071194329256
|
|
393
|
+
},
|
|
394
|
+
{
|
|
395
|
+
"scenario": "small_task_batching",
|
|
396
|
+
"baseline": "V1 high-throughput",
|
|
397
|
+
"improved": "V2 safe-speed",
|
|
398
|
+
"metric": "provider_calls",
|
|
399
|
+
"baseline_value": 512,
|
|
400
|
+
"improved_value": 16,
|
|
401
|
+
"ratio": 32.0,
|
|
402
|
+
"percent": 96.875
|
|
403
|
+
},
|
|
404
|
+
{
|
|
405
|
+
"scenario": "provider_failure_control",
|
|
406
|
+
"baseline": "normal instruction",
|
|
407
|
+
"improved": "V2 safe-speed",
|
|
408
|
+
"metric": "provider_calls",
|
|
409
|
+
"baseline_value": 192,
|
|
410
|
+
"improved_value": 3,
|
|
411
|
+
"ratio": 64.0,
|
|
412
|
+
"percent": 98.4375
|
|
413
|
+
},
|
|
414
|
+
{
|
|
415
|
+
"scenario": "provider_failure_control",
|
|
416
|
+
"baseline": "V1 high-throughput",
|
|
417
|
+
"improved": "V2 safe-speed",
|
|
418
|
+
"metric": "provider_calls",
|
|
419
|
+
"baseline_value": 192,
|
|
420
|
+
"improved_value": 3,
|
|
421
|
+
"ratio": 64.0,
|
|
422
|
+
"percent": 98.4375
|
|
423
|
+
},
|
|
424
|
+
{
|
|
425
|
+
"scenario": "context_compression",
|
|
426
|
+
"baseline": "normal instruction",
|
|
427
|
+
"improved": "V2 safe-speed",
|
|
428
|
+
"metric": "tokens",
|
|
429
|
+
"baseline_value": 5016,
|
|
430
|
+
"improved_value": 1188,
|
|
431
|
+
"ratio": 4.222222222222222,
|
|
432
|
+
"percent": 76.31578947368422
|
|
433
|
+
},
|
|
434
|
+
{
|
|
435
|
+
"scenario": "context_compression",
|
|
436
|
+
"baseline": "V1 high-throughput",
|
|
437
|
+
"improved": "V2 safe-speed",
|
|
438
|
+
"metric": "tokens",
|
|
439
|
+
"baseline_value": 5016,
|
|
440
|
+
"improved_value": 1188,
|
|
441
|
+
"ratio": 4.222222222222222,
|
|
442
|
+
"percent": 76.31578947368422
|
|
443
|
+
}
|
|
444
|
+
],
|
|
445
|
+
"median_wall_ms": 73.14145000418648
|
|
446
|
+
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Yool Safe-Speed Benchmark V2
|
|
2
|
+
|
|
3
|
+
Run date: 2026-05-21
|
|
4
|
+
|
|
5
|
+
This report compares three execution styles:
|
|
6
|
+
|
|
7
|
+
- Normal instruction: generic prompt, flat or repeated work, no runtime guardrails.
|
|
8
|
+
- V1 high-throughput: lazy `batch_spawn` and fixed `LaneWorkerPool` fan-out.
|
|
9
|
+
- V2 safe-speed: V1 plus cache, adaptive lanes, backoff, provider circuit breaker, batching, context compression, local routing, and idempotent speculation.
|
|
10
|
+
|
|
11
|
+
The benchmark is local. It does not call hosted LLMs or external APIs.
|
|
12
|
+
|
|
13
|
+
## Scale Representation
|
|
14
|
+
|
|
15
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
16
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
17
|
+
| normal instruction | 131,072 | 174.49 | 751175.4 | 28751.2 | 0 | 0 | 0 | 0 | flat list materialization |
|
|
18
|
+
| V1 high-throughput | 1,048,576 | 0.08 | 13851731550.8 | 7.7 | 0 | 0 | 0 | 0 | lazy batch_spawn depth=4, branching=32 |
|
|
19
|
+
| V2 safe-speed | 1,048,576 | 0.05 | 20281939643.6 | 7.3 | 0 | 0 | 0 | 0 | lazy batch_spawn depth=4, branching=32 |
|
|
20
|
+
|
|
21
|
+
## Active Execution
|
|
22
|
+
|
|
23
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
24
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
25
|
+
| normal instruction | 1,024 | 5615.25 | 182.4 | 0.6 | 1,024 | 0 | 0 | 0 | sequential execution |
|
|
26
|
+
| V1 high-throughput | 1,024 | 237.91 | 4304.2 | 3408.0 | 1,024 | 0 | 0 | 0 | lane_concurrency=32, max_lane_concurrency=32 |
|
|
27
|
+
| V2 safe-speed | 1,024 | 215.90 | 4742.9 | 3105.8 | 1,024 | 0 | 0 | 0 | lane_concurrency=32, max_lane_concurrency=64 |
|
|
28
|
+
|
|
29
|
+
## Cache Dedupe
|
|
30
|
+
|
|
31
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
32
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
33
|
+
| normal instruction | 256 | 417.68 | 612.9 | 5.4 | 256 | 0 | 0 | 0 | 64 unique inputs repeated across 256 tasks |
|
|
34
|
+
| V1 high-throughput | 256 | 429.38 | 596.2 | 5.2 | 256 | 0 | 0 | 0 | 64 unique inputs repeated across 256 tasks |
|
|
35
|
+
| V2 safe-speed | 256 | 125.84 | 2034.4 | 43.0 | 64 | 192 | 0 | 0 | 64 unique inputs repeated across 256 tasks |
|
|
36
|
+
|
|
37
|
+
## Small Task Batching
|
|
38
|
+
|
|
39
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
40
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
41
|
+
| normal instruction | 512 | 729.98 | 701.4 | 0.5 | 512 | 0 | 0 | 0 | one provider-sized call per small task |
|
|
42
|
+
| V1 high-throughput | 512 | 730.34 | 701.0 | 0.5 | 512 | 0 | 0 | 0 | one provider-sized call per small task |
|
|
43
|
+
| V2 safe-speed | 512 | 20.44 | 25043.8 | 540.6 | 16 | 0 | 0 | 0 | batch_size=32 |
|
|
44
|
+
|
|
45
|
+
## Provider Failure Control
|
|
46
|
+
|
|
47
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
48
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
49
|
+
| normal instruction | 64 | 0.02 | 3062203.2 | 0.6 | 192 | 0 | 0 | 0 | no provider circuit breaker |
|
|
50
|
+
| V1 high-throughput | 64 | 0.01 | 7529406.0 | 0.6 | 192 | 0 | 0 | 0 | no provider circuit breaker |
|
|
51
|
+
| V2 safe-speed | 64 | 0.51 | 126033.9 | 5.5 | 3 | 0 | 63 | 0 | breaker opens after 3 provider failures |
|
|
52
|
+
|
|
53
|
+
## Context Compression
|
|
54
|
+
|
|
55
|
+
| Profile | Tasks | Wall ms | Throughput/s | Peak KiB | Provider calls | Cache hits | Blocked | Tokens | Notes |
|
|
56
|
+
|---|---:|---:|---:|---:|---:|---:|---:|---:|---|
|
|
57
|
+
| normal instruction | 1 | 0.10 | 10090.8 | 53.1 | 0 | 0 | 0 | 5,016 | 20000 char context |
|
|
58
|
+
| V1 high-throughput | 1 | 0.07 | 13440.9 | 53.1 | 0 | 0 | 0 | 5,016 | 20000 char context |
|
|
59
|
+
| V2 safe-speed | 1 | 0.16 | 6265.7 | 27.6 | 0 | 0 | 0 | 1,188 | 20000 char context |
|
|
60
|
+
|
|
61
|
+
## Gains
|
|
62
|
+
|
|
63
|
+
| Scenario | Baseline | Improved | Metric | Ratio | Gain |
|
|
64
|
+
|---|---|---|---|---:|---:|
|
|
65
|
+
| scale_representation | normal instruction | V2 safe-speed | wall_ms | 3375.03x | 99.97% |
|
|
66
|
+
| active_execution | normal instruction | V1 high-throughput | wall_ms | 23.60x | 95.76% |
|
|
67
|
+
| active_execution | normal instruction | V2 safe-speed | wall_ms | 26.01x | 96.16% |
|
|
68
|
+
| active_execution | V1 high-throughput | V2 safe-speed | wall_ms | 1.10x | 9.25% |
|
|
69
|
+
| cache_dedupe | normal instruction | V2 safe-speed | wall_ms | 3.32x | 69.87% |
|
|
70
|
+
| cache_dedupe | normal instruction | V2 safe-speed | provider_calls | 4.00x | 75.00% |
|
|
71
|
+
| cache_dedupe | V1 high-throughput | V2 safe-speed | wall_ms | 3.41x | 70.69% |
|
|
72
|
+
| cache_dedupe | V1 high-throughput | V2 safe-speed | provider_calls | 4.00x | 75.00% |
|
|
73
|
+
| small_task_batching | normal instruction | V2 safe-speed | wall_ms | 35.71x | 97.20% |
|
|
74
|
+
| small_task_batching | normal instruction | V2 safe-speed | provider_calls | 32.00x | 96.88% |
|
|
75
|
+
| small_task_batching | V1 high-throughput | V2 safe-speed | wall_ms | 35.72x | 97.20% |
|
|
76
|
+
| small_task_batching | V1 high-throughput | V2 safe-speed | provider_calls | 32.00x | 96.88% |
|
|
77
|
+
| provider_failure_control | normal instruction | V2 safe-speed | provider_calls | 64.00x | 98.44% |
|
|
78
|
+
| provider_failure_control | V1 high-throughput | V2 safe-speed | provider_calls | 64.00x | 98.44% |
|
|
79
|
+
| context_compression | normal instruction | V2 safe-speed | tokens | 4.22x | 76.32% |
|
|
80
|
+
| context_compression | V1 high-throughput | V2 safe-speed | tokens | 4.22x | 76.32% |
|
|
81
|
+
|
|
82
|
+
## Interpretation
|
|
83
|
+
|
|
84
|
+
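- V2 keeps the V1 lazy million-agent scale model. The scale_representation rows are not like-for-like: the normal-instruction baseline materializes 131,072 tasks as a flat list, while V1 and V2 lazily represent 1,048,576, so the 3375.03x wall_ms ratio compares different task counts.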
|
|
85
|
+
- V2 improves active fan-out by allowing lanes to grow toward the configured ceiling when backlog is high.
|
|
86
|
+
- Cache reduces repeated provider calls when the same `yool + data` appears again.
|
|
87
|
+
- Batching turns many tiny provider/API-sized operations into fewer bounded calls.
|
|
88
|
+
- Circuit breaker reduces hammering during provider outages, which is the anti-ban part of the speed model.
|
|
89
|
+
- Context compression lowers token transfer before LLM calls while preserving a digest and preview.
|
|
90
|
+
|
|
91
|
+
## Reproduce
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
python benchmarks/v2_safe_speed_benchmark.py --json-output benchmarks/v2_safe_speed_results.json --md-output benchmarks/v2_safe_speed_results.md
|
|
95
|
+
python benchmarks/generate_v2_benchmark_pdf.py
|
|
96
|
+
```
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Node wrapper around scripts/build_hamt.py.
|
|
4
|
+
* Lets JS/TS adopters consume the same catalog format.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* node examples/node/build-catalog.mjs --source AGENTS.md --output .catalog/hamt.json
|
|
8
|
+
*/
|
|
9
|
+
import { spawn } from "node:child_process";
|
|
10
|
+
import { fileURLToPath } from "node:url";
|
|
11
|
+
import { dirname, resolve } from "node:path";
|
|
12
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
13
|
+
|
|
14
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Two directory levels above this file (the usage text runs it from examples/node/).
const REPO_ROOT = resolve(__dirname, "../..");

// Absolute path of the Python catalog builder that this wrapper delegates to.
const PY_SCRIPT = resolve(REPO_ROOT, "scripts/build_hamt.py");
|
|
17
|
+
|
|
18
|
+
/**
 * Parse the wrapper's command-line flags.
 *
 * Recognized flags are `--source <path>`, `--output <path>`,
 * `--format <name>` (defaults to "auto") and `--help` / `-h`.
 * Arguments that match none of these are ignored.
 *
 * The process exits with status 0 after printing usage for `--help`, and
 * with status 2 when a flag is given without a value or when `--source` /
 * `--output` are absent.
 *
 * @param {string[]} argv - Command-line arguments after the script name.
 * @returns {{ format: string, source: string, output: string }} Parsed options.
 */
function parseArgs(argv) {
  const args = { format: "auto" };

  // Flags that consume the following argument, mapped to the option they set.
  const valueFlags = new Map([
    ["--source", "source"],
    ["--output", "output"],
    ["--format", "format"],
  ]);

  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];

    if (flag === "--help" || flag === "-h") {
      console.log("Usage: build-catalog.mjs --source <path> --output <path> [--format auto|agents-md|yool-list]");
      process.exit(0);
    }

    const key = valueFlags.get(flag);
    if (key === undefined) continue;

    // A trailing flag has no value; without this check `--format` would
    // replace the "auto" default with undefined.
    const value = argv[++i];
    if (value === undefined) {
      console.error(`error: ${flag} requires a value`);
      process.exit(2);
    }
    args[key] = value;
  }

  if (!args.source || !args.output) {
    console.error("error: --source and --output are required");
    process.exit(2);
  }
  return args;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Run the Python catalog builder (`PY_SCRIPT`) as a child process.
 *
 * The interpreter is taken from the `PYTHON` environment variable and falls
 * back to "python3". The child's stdout and stderr are inherited, so its
 * output appears directly on this process's streams; stdin is not connected.
 *
 * @param {{ source: string, output: string, format: string }} args - Values
 *   forwarded to the script as `--source`, `--output` and `--format`.
 * @returns {Promise<void>} Resolves when the script exits with status 0.
 *   Rejects when the script file is missing, the process cannot be started,
 *   it exits with a non-zero status, or it is terminated by a signal.
 */
function runPython(args) {
  return new Promise((resolveP, rejectP) => {
    if (!existsSync(PY_SCRIPT)) {
      rejectP(new Error(`build_hamt.py not found at ${PY_SCRIPT}`));
      return;
    }

    // `||` rather than `??` so an empty PYTHON value also falls back.
    const py = process.env.PYTHON || "python3";
    const child = spawn(
      py,
      [
        PY_SCRIPT,
        "--source", args.source,
        "--output", args.output,
        "--format", args.format,
      ],
      { stdio: ["ignore", "inherit", "inherit"] },
    );

    child.on("error", rejectP);
    child.on("exit", (code, signal) => {
      if (code === 0) {
        resolveP();
        return;
      }
      // When the child is killed by a signal, `code` is null and `signal`
      // names the signal; report that instead of "code null".
      const reason = signal
        ? `was terminated by signal ${signal}`
        : `exited with code ${code}`;
      rejectP(new Error(`build_hamt.py ${reason}`));
    });
  });
}
|
|
57
|
+
|
|
58
|
+
/**
 * CLI entry point: parse the command line, run the Python builder, then
 * read the generated catalog back and report its entry count on stderr.
 *
 * The report is skipped when the output file does not exist after the
 * builder has finished.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = parseArgs(process.argv.slice(2));
  await runPython(options);

  if (!existsSync(options.output)) {
    return;
  }

  const text = readFileSync(options.output, "utf-8");
  const parsed = JSON.parse(text);
  const entryCount = parsed.meta?.count;
  console.error(`# loaded catalog: ${entryCount} entries`);
}
|
|
66
|
+
|
|
67
|
+
// Print the failure message on stderr and end the process with exit status 1.
const reportFailure = (err) => {
  console.error(err.message);
  process.exit(1);
};

main().catch(reportFailure);
|