agency-lang 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/lib/agents/agency-agent/agent.js +3 -8
  2. package/dist/lib/agents/agency-agent/subagents/review.js +0 -2
  3. package/dist/lib/agents/docs/cli/cli/optimize.md +195 -72
  4. package/dist/lib/agents/docs/cli/optimize.md +195 -72
  5. package/dist/lib/agents/policy/agent.js +0 -1
  6. package/dist/lib/agents/review/agent.js +0 -1
  7. package/dist/lib/backends/typescriptBuilder.d.ts +1 -0
  8. package/dist/lib/backends/typescriptBuilder.js +9 -1
  9. package/package.json +1 -1
  10. package/stdlib/agency.agency +10 -10
  11. package/stdlib/agency.js +10 -10
  12. package/stdlib/cli.agency +2 -2
  13. package/stdlib/cli.js +2 -3
  14. package/stdlib/clipboard.agency +1 -1
  15. package/stdlib/clipboard.js +1 -2
  16. package/stdlib/http.agency +3 -3
  17. package/stdlib/http.js +3 -3
  18. package/stdlib/index.agency +13 -3
  19. package/stdlib/index.js +4 -4
  20. package/stdlib/keyring.agency +1 -1
  21. package/stdlib/keyring.js +1 -1
  22. package/stdlib/layout.agency +3 -3
  23. package/stdlib/layout.js +3 -3
  24. package/stdlib/memory.agency +2 -2
  25. package/stdlib/memory.js +2 -4
  26. package/stdlib/policy.js +9 -18
  27. package/stdlib/shell.agency +3 -3
  28. package/stdlib/shell.js +3 -3
  29. package/stdlib/statelog.agency +4 -4
  30. package/stdlib/statelog.js +4 -8
  31. package/stdlib/syntax.agency +1 -1
  32. package/stdlib/syntax.js +1 -2
  33. package/stdlib/table.agency +1 -1
  34. package/stdlib/table.js +1 -1
  35. package/stdlib/threads.agency +1 -1
  36. package/stdlib/threads.js +1 -1
  37. package/stdlib/ui.agency +6 -6
  38. package/stdlib/ui.js +6 -6
  39. package/stdlib/validators.agency +11 -11
  40. package/stdlib/validators.js +11 -22
@@ -1444,7 +1444,6 @@ async function __loadAgentsMd_impl(dir) {
1444
1444
  ]);
1445
1445
  await runner.handle(2, async (__data) => approve(), async (runner2) => {
1446
1446
  await runner2.step(0, async (runner3) => {
1447
- __self.__retryable = false;
1448
1447
  __stack.locals.result = await __call(read, {
1449
1448
  type: "positional",
1450
1449
  args: [`AGENTS.md`, __stack.args.dir]
@@ -1456,10 +1455,7 @@ async function __loadAgentsMd_impl(dir) {
1456
1455
  }
1457
1456
  });
1458
1457
  });
1459
- await runner.step(3, async (runner2) => {
1460
- __self.__retryable = false;
1461
- });
1462
- await runner.ifElse(4, [
1458
+ await runner.ifElse(3, [
1463
1459
  {
1464
1460
  condition: async () => await isFailure(__stack.locals.result),
1465
1461
  body: async (runner2) => {
@@ -1471,7 +1467,7 @@ async function __loadAgentsMd_impl(dir) {
1471
1467
  }
1472
1468
  }
1473
1469
  ]);
1474
- await runner.step(5, async (runner2) => {
1470
+ await runner.step(4, async (runner2) => {
1475
1471
  __functionCompleted = true;
1476
1472
  runner2.halt(`
1477
1473
 
@@ -3147,7 +3143,6 @@ async function __printHeader_impl() {
3147
3143
  }
3148
3144
  });
3149
3145
  await runner.step(2, async (runner2) => {
3150
- __self.__retryable = false;
3151
3146
  __stack.locals.data = await __call(box, {
3152
3147
  type: "named",
3153
3148
  positionalArgs: [],
@@ -4899,7 +4894,7 @@ Agent crashed: ${__error.message}`);
4899
4894
  }
4900
4895
  }
4901
4896
  var stdin_default = graph;
4902
- const __sourceMap = { "dist/lib/agents/agency-agent/agent.agency:__cb_top_0": { "1": { "line": 97, "col": 2 }, "1.0": { "line": 98, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_1": { "1": { "line": 103, "col": 2 }, "1.0.0": { "line": 104, "col": 4 }, "1.0.1": { "line": 105, "col": 6 }, "1.0.2": { "line": 106, "col": 11 }, "1.0.3": { "line": 107, "col": 6 }, "1.0.4": { "line": 109, "col": 6 }, "1.0": { "line": 104, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_2": { "1": { "line": 115, "col": 2 }, "1.0": { "line": 116, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_3": { "1": { "line": 121, "col": 2 }, "1.0": { "line": 122, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_showTraces": { "1": { "line": 93, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:renderLLMCallResponse": { "1": { "line": 142, "col": 2 }, "2": { "line": 143, "col": 2 }, "3": { "line": 146, "col": 2 }, "5": { "line": 151, "col": 2 }, "2.0": { "line": 144, "col": 4 }, "3.0.0": { "line": 148, "col": 6 }, "3.0": { "line": 147, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:loadAgentsMd": { "1": { "line": 162, "col": 2 }, "2": { "line": 165, "col": 2 }, "4": { "line": 166, "col": 2 }, "5": { "line": 169, "col": 2 }, "1.0": { "line": 163, "col": 4 }, "2.0": { "line": 165, "col": 2 }, "4.0": { "line": 167, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:builtinPalette": { "1": { "line": 183, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:mergedPalette": { "1": { "line": 195, "col": 2 }, "2": { "line": 196, "col": 2 }, "3": { "line": 203, "col": 2 }, "4": { "line": 204, "col": 2 }, "5": { "line": 207, "col": 2 }, "2.0": { "line": 197, "col": 4 }, "2.1.0": { "line": 199, "col": 6 }, "2.1": { "line": 198, "col": 4 }, "2.2": { "line": 201, "col": 4 }, "4.0": { "line": 205, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_runTurn": { "2": { "line": 218, "col": 2 }, "3": { "line": 219, "col": 
2 }, "4": { "line": 222, "col": 2 }, "5": { "line": 225, "col": 2 }, "7": { "line": 229, "col": 2 }, "9": { "line": 233, "col": 2 }, "11": { "line": 261, "col": 2 }, "12": { "line": 262, "col": 2 }, "14": { "line": 267, "col": 2 }, "3.0": { "line": 220, "col": 4 }, "4.0": { "line": 223, "col": 4 }, "5.0": { "line": 226, "col": 4 }, "5.1": { "line": 227, "col": 4 }, "7.0": { "line": 230, "col": 4 }, "7.1": { "line": 231, "col": 4 }, "9.1": { "line": 241, "col": 4 }, "9.2": { "line": 242, "col": 4 }, "9.3": { "line": 243, "col": 4 }, "9.4.0": { "line": 245, "col": 6 }, "9.4.1": { "line": 246, "col": 6 }, "9.4": { "line": 244, "col": 4 }, "9.5": { "line": 248, "col": 4 }, "9.6": { "line": 249, "col": 4 }, "9.7.0": { "line": 251, "col": 6 }, "9.7.1": { "line": 252, "col": 6 }, "9.7.2.0": { "line": 254, "col": 8 }, "9.7.2": { "line": 253, "col": 6 }, "9.7": { "line": 250, "col": 4 }, "9.9": { "line": 257, "col": 4 }, "12.0": { "line": 263, "col": 4 }, "12.1": { "line": 265, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:mainAgent": { "1": { "line": 450, "col": 2 }, "3": { "line": 464, "col": 2 }, "1.0": { "line": 451, "col": 4 }, "1.1.1": { "line": 456, "col": 6 }, "1.1.2": { "line": 457, "col": 6 }, "1.1": { "line": 452, "col": 4 }, "1.3": { "line": 459, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:agentReplyVia": { "1": { "line": 473, "col": 2 }, "2": { "line": 474, "col": 2 }, "4": { "line": 477, "col": 2 }, "6": { "line": 480, "col": 2 }, "8": { "line": 483, "col": 2 }, "10": { "line": 486, "col": 2 }, "12": { "line": 489, "col": 2 }, "2.0": { "line": 475, "col": 4 }, "4.0": { "line": 478, "col": 4 }, "6.0": { "line": 481, "col": 4 }, "8.0": { "line": 484, "col": 4 }, "10.0": { "line": 487, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:agentReply": { "1": { "line": 499, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:roundedCost": { "1": { "line": 503, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:_buildStatus": { 
"1": { "line": 507, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:sample": { "1": { "line": 515, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:printHeader": { "1": { "line": 519, "col": 2 }, "2": { "line": 520, "col": 2 }, "3": { "line": 542, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:__block_0": { "2.0": { "line": 527, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__block_1": { "2.0.0": { "line": 528, "col": 6 }, "2.0.1": { "line": 536, "col": 6 }, "2.0.2": { "line": 537, "col": 6 } }, "dist/lib/agents/agency-agent/agent.agency:__block_2": { "2.0.0.0": { "line": 529, "col": 8 }, "2.0.0.1": { "line": 530, "col": 8 }, "2.0.0.2": { "line": 531, "col": 8 }, "2.0.0.3": { "line": 532, "col": 8 }, "2.0.0.4": { "line": 533, "col": 8 }, "2.0.0.5": { "line": 534, "col": 8 } }, "dist/lib/agents/agency-agent/agent.agency:__block_3": { "2.0.2.0": { "line": 538, "col": 8 } }, "dist/lib/agents/agency-agent/agent.agency:givePolicyChoice": { "1": { "line": 546, "col": 2 }, "2": { "line": 547, "col": 2 }, "3": { "line": 548, "col": 2 }, "4": { "line": 559, "col": 2 }, "5": { "line": 560, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:setupSession": { "2": { "line": 578, "col": 2 }, "3": { "line": 583, "col": 2 }, "4": { "line": 584, "col": 2 }, "5": { "line": 586, "col": 2 }, "6": { "line": 587, "col": 2 }, "8": { "line": 604, "col": 2 }, "3.0": { "line": 583, "col": 2 }, "6.0": { "line": 587, "col": 2 }, "6.1.0.0": { "line": 590, "col": 8 }, "6.1.0.1": { "line": 591, "col": 8 }, "6.1.0.2": { "line": 593, "col": 8 }, "6.1.0": { "line": 589, "col": 6 }, "6.1.2": { "line": 596, "col": 6 }, "6.1.3": { "line": 597, "col": 6 }, "6.1": { "line": 588, "col": 4 }, "6.3": { "line": 600, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:oneShotAgent": { "1": { "line": 614, "col": 2 }, "2": { "line": 615, "col": 2 }, "3": { "line": 616, "col": 2 }, "4": { "line": 617, "col": 2 }, "5": { "line": 622, "col": 2 }, "4.0": { "line": 
618, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_runSeedTurn": { "1": { "line": 629, "col": 2 }, "2": { "line": 630, "col": 2 }, "3": { "line": 631, "col": 2 }, "3.0": { "line": 632, "col": 4 }, "3.1": { "line": 634, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:startInteractive": { "1": { "line": 645, "col": 2 }, "3": { "line": 660, "col": 2 }, "1.0.0": { "line": 647, "col": 6 }, "1.0": { "line": 646, "col": 4 }, "1.1": { "line": 649, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:main": { "2": { "line": 666, "col": 2 }, "3": { "line": 714, "col": 2 }, "4": { "line": 717, "col": 2 }, "6": { "line": 728, "col": 2 }, "7": { "line": 738, "col": 2 }, "8": { "line": 739, "col": 2 }, "9": { "line": 740, "col": 2 }, "10": { "line": 741, "col": 2 }, "11": { "line": 744, "col": 2 }, "13": { "line": 759, "col": 2 }, "15": { "line": 778, "col": 2 }, "16": { "line": 779, "col": 2 }, "18": { "line": 797, "col": 2 }, "19": { "line": 798, "col": 2 }, "20": { "line": 799, "col": 2 }, "21": { "line": 800, "col": 2 }, "22": { "line": 801, "col": 2 }, "3.0": { "line": 715, "col": 4 }, "4.0": { "line": 718, "col": 4 }, "11.0": { "line": 745, "col": 4 }, "11.1": { "line": 750, "col": 4 }, "13.0": { "line": 760, "col": 4 }, "13.1": { "line": 761, "col": 4 }, "13.2": { "line": 762, "col": 4 }, "13.3": { "line": 763, "col": 4 }, "13.4": { "line": 764, "col": 4 }, "13.5": { "line": 765, "col": 4 }, "16.0": { "line": 780, "col": 4 }, "16.1.0": { "line": 782, "col": 6 }, "16.1.1.0": { "line": 784, "col": 8 }, "16.1.1": { "line": 783, "col": 6 }, "16.1.2": { "line": 786, "col": 6 }, "16.1": { "line": 781, "col": 4 }, "16.2": { "line": 788, "col": 4 }, "16.3": { "line": 789, "col": 4 } } };
4897
+ const __sourceMap = { "dist/lib/agents/agency-agent/agent.agency:__cb_top_0": { "1": { "line": 97, "col": 2 }, "1.0": { "line": 98, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_1": { "1": { "line": 103, "col": 2 }, "1.0.0": { "line": 104, "col": 4 }, "1.0.1": { "line": 105, "col": 6 }, "1.0.2": { "line": 106, "col": 11 }, "1.0.3": { "line": 107, "col": 6 }, "1.0.4": { "line": 109, "col": 6 }, "1.0": { "line": 104, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_2": { "1": { "line": 115, "col": 2 }, "1.0": { "line": 116, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__cb_top_3": { "1": { "line": 121, "col": 2 }, "1.0": { "line": 122, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_showTraces": { "1": { "line": 93, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:renderLLMCallResponse": { "1": { "line": 142, "col": 2 }, "2": { "line": 143, "col": 2 }, "3": { "line": 146, "col": 2 }, "5": { "line": 151, "col": 2 }, "2.0": { "line": 144, "col": 4 }, "3.0.0": { "line": 148, "col": 6 }, "3.0": { "line": 147, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:loadAgentsMd": { "1": { "line": 162, "col": 2 }, "2": { "line": 165, "col": 2 }, "3": { "line": 166, "col": 2 }, "4": { "line": 169, "col": 2 }, "1.0": { "line": 163, "col": 4 }, "2.0": { "line": 165, "col": 2 }, "3.0": { "line": 167, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:builtinPalette": { "1": { "line": 183, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:mergedPalette": { "1": { "line": 195, "col": 2 }, "2": { "line": 196, "col": 2 }, "3": { "line": 203, "col": 2 }, "4": { "line": 204, "col": 2 }, "5": { "line": 207, "col": 2 }, "2.0": { "line": 197, "col": 4 }, "2.1.0": { "line": 199, "col": 6 }, "2.1": { "line": 198, "col": 4 }, "2.2": { "line": 201, "col": 4 }, "4.0": { "line": 205, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_runTurn": { "2": { "line": 218, "col": 2 }, "3": { "line": 219, "col": 
2 }, "4": { "line": 222, "col": 2 }, "5": { "line": 225, "col": 2 }, "7": { "line": 229, "col": 2 }, "9": { "line": 233, "col": 2 }, "11": { "line": 261, "col": 2 }, "12": { "line": 262, "col": 2 }, "14": { "line": 267, "col": 2 }, "3.0": { "line": 220, "col": 4 }, "4.0": { "line": 223, "col": 4 }, "5.0": { "line": 226, "col": 4 }, "5.1": { "line": 227, "col": 4 }, "7.0": { "line": 230, "col": 4 }, "7.1": { "line": 231, "col": 4 }, "9.1": { "line": 241, "col": 4 }, "9.2": { "line": 242, "col": 4 }, "9.3": { "line": 243, "col": 4 }, "9.4.0": { "line": 245, "col": 6 }, "9.4.1": { "line": 246, "col": 6 }, "9.4": { "line": 244, "col": 4 }, "9.5": { "line": 248, "col": 4 }, "9.6": { "line": 249, "col": 4 }, "9.7.0": { "line": 251, "col": 6 }, "9.7.1": { "line": 252, "col": 6 }, "9.7.2.0": { "line": 254, "col": 8 }, "9.7.2": { "line": 253, "col": 6 }, "9.7": { "line": 250, "col": 4 }, "9.9": { "line": 257, "col": 4 }, "12.0": { "line": 263, "col": 4 }, "12.1": { "line": 265, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:mainAgent": { "1": { "line": 450, "col": 2 }, "3": { "line": 464, "col": 2 }, "1.0": { "line": 451, "col": 4 }, "1.1.1": { "line": 456, "col": 6 }, "1.1.2": { "line": 457, "col": 6 }, "1.1": { "line": 452, "col": 4 }, "1.3": { "line": 459, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:agentReplyVia": { "1": { "line": 473, "col": 2 }, "2": { "line": 474, "col": 2 }, "4": { "line": 477, "col": 2 }, "6": { "line": 480, "col": 2 }, "8": { "line": 483, "col": 2 }, "10": { "line": 486, "col": 2 }, "12": { "line": 489, "col": 2 }, "2.0": { "line": 475, "col": 4 }, "4.0": { "line": 478, "col": 4 }, "6.0": { "line": 481, "col": 4 }, "8.0": { "line": 484, "col": 4 }, "10.0": { "line": 487, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:agentReply": { "1": { "line": 499, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:roundedCost": { "1": { "line": 503, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:_buildStatus": { 
"1": { "line": 507, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:sample": { "1": { "line": 515, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:printHeader": { "1": { "line": 519, "col": 2 }, "2": { "line": 520, "col": 2 }, "3": { "line": 542, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:__block_0": { "2.0": { "line": 527, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:__block_1": { "2.0.0": { "line": 528, "col": 6 }, "2.0.1": { "line": 536, "col": 6 }, "2.0.2": { "line": 537, "col": 6 } }, "dist/lib/agents/agency-agent/agent.agency:__block_2": { "2.0.0.0": { "line": 529, "col": 8 }, "2.0.0.1": { "line": 530, "col": 8 }, "2.0.0.2": { "line": 531, "col": 8 }, "2.0.0.3": { "line": 532, "col": 8 }, "2.0.0.4": { "line": 533, "col": 8 }, "2.0.0.5": { "line": 534, "col": 8 } }, "dist/lib/agents/agency-agent/agent.agency:__block_3": { "2.0.2.0": { "line": 538, "col": 8 } }, "dist/lib/agents/agency-agent/agent.agency:givePolicyChoice": { "1": { "line": 546, "col": 2 }, "2": { "line": 547, "col": 2 }, "3": { "line": 548, "col": 2 }, "4": { "line": 559, "col": 2 }, "5": { "line": 560, "col": 2 } }, "dist/lib/agents/agency-agent/agent.agency:setupSession": { "2": { "line": 578, "col": 2 }, "3": { "line": 583, "col": 2 }, "4": { "line": 584, "col": 2 }, "5": { "line": 586, "col": 2 }, "6": { "line": 587, "col": 2 }, "8": { "line": 604, "col": 2 }, "3.0": { "line": 583, "col": 2 }, "6.0": { "line": 587, "col": 2 }, "6.1.0.0": { "line": 590, "col": 8 }, "6.1.0.1": { "line": 591, "col": 8 }, "6.1.0.2": { "line": 593, "col": 8 }, "6.1.0": { "line": 589, "col": 6 }, "6.1.2": { "line": 596, "col": 6 }, "6.1.3": { "line": 597, "col": 6 }, "6.1": { "line": 588, "col": 4 }, "6.3": { "line": 600, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:oneShotAgent": { "1": { "line": 614, "col": 2 }, "2": { "line": 615, "col": 2 }, "3": { "line": 616, "col": 2 }, "4": { "line": 617, "col": 2 }, "5": { "line": 622, "col": 2 }, "4.0": { "line": 
618, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:_runSeedTurn": { "1": { "line": 629, "col": 2 }, "2": { "line": 630, "col": 2 }, "3": { "line": 631, "col": 2 }, "3.0": { "line": 632, "col": 4 }, "3.1": { "line": 634, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:startInteractive": { "1": { "line": 645, "col": 2 }, "3": { "line": 660, "col": 2 }, "1.0.0": { "line": 647, "col": 6 }, "1.0": { "line": 646, "col": 4 }, "1.1": { "line": 649, "col": 4 } }, "dist/lib/agents/agency-agent/agent.agency:main": { "2": { "line": 666, "col": 2 }, "3": { "line": 714, "col": 2 }, "4": { "line": 717, "col": 2 }, "6": { "line": 728, "col": 2 }, "7": { "line": 738, "col": 2 }, "8": { "line": 739, "col": 2 }, "9": { "line": 740, "col": 2 }, "10": { "line": 741, "col": 2 }, "11": { "line": 744, "col": 2 }, "13": { "line": 759, "col": 2 }, "15": { "line": 778, "col": 2 }, "16": { "line": 779, "col": 2 }, "18": { "line": 797, "col": 2 }, "19": { "line": 798, "col": 2 }, "20": { "line": 799, "col": 2 }, "21": { "line": 800, "col": 2 }, "22": { "line": 801, "col": 2 }, "3.0": { "line": 715, "col": 4 }, "4.0": { "line": 718, "col": 4 }, "11.0": { "line": 745, "col": 4 }, "11.1": { "line": 750, "col": 4 }, "13.0": { "line": 760, "col": 4 }, "13.1": { "line": 761, "col": 4 }, "13.2": { "line": 762, "col": 4 }, "13.3": { "line": 763, "col": 4 }, "13.4": { "line": 764, "col": 4 }, "13.5": { "line": 765, "col": 4 }, "16.0": { "line": 780, "col": 4 }, "16.1.0": { "line": 782, "col": 6 }, "16.1.1.0": { "line": 784, "col": 8 }, "16.1.1": { "line": 783, "col": 6 }, "16.1.2": { "line": 786, "col": 6 }, "16.1": { "line": 781, "col": 4 }, "16.2": { "line": 788, "col": 4 }, "16.3": { "line": 789, "col": 4 } } };
4903
4898
  export {
4904
4899
  __getCheckpoints,
4905
4900
  __mainNodeParams,
@@ -1175,7 +1175,6 @@ async function ___typecheck_impl(agencyCode) {
1175
1175
  });
1176
1176
  });
1177
1177
  await runner.step(1, async (runner2) => {
1178
- __self.__retryable = false;
1179
1178
  __stack.locals.result = await __call(typecheck, {
1180
1179
  type: "positional",
1181
1180
  args: [__stack.args.agencyCode]
@@ -1378,7 +1377,6 @@ async function ___parse_impl(agencyCode) {
1378
1377
  });
1379
1378
  });
1380
1379
  await runner.step(1, async (runner2) => {
1381
- __self.__retryable = false;
1382
1380
  __stack.locals.result = await __call(parseAST, {
1383
1381
  type: "positional",
1384
1382
  args: [__stack.args.agencyCode]
@@ -5,117 +5,237 @@ description: Documents `agency eval optimize` — the eval-driven loop that rewr
5
5
 
6
6
  # Optimizing agents
7
7
 
8
- `agency eval optimize` (also `agency optimize`) improves an agent by rewriting the declarations you mark with the `optimize` modifier. It evaluates the baseline, asks a mutator model to propose new values for those declarations, runs and grades each candidate against your inputs, and keeps the best one.
8
+ `agency optimize` improves an agent by rewriting your prompts for you.
9
9
 
10
- ```bash
11
- agency optimize agent.agency --goal "Return the capital of the given country."
12
- agency optimize agent.agency --inputs inputs.json --graders grading.ts --iterations 5
13
- agency optimize agent.agency:main --inputs inputs.json --validation-split 0.3 --no-writeback
10
+ For example, let's say you are writing an agent to return the capital of India. Here's your code:
11
+
12
+ ```ts
13
+ node main() {
14
+ const prompt = "What is the capital of France?"
15
+ const response = llm(prompt)
16
+ return response
17
+ }
18
+ ```
19
+
20
+ Notice that the prompt is incorrectly asking for the capital of France. We're going to have the optimizer change this prompt to India. It's really easy to get started with the optimizer for a toy example like this. First, we need to mark the targets we want the optimizer to optimize:
21
+
22
+ ```ts
23
+ node main() {
24
+ // added `optimize` to next line
25
+ optimize const prompt = "What is the capital of France?"
26
+ const response = llm(prompt)
27
+ return response
28
+ }
29
+ ```
30
+
31
+ The only change needed is the `optimize` modifier on the `prompt` variable declaration. Now call the `optimize` command, giving it your agency file and a goal:
32
+
33
+ ```
34
+ agency optimize foo.agency --goal 'Return the capital of India'
35
+ ```
36
+
37
+ If you run this command, you'll see output similar to this:
38
+
39
+ ```
40
+ grading:
41
+ - goal
42
+ first input: input-1 — goal: Return the capital of India
43
+
44
+ == optimize greedy (run demo-run): 1 target(s), 1 input(s), up to 5 iteration(s) ==
45
+ - bar.agency:main:prompt = "What is the capital of France?"
46
+ baseline objective 0.000
47
+ iter 1/5 accepted objective 1.000 (6.3s)
48
+ ~ bar.agency:main:prompt:
49
+ - What is the capital of France?
50
+ + What is the capital of India?
51
+ The change focuses on directly addressing the goal of retrieving the capital of India by modifying the prompt to reflect…
52
+ reached the maximum objective (1.000) — stopping early
53
+
54
+ == Optimized variables ==
55
+ ~ bar.agency:main:prompt:
56
+ - What is the capital of France?
57
+ + What is the capital of India?
58
+
59
+ Complete: champion iteration 1, accepted 1, rejected 0, invalid 0 (10.0s)
60
+ Optimize demo-run completed: 1 accepted, 0 rejected
14
61
  ```
15
62
 
16
- ## Marking what to optimize
63
+ You can put `optimize` on any string `const`/`let` to tell the optimizer to rewrite it.
64
+
65
+ ## Inputs, graders, optimizers
66
+
67
+ The `--goal` flag makes it really easy to get started with the optimizer, but it gives you limited control. Now let's look at a more real-world example. But first, we need to explain how the optimizer works.
17
68
 
18
- Put `optimize` on any string `const`/`let` the optimizer may rewrite. Discovery starts at the agent file and follows local relative `.agency` imports.
69
+ An optimization run has three core pieces: inputs, graders, and the optimizer itself.
19
70
 
20
- ```agency
21
- optimize const systemPrompt = "Answer accurately."
71
+ ### Inputs
72
+ Inputs are examples you give to the optimizer. They are example input-output pairs.
22
73
 
23
- node main(question: string): string {
24
- optimize const prompt = "Answer accurately: ${question}"
25
- const answer: string = llm(prompt)
26
- return answer
74
+ For example, let's say we're optimizing this code:
75
+
76
+ ```ts
77
+ node main(country) {
78
+ // note prompt incorrectly says "area" instead of "capital"
79
+ optimize const prompt = `What is the area of ${country}?`
80
+ const response = llm(prompt)
81
+ return response
27
82
  }
28
83
  ```
29
84
 
30
- A rewritten value must preserve every interpolation placeholder the original used (`${question}` here). Legacy `@optimize(...)` tags are not supported.
85
+ It is very similar to the code we just saw, but now there's a `country` parameter for the node. We might give these inputs to the optimizer:
86
+
87
+ ```
88
+ {
89
+ "inputs": [
90
+ { "args": { "country": "India" }, "expected": "New Delhi" },
91
+ { "args": { "country": "Japan" }, "expected": "Tokyo" },
92
+ { "args": { "country": "Brazil" }, "expected": "Brasília" }
93
+ ]
94
+ }
95
+ ```
96
+
97
+ Save this as inputs.json and run the optimizer again:
98
+
99
+ ```
100
+ agency optimize foo.agency --goal 'Return the capital of the given country' --inputs inputs.json
101
+
102
+ ```
31
103
 
32
- ## Inputs and the goal
104
+ This will run the optimizer the same as earlier, except now it also has three example inputs to look at. The optimizer will run foo.agency once for each input. That means it will run your agent with `country` set to `"India"` for the first input, `"Japan"` for the second input, and so on, and look at the return value of the node each time.
33
105
 
34
- You describe what to optimize against with inputs and/or a goal. An input is one invocation of the agent: `args` for the node, plus optional `goal`, `expected`, `node`, `working_dir`, and freeform `metadata`.
106
+ You can optionally also provide other values:
35
107
 
36
- ```json
37
- { "inputs": [
38
- { "id": "india", "args": { "country": "India" }, "expected": "New Delhi" },
39
- { "id": "japan", "args": { "country": "Japan" }, "expected": "Tokyo" }
40
- ] }
108
+ ```ts
109
+ export type Input = {
110
+ /** Unique id. Generated for you if not given.*/
111
+ id?: string;
112
+ /** What the agent should accomplish — read by the goal judge and the
113
+ * pairwise judge suite. This is a per-input goal.*/
114
+ goal?: string;
115
+ /** Entry node to run. Defaults to `main`. */
116
+ node?: string;
117
+ /** Freeform, grader-agnostic metadata (tags, expectedOutput, …). */
118
+ metadata?: Record<string, any>;
119
+ };
41
120
  ```
42
121
 
43
- - `--inputs <file|dir>` the input suite.
44
- - `--goal <text>` — an overall goal. **Combinable with `--inputs`**: it fills in as the goal for any input that doesn't set its own. Used alone, it creates one inline no-argument input (and fails upfront if the node requires arguments).
45
- - At least one of `--inputs` / `--goal` is required.
122
+ Notice that you can pass in a per-input goal, or an overall goal, as we have been doing with the `--goal` flag. You can pass in either one or both, but at least one goal is required. The `--goal` flag only fills in goals for inputs that don't have their own; they don't get combined. So if an input already has a goal, the `--goal` flag's value won't be used.
123
+
124
+ ### Graders
125
+ So, we pass in an input, an expected output, and a goal to the optimizer. How does the optimizer compare the agent's actual output to the expected output? In our example with capitals, the expected output for India was `"New Delhi"`. What if the agent instead returned `"the capital of India is New Delhi"`? It's the job of the *grader* to decide how well the agent did. Let's look at some examples of graders.
126
+
127
+ #### ExactMatchGrader
128
+ Returns a binary pass-fail. Not the most useful grader, because it would give both of these the same score, which makes it hard for the optimizer to see if its changes to the agent are making any progress:
46
129
 
47
- `expected` is the gold output for an input (any JSON). It's read by the built-in match graders and surfaced to the optimizer's reflection — see below.
130
+ ```
131
+ // these responses would get the same score:
132
+ response1 = "asdadasdasd"
133
+ response2 = "the capital of India is New Delhi"
134
+ ```
48
135
 
49
- ## Options
136
+ #### ContainsGrader
137
+ Also returns a binary pass/fail like exact match, but this one checks to see if the expected output is anywhere in the response. Slightly better.
50
138
 
51
- | Flag | Meaning |
52
- | --- | --- |
53
- | `<file>[:<node>]` | Required agent target. A directory resolves to `main.agency`; the node defaults to `main`. |
54
- | `--inputs <file\|dir>` | Input suite file or directory. |
55
- | `--goal <text>` | Overall goal (combinable with `--inputs`; or a single inline input on its own). |
56
- | `--graders <file>` | A TypeScript grading module that replaces the default goal judge. See [Custom graders](#custom-graders). |
57
- | `--validation-inputs <file\|dir>` | Held-out validation suite. See [Validation sets](#validation-sets). |
58
- | `--validation-split <ratio>` | Hold out this fraction of `--inputs` (seeded by `--seed`) when `--validation-inputs` is absent. |
59
- | `--optimizer <name>` | `greedy` (default), `gepa`, or `example`. |
60
- | `--iterations <n>` | Max candidate iterations after the baseline. Default `5`. |
61
- | `--minibatch <n>` | GEPA minibatch size (gepa only). Default `8`. |
62
- | `--seed <n>` | RNG seed for reproducible search / validation split. |
63
- | `--mutator-model <model>` | Model override for proposing mutations. |
64
- | `--no-writeback` | Don't write the champion back to the source files. |
65
- | `--silent` | Print nothing; artifacts are still written. |
66
- | `--run-id <id>` | Output run id (must not already exist). |
67
- | `--runs-dir <path>` | Output root. Defaults to `eval.optimizeRunsDir`, then `eval.runsDir/optimize`, then `runs/optimize`. |
139
+ #### SimilarityGrader
140
+ Calculates the Levenshtein distance between the response and the expected output, and returns a score between 0 and 1 (0 = no match, 1 = perfect match).
68
141
 
69
- The baseline runs the unmutated program first; if a baseline input fails (or fails a `mustPass` gate), the run aborts and reports the failing inputs — a failure before any mutation means the program or suite is broken, not the optimization.
142
+ #### LLM Judge
143
+ Asks an LLM to return a score between 0 and 1 (0 = no match, 1 = perfect match) for how well the response matches the expected output.
70
144
 
71
- ## Custom graders
145
+ This is the default grader.
72
146
 
73
- By default a run is graded by one built-in LLM judge that scores each output against the input's `goal` (or the overall `--goal`). To grade differently — match a known answer, run a deterministic check, combine several graders — pass `--graders ./grading.ts` (or set `eval.optimize.graders` in `agency.json`). The module **replaces** the default judge.
147
+ ### Custom graders
74
148
 
75
- A grading module **default-exports one grader or an array of graders**. A "grader" is any of:
149
+ So far, we have just been using the LLM Judge, which is the default grader. But we can also specify a custom grader using the `--graders` flag.
150
+
151
+ First write a grader file:
76
152
 
77
153
  ```ts
78
- import { grader, scalar, ExactMatch, Contains, LlmJudge, type Grader } from "agency-lang/optimize";
154
+ // graders.ts
155
+ import { type Grader } from "agency-lang/optimize";
156
+
157
+ // `input` is the typed Input; the gold answer is at `input.expected`
158
+ // `output` is the actual response from your agent.
159
+ const exact: Grader = ({ output, input }) => {
160
+ // return a number (0..1), a boolean, or a Grade
161
+ return output === input.expected ? 1 : 0;
162
+ }
163
+
164
+ export default exact;
165
+ ```
166
+
167
+ Use the grader:
168
+
169
+ ```
170
+ agency optimize foo.agency --goal 'Return the capital of India' --graders graders.ts
171
+ ```
172
+
173
+ That's a really simple example where we're writing a custom function to use as the grader. It's an exact match function which, as we know, isn't very good. We can easily change this though. Let's see some options.
79
174
 
80
- // (a) a metric function: ctx = { output, input, judge }
81
- // `input` is the typed Input; the gold answer is `input.expected`
82
- // (extra per-input data can also live under `input.metadata`).
83
- const exact: Grader = ({ output, input }) =>
84
- output === input.expected ? 1 : 0; // return a number (0..1), a boolean, or a Grade
175
+ We could call an LLM judge, passing it a custom judge prompt:
85
176
 
86
- // returning feedback too? use the scalar()/binary() constructors instead of a raw Grade literal:
177
+ ```ts
178
+ import { scalar, type Grader } from "agency-lang/optimize";
87
179
  const judged: Grader = async ({ output, input, judge }) => {
88
- const v = await judge({ goal: `Return ${input.expected}.`, output });
89
- return scalar(v.score, v.reasoning); // vs { score: { kind: "scalar", value: v.score }, feedback: v.reasoning }
180
+ const v = await judge({ goal:
181
+ `Hi this is my custom LLM judge prompt. The output should match this expected value: ${input.expected}.`,
182
+ output
183
+ });
184
+
185
+ // `scalar(value, feedback)` is an Agency helper that builds a Grade from a score plus the reasoning behind it.
186
+ // Generates something like:
187
+ //
188
+ // ```
189
+ // { score: { kind: "scalar", value: v.score }, feedback: v.reasoning }
190
+ // ```
191
+ return scalar(v.score, v.reasoning);
90
192
  };
193
+ ```
91
194
 
92
- // (b) a wrapped function carrying policy (mustPass gate, weight, threshold, samples, inputScope)
93
- const gate = grader(exact, { mustPass: true, name: "capital-exact" });
94
-
95
- // (c) a configured built-in — matchOn defaults to ["expected"]
96
- const has = new Contains({}); // output contains input.expected
97
- const judge = new LlmJudge({ goal: "Return the capital.", samples: 3 });
195
+ We could use a built-in grader, such as `Contains`, which passes when the output contains `input.expected`:
98
196
 
99
- export default [gate, judged]; // or `export default exact` for the simple case
197
+ ```ts
198
+ import { Contains } from "agency-lang/optimize";
199
+ export default (new Contains({}));
100
200
  ```
101
201
 
102
- A metric function returns a **number** (0..1 scalar), a **boolean** (1.0/0.0), or a full **Grade**. For a Grade with feedback, the `scalar(value, feedback?)` and `binary(pass, feedback?)` constructors are the ergonomic way to build one.
202
+ Instead of a single grader, the module can also default-export an array of graders:
103
203
 
104
- **How grades become the objective.** Every grade counts: a number contributes its value (0..1), and a boolean / `ExactMatch` / `Contains` result contributes `1.0` (pass) or `0.0` (fail) — so a binary-only grader gives you plain accuracy. The objective for an input is the weighted mean of its grades, and the run objective is the mean across inputs. `mustPass` is an orthogonal **gate**: a failed `mustPass` grader zeroes that input regardless of its other grades.
204
+ ```ts
205
+ import { Contains, scalar, type Grader } from "agency-lang/optimize";
105
206
 
106
- > **Pick a grader that has a gradient.** Exact `===` against free-form LLM output almost never matches (`"The capital is New Delhi."` ≠ `"New Delhi"`), so it scores 0 for every candidate and the search can't climb. Use `Contains`, `Similarity`, or an `LlmJudge` (or constrain the prompt to emit only the value) so a better candidate actually scores higher.
207
+ const judged: Grader = async ({ output, input, judge }) => {
208
+ const v = await judge({
209
+ goal:
210
+ `Hi this is my custom LLM judge prompt. The output should match this expected value: ${input.expected}.`,
211
+ output
212
+ });
107
213
 
108
- `ctx.judge({ goal, output })` runs the bundled LLM goal judge from inside a metric function, so you can mix deterministic and LLM grading. When a grading module is configured, a per-input `goal` is optional.
214
+ return scalar(v.score, v.reasoning);
215
+ };
109
216
 
110
- ### Steering the search without a goal
217
+ export default [new Contains({}), judged];
218
+ ```
111
219
 
112
- The optimizer's reflection is fed each input's `expected` answer **and** each grader's `feedback`, so a self-explaining grader (one that returns `{ score, feedback }`) or labeled `expected` outputs can drive the rewrites *without* a `--goal` — `--goal` is then an optional extra steer. A grader that returns only a bare score and inputs with no `expected` leave the mutator nothing to learn from, so it can only guess from the current prompt; provide one or the other.
220
+ Finally, you can use the `grader` function to wrap a custom function and supply some metadata:
113
221
 
114
- The mutator is instructed **not** to hard-code the expected answers into the prompt. A [validation set](#validation-sets) is the backstop that fails any prompt which memorizes them anyway.
222
+ ```ts
223
+ // Wrap the `exact` function from graders.ts above; `grader` is imported from "agency-lang/optimize".
224
+ // mustPass = gate: if this grader fails, that input's objective is zeroed regardless of its other grades.
225
+ // name = shown in debug output.
226
+ const gate = grader(exact, { mustPass: true, name: "capital-exact" });
227
+ ```
228
+
229
+ To recap:
230
+ - A grading module **default-exports one grader or an array of graders**.
231
+ - A metric function returns a **number** (0..1 scalar), a **boolean** (1.0/0.0), or a full **Grade**. For a Grade with feedback, the `scalar(value, feedback?)` and `binary(pass, feedback?)` constructors are the ergonomic way to build one.
232
+
233
+ #### How grades become the objective
234
+ Every grade counts: a number contributes its value (0..1), and a boolean / `ExactMatch` / `Contains` result contributes `1.0` (pass) or `0.0` (fail) — so a binary-only grader gives you plain accuracy. The objective for an input is the weighted mean of its grades, and the run objective is the mean across inputs. `mustPass` is an orthogonal **gate**: a failed `mustPass` grader zeroes that input regardless of its other grades.
115
235
 
116
236
  ## Validation sets
117
237
 
118
- Pass `--validation-inputs <file|dir>` to grade the champion against held-out inputs, or `--validation-split <ratio>` to hold out a seeded fraction of `--inputs`. Search and candidate acceptance run on the **training** inputs; with the default `greedy` optimizer the champion written back is the one with the best **validation** objective, and `report.md` shows train-vs-validation side by side so an overfit prompt (high train, flat validation) is visible. `gepa` and `example` report a validation objective but select on training; the report says so.
238
+ Pass `--validation-inputs <file|dir>` to grade the champion against held-out inputs, or `--validation-split <ratio>` to hold out a seeded fraction of `--inputs`. Search and candidate acceptance run on the **training** inputs; with the default `greedy` optimizer the champion written back is the one with the best **validation** objective, and `report.md` shows train-vs-validation side by side so an overfit prompt (high train, flat validation) is visible.
119
239
 
120
240
  ## Configuration
121
241
 
@@ -152,6 +272,9 @@ runs/optimize/<run-id>/
152
272
 
153
273
  By default the optimizer also prints progress to the console (the resolved grading setup, per-iteration decisions, and the start→end value of every optimized variable). `--silent` suppresses console output; artifacts are still written.
154
274
 
275
+ ## Optimizers
276
+ Agency comes with built-in optimizers, including `greedy` (the default) and `gepa`. Select one with the `--optimizer` flag. You can also write your own optimizer, as described below.
277
+
155
278
  ## Writing your own optimizer
156
279
 
157
280
  `greedy`, `gepa`, and `example` are built on a shared `BaseOptimizer`, which you can extend. Write a module that default-exports a **factory** `(config) => Optimizer`, then point `--optimizer` (or `eval.optimize.optimizer`) at its path — exactly like `--graders`:
@@ -178,4 +301,4 @@ agency optimize foo.agency --inputs inputs.json --optimizer ./myOptimizer.ts
178
301
 
179
302
  ## Notes
180
303
 
181
- The CLI installs an approval handler for the internal `std::agency.run(...)` calls used by eval execution. The stdlib `agency.eval.optimize(...)` function does **not** install a handler; Agency callers should wrap it in their own handler when they want auto-approval.
304
+ The CLI installs an approval handler for the internal `std::agency.run(...)` calls used by eval execution. The stdlib `agency.eval.optimize(...)` function does **not** install a handler; Agency callers should wrap it in their own handler when they want auto-approval.