@elizaos/plugin-training 2.0.3-beta.6 → 2.0.3-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (363) hide show
  1. package/dist/backends/native.d.ts +96 -0
  2. package/dist/backends/native.d.ts.map +1 -0
  3. package/dist/backends/native.js +308 -0
  4. package/dist/backends/native.js.map +1 -0
  5. package/dist/cli/train.d.ts +22 -0
  6. package/dist/cli/train.d.ts.map +1 -0
  7. package/dist/cli/train.js +219 -0
  8. package/dist/cli/train.js.map +1 -0
  9. package/dist/core/action-benchmark-runner.d.ts +55 -0
  10. package/dist/core/action-benchmark-runner.d.ts.map +1 -0
  11. package/dist/core/action-benchmark-runner.js +341 -0
  12. package/dist/core/action-benchmark-runner.js.map +1 -0
  13. package/dist/core/artifact-store.d.ts +72 -0
  14. package/dist/core/artifact-store.d.ts.map +1 -0
  15. package/dist/core/artifact-store.js +50 -0
  16. package/dist/core/artifact-store.js.map +1 -0
  17. package/dist/core/benchmark-matrix-artifact.d.ts +102 -0
  18. package/dist/core/benchmark-matrix-artifact.d.ts.map +1 -0
  19. package/dist/core/benchmark-matrix-artifact.js +381 -0
  20. package/dist/core/benchmark-matrix-artifact.js.map +1 -0
  21. package/dist/core/benchmark-vs-cerebras-runner.d.ts +37 -0
  22. package/dist/core/benchmark-vs-cerebras-runner.d.ts.map +1 -0
  23. package/dist/core/benchmark-vs-cerebras-runner.js +151 -0
  24. package/dist/core/benchmark-vs-cerebras-runner.js.map +1 -0
  25. package/dist/core/cerebras-eval-model.d.ts +54 -0
  26. package/dist/core/cerebras-eval-model.d.ts.map +1 -0
  27. package/dist/core/cerebras-eval-model.js +249 -0
  28. package/dist/core/cerebras-eval-model.js.map +1 -0
  29. package/dist/core/cli.d.ts +15 -0
  30. package/dist/core/cli.d.ts.map +1 -0
  31. package/dist/core/cli.js +1003 -0
  32. package/dist/core/cli.js.map +1 -0
  33. package/dist/core/context-audit.d.ts +51 -0
  34. package/dist/core/context-audit.d.ts.map +1 -0
  35. package/dist/core/context-audit.js +166 -0
  36. package/dist/core/context-audit.js.map +1 -0
  37. package/dist/core/context-catalog.d.ts +47 -0
  38. package/dist/core/context-catalog.d.ts.map +1 -0
  39. package/dist/core/context-catalog.js +269 -0
  40. package/dist/core/context-catalog.js.map +1 -0
  41. package/dist/core/context-types.d.ts +3 -0
  42. package/dist/core/context-types.d.ts.map +1 -0
  43. package/dist/core/context-types.js +18 -0
  44. package/dist/core/context-types.js.map +1 -0
  45. package/dist/core/dataset-generator.d.ts +135 -0
  46. package/dist/core/dataset-generator.d.ts.map +1 -0
  47. package/dist/core/dataset-generator.js +895 -0
  48. package/dist/core/dataset-generator.js.map +1 -0
  49. package/dist/core/eliza1-benchmark-recipe.d.ts +18 -0
  50. package/dist/core/eliza1-benchmark-recipe.d.ts.map +1 -0
  51. package/dist/core/eliza1-benchmark-recipe.js +64 -0
  52. package/dist/core/eliza1-benchmark-recipe.js.map +1 -0
  53. package/dist/core/eliza1-bundle-stager.d.ts +57 -0
  54. package/dist/core/eliza1-bundle-stager.d.ts.map +1 -0
  55. package/dist/core/eliza1-bundle-stager.js +149 -0
  56. package/dist/core/eliza1-bundle-stager.js.map +1 -0
  57. package/dist/core/ensure-cron-job.d.ts +53 -0
  58. package/dist/core/ensure-cron-job.d.ts.map +1 -0
  59. package/dist/core/ensure-cron-job.js +51 -0
  60. package/dist/core/ensure-cron-job.js.map +1 -0
  61. package/dist/core/eval-comparison-artifact.d.ts +72 -0
  62. package/dist/core/eval-comparison-artifact.d.ts.map +1 -0
  63. package/dist/core/eval-comparison-artifact.js +281 -0
  64. package/dist/core/eval-comparison-artifact.js.map +1 -0
  65. package/dist/core/feed-generation-runner.d.ts +37 -0
  66. package/dist/core/feed-generation-runner.d.ts.map +1 -0
  67. package/dist/core/feed-generation-runner.js +232 -0
  68. package/dist/core/feed-generation-runner.js.map +1 -0
  69. package/dist/core/html-escape.d.ts +5 -0
  70. package/dist/core/html-escape.d.ts.map +1 -0
  71. package/dist/core/html-escape.js +11 -0
  72. package/dist/core/html-escape.js.map +1 -0
  73. package/dist/core/huggingface-dataset-ingest.d.ts +52 -0
  74. package/dist/core/huggingface-dataset-ingest.d.ts.map +1 -0
  75. package/dist/core/huggingface-dataset-ingest.js +134 -0
  76. package/dist/core/huggingface-dataset-ingest.js.map +1 -0
  77. package/dist/core/index.d.ts +29 -0
  78. package/dist/core/index.d.ts.map +1 -0
  79. package/dist/core/index.js +204 -0
  80. package/dist/core/index.js.map +1 -0
  81. package/dist/core/privacy-filter.d.ts +95 -0
  82. package/dist/core/privacy-filter.d.ts.map +1 -0
  83. package/dist/core/privacy-filter.js +324 -0
  84. package/dist/core/privacy-filter.js.map +1 -0
  85. package/dist/core/promotion-gate.d.ts +117 -0
  86. package/dist/core/promotion-gate.d.ts.map +1 -0
  87. package/dist/core/promotion-gate.js +85 -0
  88. package/dist/core/promotion-gate.js.map +1 -0
  89. package/dist/core/promotion-persist.d.ts +116 -0
  90. package/dist/core/promotion-persist.d.ts.map +1 -0
  91. package/dist/core/promotion-persist.js +93 -0
  92. package/dist/core/promotion-persist.js.map +1 -0
  93. package/dist/core/prompt-compare.d.ts +99 -0
  94. package/dist/core/prompt-compare.d.ts.map +1 -0
  95. package/dist/core/prompt-compare.js +210 -0
  96. package/dist/core/prompt-compare.js.map +1 -0
  97. package/dist/core/replay-validator.d.ts +136 -0
  98. package/dist/core/replay-validator.d.ts.map +1 -0
  99. package/dist/core/replay-validator.js +312 -0
  100. package/dist/core/replay-validator.js.map +1 -0
  101. package/dist/core/roleplay-executor.d.ts +123 -0
  102. package/dist/core/roleplay-executor.d.ts.map +1 -0
  103. package/dist/core/roleplay-executor.js +675 -0
  104. package/dist/core/roleplay-executor.js.map +1 -0
  105. package/dist/core/roleplay-trajectories.d.ts +54 -0
  106. package/dist/core/roleplay-trajectories.d.ts.map +1 -0
  107. package/dist/core/roleplay-trajectories.js +88 -0
  108. package/dist/core/roleplay-trajectories.js.map +1 -0
  109. package/dist/core/scenario-blueprints.d.ts +62 -0
  110. package/dist/core/scenario-blueprints.d.ts.map +1 -0
  111. package/dist/core/scenario-blueprints.js +850 -0
  112. package/dist/core/scenario-blueprints.js.map +1 -0
  113. package/dist/core/scenario-runner.d.ts +36 -0
  114. package/dist/core/scenario-runner.d.ts.map +1 -0
  115. package/dist/core/scenario-runner.js +216 -0
  116. package/dist/core/scenario-runner.js.map +1 -0
  117. package/dist/core/skill-scoring-cron.d.ts +57 -0
  118. package/dist/core/skill-scoring-cron.d.ts.map +1 -0
  119. package/dist/core/skill-scoring-cron.js +180 -0
  120. package/dist/core/skill-scoring-cron.js.map +1 -0
  121. package/dist/core/test-trajectory-collector.d.ts +37 -0
  122. package/dist/core/test-trajectory-collector.d.ts.map +1 -0
  123. package/dist/core/test-trajectory-collector.js +225 -0
  124. package/dist/core/test-trajectory-collector.js.map +1 -0
  125. package/dist/core/track-c-queue-task.d.ts +37 -0
  126. package/dist/core/track-c-queue-task.d.ts.map +1 -0
  127. package/dist/core/track-c-queue-task.js +104 -0
  128. package/dist/core/track-c-queue-task.js.map +1 -0
  129. package/dist/core/training-analysis-index.d.ts +104 -0
  130. package/dist/core/training-analysis-index.d.ts.map +1 -0
  131. package/dist/core/training-analysis-index.js +3297 -0
  132. package/dist/core/training-analysis-index.js.map +1 -0
  133. package/dist/core/training-collection-runner.d.ts +508 -0
  134. package/dist/core/training-collection-runner.d.ts.map +1 -0
  135. package/dist/core/training-collection-runner.js +2299 -0
  136. package/dist/core/training-collection-runner.js.map +1 -0
  137. package/dist/core/training-config.d.ts +52 -0
  138. package/dist/core/training-config.d.ts.map +1 -0
  139. package/dist/core/training-config.js +117 -0
  140. package/dist/core/training-config.js.map +1 -0
  141. package/dist/core/training-orchestrator.d.ts +112 -0
  142. package/dist/core/training-orchestrator.d.ts.map +1 -0
  143. package/dist/core/training-orchestrator.js +729 -0
  144. package/dist/core/training-orchestrator.js.map +1 -0
  145. package/dist/core/training-readiness-report.d.ts +52 -0
  146. package/dist/core/training-readiness-report.d.ts.map +1 -0
  147. package/dist/core/training-readiness-report.js +765 -0
  148. package/dist/core/training-readiness-report.js.map +1 -0
  149. package/dist/core/trajectory-consumer.d.ts +15 -0
  150. package/dist/core/trajectory-consumer.d.ts.map +1 -0
  151. package/dist/core/trajectory-consumer.js +61 -0
  152. package/dist/core/trajectory-consumer.js.map +1 -0
  153. package/dist/core/trajectory-export-bundle.d.ts +95 -0
  154. package/dist/core/trajectory-export-bundle.d.ts.map +1 -0
  155. package/dist/core/trajectory-export-bundle.js +561 -0
  156. package/dist/core/trajectory-export-bundle.js.map +1 -0
  157. package/dist/core/trajectory-export-cron.d.ts +57 -0
  158. package/dist/core/trajectory-export-cron.d.ts.map +1 -0
  159. package/dist/core/trajectory-export-cron.js +170 -0
  160. package/dist/core/trajectory-export-cron.js.map +1 -0
  161. package/dist/core/trajectory-hf-upload.d.ts +50 -0
  162. package/dist/core/trajectory-hf-upload.d.ts.map +1 -0
  163. package/dist/core/trajectory-hf-upload.js +111 -0
  164. package/dist/core/trajectory-hf-upload.js.map +1 -0
  165. package/dist/core/trajectory-task-datasets.d.ts +62 -0
  166. package/dist/core/trajectory-task-datasets.d.ts.map +1 -0
  167. package/dist/core/trajectory-task-datasets.js +427 -0
  168. package/dist/core/trajectory-task-datasets.js.map +1 -0
  169. package/dist/core/wait-for-service.d.ts +25 -0
  170. package/dist/core/wait-for-service.d.ts.map +1 -0
  171. package/dist/core/wait-for-service.js +19 -0
  172. package/dist/core/wait-for-service.js.map +1 -0
  173. package/dist/core/workspace-runtime.d.ts +4 -0
  174. package/dist/core/workspace-runtime.d.ts.map +1 -0
  175. package/dist/core/workspace-runtime.js +25 -0
  176. package/dist/core/workspace-runtime.js.map +1 -0
  177. package/dist/dspy/artifact.d.ts +54 -0
  178. package/dist/dspy/artifact.d.ts.map +1 -0
  179. package/dist/dspy/artifact.js +61 -0
  180. package/dist/dspy/artifact.js.map +1 -0
  181. package/dist/dspy/chain-of-thought.d.ts +27 -0
  182. package/dist/dspy/chain-of-thought.d.ts.map +1 -0
  183. package/dist/dspy/chain-of-thought.js +43 -0
  184. package/dist/dspy/chain-of-thought.js.map +1 -0
  185. package/dist/dspy/examples.d.ts +72 -0
  186. package/dist/dspy/examples.d.ts.map +1 -0
  187. package/dist/dspy/examples.js +105 -0
  188. package/dist/dspy/examples.js.map +1 -0
  189. package/dist/dspy/index.d.ts +15 -0
  190. package/dist/dspy/index.d.ts.map +1 -0
  191. package/dist/dspy/index.js +40 -0
  192. package/dist/dspy/index.js.map +1 -0
  193. package/dist/dspy/lm-adapter.d.ts +100 -0
  194. package/dist/dspy/lm-adapter.d.ts.map +1 -0
  195. package/dist/dspy/lm-adapter.js +81 -0
  196. package/dist/dspy/lm-adapter.js.map +1 -0
  197. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts +23 -0
  198. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.d.ts.map +1 -0
  199. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js +85 -0
  200. package/dist/dspy/optimizers/dspy-bootstrap-fewshot.js.map +1 -0
  201. package/dist/dspy/optimizers/dspy-copro.d.ts +29 -0
  202. package/dist/dspy/optimizers/dspy-copro.d.ts.map +1 -0
  203. package/dist/dspy/optimizers/dspy-copro.js +141 -0
  204. package/dist/dspy/optimizers/dspy-copro.js.map +1 -0
  205. package/dist/dspy/optimizers/dspy-mipro.d.ts +37 -0
  206. package/dist/dspy/optimizers/dspy-mipro.d.ts.map +1 -0
  207. package/dist/dspy/optimizers/dspy-mipro.js +194 -0
  208. package/dist/dspy/optimizers/dspy-mipro.js.map +1 -0
  209. package/dist/dspy/optimizers/index.d.ts +5 -0
  210. package/dist/dspy/optimizers/index.d.ts.map +1 -0
  211. package/dist/dspy/optimizers/index.js +11 -0
  212. package/dist/dspy/optimizers/index.js.map +1 -0
  213. package/dist/dspy/optimizers/types.d.ts +39 -0
  214. package/dist/dspy/optimizers/types.d.ts.map +1 -0
  215. package/dist/dspy/optimizers/types.js +1 -0
  216. package/dist/dspy/optimizers/types.js.map +1 -0
  217. package/dist/dspy/predict.d.ts +49 -0
  218. package/dist/dspy/predict.d.ts.map +1 -0
  219. package/dist/dspy/predict.js +73 -0
  220. package/dist/dspy/predict.js.map +1 -0
  221. package/dist/dspy/signature.d.ts +88 -0
  222. package/dist/dspy/signature.d.ts.map +1 -0
  223. package/dist/dspy/signature.js +205 -0
  224. package/dist/dspy/signature.js.map +1 -0
  225. package/dist/index.d.ts +15 -0
  226. package/dist/index.d.ts.map +1 -0
  227. package/dist/index.js +15 -0
  228. package/dist/index.js.map +1 -0
  229. package/dist/optimizers/bootstrap-fewshot.d.ts +42 -0
  230. package/dist/optimizers/bootstrap-fewshot.d.ts.map +1 -0
  231. package/dist/optimizers/bootstrap-fewshot.js +92 -0
  232. package/dist/optimizers/bootstrap-fewshot.js.map +1 -0
  233. package/dist/optimizers/gepa.d.ts +63 -0
  234. package/dist/optimizers/gepa.d.ts.map +1 -0
  235. package/dist/optimizers/gepa.js +232 -0
  236. package/dist/optimizers/gepa.js.map +1 -0
  237. package/dist/optimizers/index.d.ts +7 -0
  238. package/dist/optimizers/index.d.ts.map +1 -0
  239. package/dist/optimizers/index.js +51 -0
  240. package/dist/optimizers/index.js.map +1 -0
  241. package/dist/optimizers/instruction-search.d.ts +39 -0
  242. package/dist/optimizers/instruction-search.d.ts.map +1 -0
  243. package/dist/optimizers/instruction-search.js +108 -0
  244. package/dist/optimizers/instruction-search.js.map +1 -0
  245. package/dist/optimizers/prompt-evolution.d.ts +39 -0
  246. package/dist/optimizers/prompt-evolution.d.ts.map +1 -0
  247. package/dist/optimizers/prompt-evolution.js +101 -0
  248. package/dist/optimizers/prompt-evolution.js.map +1 -0
  249. package/dist/optimizers/scoring.d.ts +139 -0
  250. package/dist/optimizers/scoring.d.ts.map +1 -0
  251. package/dist/optimizers/scoring.js +299 -0
  252. package/dist/optimizers/scoring.js.map +1 -0
  253. package/dist/optimizers/types.d.ts +105 -0
  254. package/dist/optimizers/types.d.ts.map +1 -0
  255. package/dist/optimizers/types.js +1 -0
  256. package/dist/optimizers/types.js.map +1 -0
  257. package/dist/register-runtime.d.ts +3 -0
  258. package/dist/register-runtime.d.ts.map +1 -0
  259. package/dist/register-runtime.js +60 -0
  260. package/dist/register-runtime.js.map +1 -0
  261. package/dist/register-terminal-view.d.ts +15 -0
  262. package/dist/register-terminal-view.d.ts.map +1 -0
  263. package/dist/register-terminal-view.js +31 -0
  264. package/dist/register-terminal-view.js.map +1 -0
  265. package/dist/routes/experience-routes.d.ts +21 -0
  266. package/dist/routes/experience-routes.d.ts.map +1 -0
  267. package/dist/routes/experience-routes.js +513 -0
  268. package/dist/routes/experience-routes.js.map +1 -0
  269. package/dist/routes/index.d.ts +5 -0
  270. package/dist/routes/index.d.ts.map +1 -0
  271. package/dist/routes/index.js +17 -0
  272. package/dist/routes/index.js.map +1 -0
  273. package/dist/routes/training-routes.d.ts +10 -0
  274. package/dist/routes/training-routes.d.ts.map +1 -0
  275. package/dist/routes/training-routes.js +1239 -0
  276. package/dist/routes/training-routes.js.map +1 -0
  277. package/dist/routes/training-vast-routes.d.ts +35 -0
  278. package/dist/routes/training-vast-routes.d.ts.map +1 -0
  279. package/dist/routes/training-vast-routes.js +249 -0
  280. package/dist/routes/training-vast-routes.js.map +1 -0
  281. package/dist/routes/trajectory-routes.d.ts +19 -0
  282. package/dist/routes/trajectory-routes.d.ts.map +1 -0
  283. package/dist/routes/trajectory-routes.js +1122 -0
  284. package/dist/routes/trajectory-routes.js.map +1 -0
  285. package/dist/services/index.d.ts +9 -0
  286. package/dist/services/index.d.ts.map +1 -0
  287. package/dist/services/index.js +63 -0
  288. package/dist/services/index.js.map +1 -0
  289. package/dist/services/training-backend-check.d.ts +8 -0
  290. package/dist/services/training-backend-check.d.ts.map +1 -0
  291. package/dist/services/training-backend-check.js +31 -0
  292. package/dist/services/training-backend-check.js.map +1 -0
  293. package/dist/services/training-service-like.d.ts +40 -0
  294. package/dist/services/training-service-like.d.ts.map +1 -0
  295. package/dist/services/training-service-like.js +1 -0
  296. package/dist/services/training-service-like.js.map +1 -0
  297. package/dist/services/training-service-registry.d.ts +4 -0
  298. package/dist/services/training-service-registry.d.ts.map +1 -0
  299. package/dist/services/training-service-registry.js +12 -0
  300. package/dist/services/training-service-registry.js.map +1 -0
  301. package/dist/services/training-service.d.ts +59 -0
  302. package/dist/services/training-service.d.ts.map +1 -0
  303. package/dist/services/training-service.js +154 -0
  304. package/dist/services/training-service.js.map +1 -0
  305. package/dist/services/training-trigger.d.ts +177 -0
  306. package/dist/services/training-trigger.d.ts.map +1 -0
  307. package/dist/services/training-trigger.js +300 -0
  308. package/dist/services/training-trigger.js.map +1 -0
  309. package/dist/services/training-vast-service.d.ts +149 -0
  310. package/dist/services/training-vast-service.d.ts.map +1 -0
  311. package/dist/services/training-vast-service.js +648 -0
  312. package/dist/services/training-vast-service.js.map +1 -0
  313. package/dist/services/vast-inference-stats.d.ts +37 -0
  314. package/dist/services/vast-inference-stats.d.ts.map +1 -0
  315. package/dist/services/vast-inference-stats.js +81 -0
  316. package/dist/services/vast-inference-stats.js.map +1 -0
  317. package/dist/services/vast-job-store.d.ts +74 -0
  318. package/dist/services/vast-job-store.d.ts.map +1 -0
  319. package/dist/services/vast-job-store.js +194 -0
  320. package/dist/services/vast-job-store.js.map +1 -0
  321. package/dist/services/vast-subprocess.d.ts +27 -0
  322. package/dist/services/vast-subprocess.d.ts.map +1 -0
  323. package/dist/services/vast-subprocess.js +78 -0
  324. package/dist/services/vast-subprocess.js.map +1 -0
  325. package/dist/setup-routes.d.ts +17 -0
  326. package/dist/setup-routes.d.ts.map +1 -0
  327. package/dist/setup-routes.js +319 -0
  328. package/dist/setup-routes.js.map +1 -0
  329. package/dist/ui/FineTuningSpatialView.d.ts +49 -0
  330. package/dist/ui/FineTuningSpatialView.d.ts.map +1 -0
  331. package/dist/ui/FineTuningSpatialView.js +154 -0
  332. package/dist/ui/FineTuningSpatialView.js.map +1 -0
  333. package/dist/ui/FineTuningView.d.ts +7 -0
  334. package/dist/ui/FineTuningView.d.ts.map +1 -0
  335. package/dist/ui/FineTuningView.helpers.d.ts +17 -0
  336. package/dist/ui/FineTuningView.helpers.d.ts.map +1 -0
  337. package/dist/ui/FineTuningView.helpers.js +30 -0
  338. package/dist/ui/FineTuningView.helpers.js.map +1 -0
  339. package/dist/ui/FineTuningView.interact.d.ts +2 -0
  340. package/dist/ui/FineTuningView.interact.d.ts.map +1 -0
  341. package/dist/ui/FineTuningView.interact.js +300 -0
  342. package/dist/ui/FineTuningView.interact.js.map +1 -0
  343. package/dist/ui/FineTuningView.js +4653 -0
  344. package/dist/ui/FineTuningView.js.map +1 -0
  345. package/dist/ui/fine-tuning-panels.d.ts +100 -0
  346. package/dist/ui/fine-tuning-panels.d.ts.map +1 -0
  347. package/dist/ui/fine-tuning-panels.helpers.d.ts +19 -0
  348. package/dist/ui/fine-tuning-panels.helpers.d.ts.map +1 -0
  349. package/dist/ui/fine-tuning-panels.helpers.js +77 -0
  350. package/dist/ui/fine-tuning-panels.helpers.js.map +1 -0
  351. package/dist/ui/fine-tuning-panels.js +928 -0
  352. package/dist/ui/fine-tuning-panels.js.map +1 -0
  353. package/dist/ui/index.d.ts +5 -0
  354. package/dist/ui/index.d.ts.map +1 -0
  355. package/dist/ui/index.js +5 -0
  356. package/dist/ui/index.js.map +1 -0
  357. package/dist/ui/training-view-bundle.d.ts +3 -0
  358. package/dist/ui/training-view-bundle.d.ts.map +1 -0
  359. package/dist/ui/training-view-bundle.js +7 -0
  360. package/dist/ui/training-view-bundle.js.map +1 -0
  361. package/dist/views/bundle.js +5312 -0
  362. package/dist/views/bundle.js.map +1 -0
  363. package/package.json +7 -7
@@ -0,0 +1,92 @@
1
+ const DEMONSTRATION_HEADER = "Demonstrations:"; // Heading line that renderDemonstrations emits first in the rendered block.
2
/**
 * Shrink a recorded planner input down to the text worth showing as an
 * in-context demonstration.
 *
 * Resolution order:
 *   1. The LAST line labelled `user:` / `user message:` (case-insensitive),
 *      together with any following lines that do not themselves start with a
 *      `label:` prefix.
 *   2. Failing that, the last `user_message:` line, matched the same way.
 *   3. Failing that, the raw input itself when it is at most 600 characters,
 *      otherwise its first 400 and last 200 characters joined by an ellipsis
 *      line.
 *
 * A matched user turn longer than 600 characters is cut to 600 characters and
 * suffixed with " …".
 *
 * @param {string} rawInput Recorded input text for one example.
 * @returns {string} The trimmed demonstration input.
 */
function trimDemonstrationInput(rawInput) {
  const MAX_CHARS = 600;
  const HEAD_CHARS = 400;
  const TAIL_CHARS = 200;

  // A transcript that carries conversation history lists earlier turns first,
  // so the first labelled match would surface a stale request. Walk every
  // match and keep the last one that has actual content.
  const lastNonEmptyCapture = (pattern) => {
    let latest;
    for (const match of rawInput.matchAll(pattern)) {
      const text = match[1].trim();
      if (text.length > 0) latest = text;
    }
    return latest;
  };

  // `user:` / `user message:` takes priority over `user_message:`.
  const candidate =
    lastNonEmptyCapture(
      /(?:^|\n)user(?:\s+message)?\s*:\s*([^\n]+(?:\n(?!\w+:)[^\n]+)*)/gi
    ) ??
    lastNonEmptyCapture(
      /(?:^|\n)user_message\s*:\s*([^\n]+(?:\n(?!\w+:)[^\n]+)*)/gi
    );

  if (candidate !== undefined) {
    return candidate.length <= MAX_CHARS
      ? candidate
      : `${candidate.slice(0, MAX_CHARS).trimEnd()} \u2026`;
  }

  // No labelled user turn: keep short inputs whole, otherwise keep the head
  // and the tail and drop the middle.
  if (rawInput.length <= MAX_CHARS) return rawInput;
  return (
    rawInput.slice(0, HEAD_CHARS).trimEnd() +
    "\n\u2026\n" +
    rawInput.slice(-TAIL_CHARS).trimStart()
  );
}
16
/**
 * Build the text block that lists few-shot demonstrations.
 *
 * The block starts with the demonstration header and a blank line, followed
 * by one numbered section per example (`Example N:`, `Input:` with the trimmed
 * user input, `Expected:` with the expected output) separated by blank lines.
 * Trailing whitespace is removed from the final string.
 *
 * @param {Array} examples Examples exposing `input.user` and `expectedOutput`.
 * @returns {string} The rendered block, or "" when there are no examples.
 */
function renderDemonstrations(examples) {
  if (examples.length === 0) {
    return "";
  }
  // Each example contributes four lines; the empty string yields the blank
  // separator line once everything is joined.
  const sections = examples.flatMap((demo, position) => [
    `Example ${position + 1}:`,
    `Input:\n${trimDemonstrationInput(demo.input.user)}`,
    `Expected:\n${demo.expectedOutput}`,
    "",
  ]);
  return [DEMONSTRATION_HEADER, "", ...sections].join("\n").trimEnd();
}
31
/**
 * Append a rendered demonstration block to a baseline prompt.
 *
 * With no examples the baseline is returned untouched. Otherwise the result is
 * the right-trimmed baseline, one blank line, the demonstration block, and a
 * final newline.
 *
 * @param {string} baseline Prompt text to extend.
 * @param {Array} examples Demonstrations to render after the baseline.
 * @returns {string} The combined prompt.
 */
function withDemonstrations(baseline, examples) {
  if (examples.length > 0) {
    const demoBlock = renderDemonstrations(examples);
    // Joining with "\n" produces: baseline, blank line, demos, trailing newline.
    return [baseline.trimEnd(), "", demoBlock, ""].join("\n");
  }
  return baseline;
}
39
/**
 * Bootstrap few-shot optimizer entry point.
 *
 * Scores the baseline prompt, ranks the dataset, injects the top `k` ranked
 * examples (default 5, never fewer than 1 requested) into the prompt as
 * demonstrations, and scores the resulting prompt against the same dataset.
 *
 * @param {object} input Carries `baselinePrompt`, `dataset`, `scorer`, and
 *   optional `options` (`k`, `rankByScorer`).
 * @returns {Promise<object>} `optimizedPrompt`, its `score`, the `baseline`
 *   score, a two-entry `lineage` (baseline, then the injected variant), and
 *   the chosen `fewShotExamples`.
 */
async function runBootstrapFewshot(input) {
  const demoCount = Math.max(1, input.options?.k ?? 5);
  // Always score against the full dataset, invoking the scorer as a member of
  // `input` exactly like each direct call would.
  const scorePrompt = (prompt) => input.scorer(prompt, input.dataset);

  const baselineScore = await scorePrompt(input.baselinePrompt);

  const candidates = await rankExamples(input);
  // slice() already clamps to the array length.
  const fewShotExamples = candidates.slice(0, demoCount);

  const optimizedPrompt = withDemonstrations(
    input.baselinePrompt,
    fewShotExamples
  );
  const score = await scorePrompt(optimizedPrompt);

  return {
    optimizedPrompt,
    score,
    baseline: baselineScore,
    lineage: [
      { round: 0, variant: 0, score: baselineScore, notes: "baseline" },
      {
        round: 1,
        variant: 1,
        score,
        notes: `injected ${fewShotExamples.length} demonstrations`,
      },
    ],
    fewShotExamples,
  };
}
67
+ async function rankExamples(input) {
68
+ if (input.options?.rankByScorer) {
69
+ const scored = [];
70
+ for (const example of input.dataset) {
71
+ const score = await input.scorer(input.baselinePrompt, [example]);
72
+ scored.push({ example, score });
73
+ }
74
+ scored.sort(
75
+ (a, b) => b.score - a.score || (b.example.reward ?? 0) - (a.example.reward ?? 0)
76
+ );
77
+ return scored.map((entry) => entry.example);
78
+ }
79
+ const ordered = input.dataset.map((example, index) => ({
80
+ example,
81
+ index,
82
+ reward: example.reward ?? 0
83
+ }));
84
+ ordered.sort((a, b) => b.reward - a.reward || a.index - b.index);
85
+ return ordered.map((entry) => entry.example);
86
+ }
87
+ export {
88
+ renderDemonstrations,
89
+ runBootstrapFewshot,
90
+ withDemonstrations
91
+ };
92
+ //# sourceMappingURL=bootstrap-fewshot.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/optimizers/bootstrap-fewshot.ts"],"sourcesContent":["/**\n * Bootstrap-fewshot optimizer.\n *\n * Picks the top-K examples from the dataset (ranked by reward, then by\n * agreement against the baseline) and injects them as in-context\n * demonstrations into the prompt. The output prompt is the baseline plus a\n * `Demonstrations:` block that the runtime threads through unchanged.\n *\n * This is the cheapest optimizer — it does not propose new instructions, only\n * conditions the existing prompt on a curated few-shot set. It is the fallback\n * used when threshold-fired bootstrap runs need fast turnaround.\n */\n\nimport type {\n LlmAdapter,\n OptimizationExample,\n OptimizerLineageEntry,\n OptimizerResult,\n PromptScorer,\n} from \"./types.js\";\n\nexport interface BootstrapFewshotOptions {\n /** Number of demonstrations to inject. Defaults to 5. */\n k?: number;\n /**\n * Optional scorer override. When supplied, examples are ranked by score\n * against the baseline prompt instead of by `example.reward`.\n */\n rankByScorer?: boolean;\n}\n\nexport interface BootstrapFewshotInput {\n baselinePrompt: string;\n dataset: OptimizationExample[];\n scorer: PromptScorer;\n llm: LlmAdapter;\n options?: BootstrapFewshotOptions;\n}\n\nconst DEMONSTRATION_HEADER = \"Demonstrations:\";\n\n/**\n * Trim a demonstration's `input.user` down to the bits that meaningfully\n * teach the model. The recorded planner inputs include the full provider\n * block + tool catalog + conversation history (often ~30K chars), but for\n * ICL we only need the user's current-turn request. Pulls the user message\n * out of the recorded transcript, falls back to a head/tail truncation if\n * we can't find one cleanly. Capped at ~600 chars so 5 demos stay well\n * under the model's context budget.\n */\nfunction trimDemonstrationInput(rawInput: string): string {\n // Look for a `User:` / `User message:` / final speaker line. 
Recorded\n // planner inputs typically end with \"<TURN-N>\\nuser: <message>\" or a\n // similar marker.\n const userMatch =\n rawInput.match(\n /(?:^|\\n)user(?:\\s+message)?\\s*:\\s*([^\\n]+(?:\\n(?!\\w+:)[^\\n]+)*)/i,\n ) ??\n rawInput.match(/(?:^|\\n)user_message\\s*:\\s*([^\\n]+(?:\\n(?!\\w+:)[^\\n]+)*)/i);\n const candidate = userMatch?.[1]?.trim();\n if (candidate && candidate.length > 0 && candidate.length <= 600) {\n return candidate;\n }\n if (candidate && candidate.length > 0) {\n return `${candidate.slice(0, 600).trimEnd()} …`;\n }\n // Fallback: first 400 chars + \"...\" + last 200 chars so the model sees\n // both the framing and the request without the middle bulk.\n if (rawInput.length <= 600) return rawInput;\n return (\n rawInput.slice(0, 400).trimEnd() +\n \"\\n…\\n\" +\n rawInput.slice(-200).trimStart()\n );\n}\n\n/**\n * Render a demonstration block exactly the way the runtime wires it back into\n * the system prompt. Public so `OptimizedPromptService` can rebuild the\n * combined prompt at load time.\n */\nexport function renderDemonstrations(examples: OptimizationExample[]): string {\n if (examples.length === 0) return \"\";\n const lines: string[] = [DEMONSTRATION_HEADER, \"\"];\n let idx = 1;\n for (const example of examples) {\n lines.push(`Example ${idx}:`);\n lines.push(`Input:\\n${trimDemonstrationInput(example.input.user)}`);\n lines.push(`Expected:\\n${example.expectedOutput}`);\n lines.push(\"\");\n idx += 1;\n }\n return lines.join(\"\\n\").trimEnd();\n}\n\n/**\n * Combine a baseline prompt with a rendered demonstration block. 
Idempotent\n * when `examples` is empty.\n */\nexport function withDemonstrations(\n baseline: string,\n examples: OptimizationExample[],\n): string {\n if (examples.length === 0) return baseline;\n const demos = renderDemonstrations(examples);\n return `${baseline.trimEnd()}\\n\\n${demos}\\n`;\n}\n\nexport async function runBootstrapFewshot(\n input: BootstrapFewshotInput,\n): Promise<OptimizerResult> {\n const k = Math.max(1, input.options?.k ?? 5);\n const lineage: OptimizerLineageEntry[] = [];\n const baselineScore = await input.scorer(input.baselinePrompt, input.dataset);\n lineage.push({\n round: 0,\n variant: 0,\n score: baselineScore,\n notes: \"baseline\",\n });\n\n const ranked = await rankExamples(input);\n const fewShot = ranked.slice(0, Math.min(k, ranked.length));\n\n const optimizedPrompt = withDemonstrations(input.baselinePrompt, fewShot);\n const optimizedScore = await input.scorer(optimizedPrompt, input.dataset);\n lineage.push({\n round: 1,\n variant: 1,\n score: optimizedScore,\n notes: `injected ${fewShot.length} demonstrations`,\n });\n\n return {\n optimizedPrompt,\n score: optimizedScore,\n baseline: baselineScore,\n lineage,\n fewShotExamples: fewShot,\n };\n}\n\nasync function rankExamples(\n input: BootstrapFewshotInput,\n): Promise<OptimizationExample[]> {\n if (input.options?.rankByScorer) {\n const scored: Array<{ example: OptimizationExample; score: number }> = [];\n for (const example of input.dataset) {\n const score = await input.scorer(input.baselinePrompt, [example]);\n scored.push({ example, score });\n }\n // Pull demonstrations the baseline already gets right; they are the\n // highest-confidence anchors for the model to imitate. Tie-break by\n // reward when the scorer is uninformative.\n scored.sort(\n (a, b) =>\n b.score - a.score || (b.example.reward ?? 0) - (a.example.reward ?? 0),\n );\n return scored.map((entry) => entry.example);\n }\n\n // Reward-first ranking. 
Examples without a recorded reward fall through\n // to a stable order at the back so the dataset's natural ordering wins\n // the tie-break.\n const ordered = input.dataset.map((example, index) => ({\n example,\n index,\n reward: example.reward ?? 0,\n }));\n ordered.sort((a, b) => b.reward - a.reward || a.index - b.index);\n return ordered.map((entry) => entry.example);\n}\n"],"mappings":"AAuCA,MAAM,uBAAuB;AAW7B,SAAS,uBAAuB,UAA0B;AAIxD,QAAM,YACJ,SAAS;AAAA,IACP;AAAA,EACF,KACA,SAAS,MAAM,2DAA2D;AAC5E,QAAM,YAAY,YAAY,CAAC,GAAG,KAAK;AACvC,MAAI,aAAa,UAAU,SAAS,KAAK,UAAU,UAAU,KAAK;AAChE,WAAO;AAAA,EACT;AACA,MAAI,aAAa,UAAU,SAAS,GAAG;AACrC,WAAO,GAAG,UAAU,MAAM,GAAG,GAAG,EAAE,QAAQ,CAAC;AAAA,EAC7C;AAGA,MAAI,SAAS,UAAU,IAAK,QAAO;AACnC,SACE,SAAS,MAAM,GAAG,GAAG,EAAE,QAAQ,IAC/B,eACA,SAAS,MAAM,IAAI,EAAE,UAAU;AAEnC;AAOO,SAAS,qBAAqB,UAAyC;AAC5E,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,QAAkB,CAAC,sBAAsB,EAAE;AACjD,MAAI,MAAM;AACV,aAAW,WAAW,UAAU;AAC9B,UAAM,KAAK,WAAW,GAAG,GAAG;AAC5B,UAAM,KAAK;AAAA,EAAW,uBAAuB,QAAQ,MAAM,IAAI,CAAC,EAAE;AAClE,UAAM,KAAK;AAAA,EAAc,QAAQ,cAAc,EAAE;AACjD,UAAM,KAAK,EAAE;AACb,WAAO;AAAA,EACT;AACA,SAAO,MAAM,KAAK,IAAI,EAAE,QAAQ;AAClC;AAMO,SAAS,mBACd,UACA,UACQ;AACR,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,QAAQ,qBAAqB,QAAQ;AAC3C,SAAO,GAAG,SAAS,QAAQ,CAAC;AAAA;AAAA,EAAO,KAAK;AAAA;AAC1C;AAEA,eAAsB,oBACpB,OAC0B;AAC1B,QAAM,IAAI,KAAK,IAAI,GAAG,MAAM,SAAS,KAAK,CAAC;AAC3C,QAAM,UAAmC,CAAC;AAC1C,QAAM,gBAAgB,MAAM,MAAM,OAAO,MAAM,gBAAgB,MAAM,OAAO;AAC5E,UAAQ,KAAK;AAAA,IACX,OAAO;AAAA,IACP,SAAS;AAAA,IACT,OAAO;AAAA,IACP,OAAO;AAAA,EACT,CAAC;AAED,QAAM,SAAS,MAAM,aAAa,KAAK;AACvC,QAAM,UAAU,OAAO,MAAM,GAAG,KAAK,IAAI,GAAG,OAAO,MAAM,CAAC;AAE1D,QAAM,kBAAkB,mBAAmB,MAAM,gBAAgB,OAAO;AACxE,QAAM,iBAAiB,MAAM,MAAM,OAAO,iBAAiB,MAAM,OAAO;AACxE,UAAQ,KAAK;AAAA,IACX,OAAO;AAAA,IACP,SAAS;AAAA,IACT,OAAO;AAAA,IACP,OAAO,YAAY,QAAQ,MAAM;AAAA,EACnC,CAAC;AAED,SAAO;AAAA,IACL;AAAA,IACA,OAAO;AAAA,IACP,UAAU;AAAA,IACV;AAAA,IACA,iBAAiB;AAAA,EACnB;AACF;AAEA,eAAe,aACb,OACgC;AAChC,MAAI,MAAM,SAAS,cAAc;AAC/B,UAAM,SAAiE,CAAC;AACxE,eAAW,WAAW,
MAAM,SAAS;AACnC,YAAM,QAAQ,MAAM,MAAM,OAAO,MAAM,gBAAgB,CAAC,OAAO,CAAC;AAChE,aAAO,KAAK,EAAE,SAAS,MAAM,CAAC;AAAA,IAChC;AAIA,WAAO;AAAA,MACL,CAAC,GAAG,MACF,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,UAAU,MAAM,EAAE,QAAQ,UAAU;AAAA,IACxE;AACA,WAAO,OAAO,IAAI,CAAC,UAAU,MAAM,OAAO;AAAA,EAC5C;AAKA,QAAM,UAAU,MAAM,QAAQ,IAAI,CAAC,SAAS,WAAW;AAAA,IACrD;AAAA,IACA;AAAA,IACA,QAAQ,QAAQ,UAAU;AAAA,EAC5B,EAAE;AACF,UAAQ,KAAK,CAAC,GAAG,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK;AAC/D,SAAO,QAAQ,IAAI,CAAC,UAAU,MAAM,OAAO;AAC7C;","names":[]}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Formal GEPA optimizer (Agrawal et al. 2025, https://arxiv.org/abs/2507.19457).
3
+ *
4
+ * Distinct from `prompt-evolution.ts` (plain genetic mutation):
5
+ * 1. Reflective feedback — each generation the LLM is shown
6
+ * (prompt, predicted, expected) and asked WHY it failed. The diagnostic
7
+ * text feeds the next mutation step.
8
+ * 2. Pareto-frontier selection over (score, prompt_token_count). Survivors
9
+ * are the non-dominated set, not the score top-half.
10
+ * 3. Two mutation flavors per survivor: feedback-guided rewrite and
11
+ * token-compression rewrite.
12
+ * 4. Crossover step — top two by score merged via LLM.
13
+ *
14
+ * Returns the best-score candidate from the final Pareto frontier. Ties on
15
+ * score broken by fewer tokens. Lineage records (round, variant, score,
16
+ * feedback excerpt) so OptimizedPromptArtifact consumes it transparently.
17
+ */
18
+ import type { LlmAdapter, OptimizationExample, OptimizerResult, PromptScorer } from "./types.js";
19
+ export interface GepaOptions {
20
+ /** Population size. Defaults to 12; values below 2 are raised to 2. */
21
+ population?: number;
22
+ /** Number of evolution rounds run after the seed round. Defaults to 8. */
23
+ generations?: number;
24
+ /** Held-out examples scored per candidate. When set, the dataset is subsampled once (using `rng`) before any scoring. Defaults to all examples. */
25
+ scoringSubset?: number;
26
+ /** Examples shown per reflection call, taken from the front of the held-out set. Defaults to 3; values below 1 are raised to 1. */
27
+ reflectionBatchSize?: number;
28
+ /** Sampling temperature for mutation and crossover calls. Defaults to 0.8. */
29
+ temperature?: number;
30
+ /** Reflection sampling temperature. Defaults to 0.4. */
31
+ reflectionTemperature?: number;
32
+ /** Completion max tokens for mutation and crossover calls. Defaults to 1024. */
33
+ maxTokens?: number;
34
+ /** Reflection completion max tokens. Defaults to 512. */
35
+ reflectionMaxTokens?: number;
36
+ /** Enable crossover step (merges the two top-scoring frontier prompts when the population has room). Defaults to true. */
37
+ crossover?: boolean;
38
+ /** Deterministic RNG (tests), handed to the scoring-subset sampler. Defaults to Math.random. */
39
+ rng?: () => number;
40
+ }
41
+ export interface GepaInput { // everything runGepa needs for one optimization run
42
+ baselinePrompt: string; // starting prompt; scored first (round 0, variant 0) and used as the parent of every seed mutation
43
+ dataset: OptimizationExample[]; // examples used for scoring and reflection; subsampled when options.scoringSubset is a number
44
+ scorer: PromptScorer; // awaited as scorer(prompt, heldOutExamples); higher results are preferred during selection
45
+ llm: LlmAdapter; // its complete({ system, user, temperature, maxTokens }) result is awaited and trimmed as a string
46
+ options?: GepaOptions; // optional tuning knobs; runGepa fills in a default for each omitted field
47
+ }
48
+ interface Candidate { // one scored prompt in the GEPA pool
49
+ prompt: string; // the candidate system prompt text
50
+ score: number; // result of the scorer on the held-out examples
51
+ tokens: number; // approximate token count of `prompt` (word count plus half the punctuation count)
52
+ feedback: string; // reflection diagnostic produced for this prompt; "" when there are no held-out examples
53
+ origin: string; // how it was produced: "baseline", "seed-feedback", "seed-compress", "feedback-mut", "compress-mut" or "crossover"
54
+ }
55
+ export declare function runGepa(input: GepaInput): Promise<OptimizerResult>;
56
+ /**
57
+ * Pareto frontier over (score, tokens), maximizing score and minimizing tokens: a candidate is dominated when
58
+ * another has strictly higher score AND fewer-or-equal tokens, or
59
+ * higher-or-equal score AND strictly fewer tokens.
60
+ */
61
+ export declare function paretoFrontier(pool: Candidate[]): Candidate[];
62
+ export {};
63
+ //# sourceMappingURL=gepa.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gepa.d.ts","sourceRoot":"","sources":["../../src/optimizers/gepa.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAGH,OAAO,KAAK,EACV,UAAU,EACV,mBAAmB,EAEnB,eAAe,EACf,YAAY,EACb,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,WAAW;IAC1B,uCAAuC;IACvC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,kCAAkC;IAClC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,yDAAyD;IACzD,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,sDAAsD;IACtD,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wDAAwD;IACxD,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,wDAAwD;IACxD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,yDAAyD;IACzD,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,+CAA+C;IAC/C,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,0DAA0D;IAC1D,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,SAAS;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,mBAAmB,EAAE,CAAC;IAC/B,MAAM,EAAE,YAAY,CAAC;IACrB,GAAG,EAAE,UAAU,CAAC;IAChB,OAAO,CAAC,EAAE,WAAW,CAAC;CACvB;AAkBD,UAAU,SAAS;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAaD,wBAAsB,OAAO,CAAC,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,eAAe,CAAC,CA0HxE;AAoGD;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,SAAS,EAAE,CAuB7D"}
@@ -0,0 +1,232 @@
1
+ import { subsample } from "./scoring.js";
2
+ const SYS_FEEDBACK = `Revise the SYSTEM PROMPT below based on observed failure analysis.
3
+
4
+ You will receive the current prompt and a short feedback note explaining what went wrong. Produce a revised prompt that addresses the feedback. Preserve the task contract (inputs, outputs, format) and every literal placeholder ({{agentName}}, {{providers}}, etc.) byte-identical. Output only the revised prompt body. No commentary, no fenced code blocks.`; // system message for mutate() in "feedback" mode
5
+ const SYS_COMPRESS = `Reduce the SYSTEM PROMPT below to its essentials.
6
+
7
+ Rewrite it shorter while preserving every contract guarantee. Drop redundant phrasing, collapse parallel rules, remove decorative bullets and meta-commentary. Keep every literal placeholder byte-identical. Output only the revised prompt body. No commentary, no fenced code blocks.`; // system message for mutate() in "compress" mode
8
+ const SYS_CROSSOVER = `Merge two candidate SYSTEM PROMPTS into one.
9
+
10
+ You will receive PROMPT A and PROMPT B. Produce a single prompt that takes the strongest guidance from each. Preserve the task contract and every literal placeholder. Do not exceed 1.2x the longer parent's character count. Output only the merged prompt body. No commentary, no fenced code blocks.`; // system message for the crossover call inside runGepa()
11
+ const SYS_REFLECT = `You are diagnosing why a SYSTEM PROMPT is failing.
12
+
13
+ You will receive the current prompt and a small batch of examples: each shows the user input, the model's actual output, and the expected output. Write a SHORT diagnostic (max 4 sentences) naming the concrete failure mode and a specific change to the prompt that would fix it. No filler. No restatement of the prompt. No fenced code blocks. Output plain text only.`; // system message for the diagnostic call in reflect()
14
/**
 * Evolve `input.baselinePrompt` with reflective mutation and Pareto selection.
 *
 * Flow: score the baseline, seed the pool with alternating feedback/compress
 * rewrites of it, then for each generation keep the Pareto frontier and refill
 * up to `population` with one feedback rewrite and one compression rewrite per
 * survivor, plus an optional crossover of the two top-scoring survivors.
 *
 * @param input Baseline prompt, dataset, scorer, LLM adapter and options.
 * @returns The winning prompt, its score, the baseline score, and one lineage
 *   entry per candidate that was scored.
 */
async function runGepa(input) {
  const opts = input.options;
  const population = Math.max(2, opts?.population ?? 12);
  const generations = opts?.generations ?? 8;
  const rng = opts?.rng ?? Math.random;
  const lineage = [];

  // Score on a fixed subsample when requested, otherwise on the whole dataset.
  let heldOut = input.dataset;
  if (typeof opts?.scoringSubset === "number") {
    heldOut = subsample(input.dataset, opts.scoringSubset, rng);
  }

  const ctx = {
    llm: input.llm,
    scorer: input.scorer,
    heldOut,
    reflectionBatchSize: Math.max(1, opts?.reflectionBatchSize ?? 3),
    temperature: opts?.temperature ?? 0.8,
    reflectionTemperature: opts?.reflectionTemperature ?? 0.4,
    maxTokens: opts?.maxTokens ?? 1024,
    reflectionMaxTokens: opts?.reflectionMaxTokens ?? 512
  };
  const enableCrossover = opts?.crossover ?? true;

  const baseline = await scoreCandidate(
    ctx,
    input.baselinePrompt,
    "baseline",
    0,
    0,
    lineage
  );

  // Seed round: odd slots get a feedback-guided rewrite, even slots a compression.
  let pool = [baseline];
  for (let slot = 1; slot < population; slot += 1) {
    const mode = slot % 2 === 1 ? "feedback" : "compress";
    const seedPrompt = await mutate(
      ctx,
      input.baselinePrompt,
      baseline.feedback,
      mode
    );
    const seeded = await scoreCandidate(
      ctx,
      seedPrompt,
      `seed-${mode}`,
      0,
      slot,
      lineage
    );
    pool.push(seeded);
  }

  for (let gen = 1; gen <= generations; gen += 1) {
    const frontier = paretoFrontier(pool);
    const next = frontier.slice();
    let variantIdx = next.length;

    // Two children per survivor, in this order: feedback rewrite, then
    // compression. Stop producing children as soon as the population is full.
    survivors: for (const parent of frontier) {
      const plans = [
        ["feedback", parent.feedback, "feedback-mut"],
        ["compress", "", "compress-mut"]
      ];
      for (const [mode, note, origin] of plans) {
        if (next.length >= population) break survivors;
        const rewritten = await mutate(ctx, parent.prompt, note, mode);
        const scored = await scoreCandidate(
          ctx,
          rewritten,
          origin,
          gen,
          variantIdx,
          lineage
        );
        variantIdx += 1;
        next.push(scored);
      }
    }

    // Crossover: merge the two highest-scoring survivors when room remains.
    if (enableCrossover && next.length < population && frontier.length >= 2) {
      const ranked = frontier.slice().sort((x, y) => y.score - x.score);
      const first = ranked[0];
      const second = ranked[1];
      if (first && second && first.prompt !== second.prompt) {
        const merged = await llmCall(
          ctx.llm,
          SYS_CROSSOVER,
          `PROMPT A:\n${first.prompt}\n\nPROMPT B:\n${second.prompt}`,
          ctx.temperature,
          ctx.maxTokens,
          first.prompt
        );
        const scored = await scoreCandidate(
          ctx,
          merged,
          "crossover",
          gen,
          variantIdx,
          lineage
        );
        variantIdx += 1;
        next.push(scored);
      }
    }

    pool = next;
  }

  // Winner: highest score on the final frontier, ties broken by fewer tokens.
  const finalFrontier = paretoFrontier(pool);
  let best = finalFrontier[0] ?? pool[0];
  for (const contender of finalFrontier) {
    const higher = contender.score > best.score;
    const tiedButShorter =
      contender.score === best.score && contender.tokens < best.tokens;
    if (higher || tiedButShorter) best = contender;
  }

  return {
    optimizedPrompt: best.prompt,
    score: best.score,
    baseline: baseline.score,
    lineage
  };
}
127
/**
 * Score one prompt, collect its reflection feedback, record a lineage entry,
 * and return the resulting candidate.
 *
 * The lineage note is "baseline" for the baseline, "<origin> | tokens=<n>" for
 * compression origins, and "<origin> | <feedback excerpt>" for everything else.
 *
 * @param ctx Shared run context (scorer, LLM, held-out examples, settings).
 * @param prompt Prompt text to evaluate.
 * @param origin Label describing how the prompt was produced.
 * @param round Generation number stored in the lineage entry.
 * @param variant Variant index stored in the lineage entry.
 * @param lineage Array that receives the new lineage entry (mutated in place).
 * @returns The candidate: { prompt, score, tokens, feedback, origin }.
 */
async function scoreCandidate(ctx, prompt, origin, round, variant, lineage) {
  const score = await ctx.scorer(prompt, ctx.heldOut);
  const feedback = await reflect(ctx, prompt);
  const tokens = approxTokenCount(prompt);

  let notes;
  if (origin === "baseline") {
    notes = "baseline";
  } else if (origin === "compress-mut" || origin === "seed-compress") {
    notes = `${origin} | tokens=${tokens}`;
  } else {
    notes = `${origin} | ${truncate(feedback, 120)}`;
  }

  lineage.push({ round, variant, score, notes });
  return { prompt, score, tokens, feedback, origin };
}
135
/**
 * Ask the LLM for a rewritten prompt.
 *
 * In "compress" mode the prompt itself is the user message and `feedback` is
 * ignored; otherwise the user message carries the prompt plus the failure
 * analysis (or a placeholder when the analysis is empty). The original prompt
 * is the fallback when the completion is blank.
 *
 * @param ctx Shared run context (LLM adapter, temperature, max tokens).
 * @param prompt Prompt to rewrite.
 * @param feedback Failure analysis to address; may be "".
 * @param mode "feedback" or "compress".
 * @returns The rewritten prompt text.
 */
async function mutate(ctx, prompt, feedback, mode) {
  const compressing = mode === "compress";
  const system = compressing ? SYS_COMPRESS : SYS_FEEDBACK;
  const user = compressing
    ? prompt
    : `Current prompt:\n${prompt}\n\nFailure analysis:\n${feedback || "(none provided \u2014 explore a phrasing change)"}`;
  return llmCall(
    ctx.llm,
    system,
    user,
    ctx.temperature,
    ctx.maxTokens,
    prompt
  );
}
160
/**
 * Run the prompt on the first `reflectionBatchSize` held-out examples, then
 * ask the LLM to diagnose the outputs against the expected ones.
 *
 * Example completions are requested one at a time, at temperature 0 with a
 * 256-token cap. Each transcript field is truncated to 400 characters.
 *
 * @param ctx Shared run context (LLM adapter, held-out examples, settings).
 * @param prompt Prompt under diagnosis; used as the system message.
 * @returns The trimmed diagnostic, or "" when there are no held-out examples
 *   or the diagnostic completion is blank.
 */
async function reflect(ctx, prompt) {
  const examples = ctx.heldOut;
  if (examples.length === 0) return "";

  const transcripts = [];
  let ordinal = 0;
  for (const ex of examples.slice(0, ctx.reflectionBatchSize)) {
    ordinal += 1;
    const actual = await ctx.llm.complete({
      system: prompt,
      user: ex.input.user,
      temperature: 0,
      maxTokens: 256
    });
    const block = [
      `Example ${ordinal}:`,
      `User: ${truncate(ex.input.user, 400)}`,
      `Actual: ${truncate(actual, 400)}`,
      `Expected: ${truncate(ex.expectedOutput, 400)}`
    ];
    transcripts.push(block.join("\n"));
  }

  const user = `Prompt:\n${prompt}\n\n${transcripts.join("\n\n")}`;
  return llmCall(
    ctx.llm,
    SYS_REFLECT,
    user,
    ctx.reflectionTemperature,
    ctx.reflectionMaxTokens,
    ""
  );
}
192
+ async function llmCall(llm, system, user, temperature, maxTokens, fallback) {
193
+ const result = await llm.complete({ system, user, temperature, maxTokens });
194
+ const cleaned = result.trim();
195
+ return cleaned.length > 0 ? cleaned : fallback;
196
+ }
197
+ function paretoFrontier(pool) {
198
+ const frontier = [];
199
+ for (const cur of pool) {
200
+ let dominated = false;
201
+ for (const other of pool) {
202
+ if (other === cur) continue;
203
+ const strictlyBetterScore = other.score > cur.score;
204
+ const strictlyFewerTokens = other.tokens < cur.tokens;
205
+ const noWorseScore = other.score >= cur.score;
206
+ const noMoreTokens = other.tokens <= cur.tokens;
207
+ if (strictlyBetterScore && noMoreTokens || noWorseScore && strictlyFewerTokens) {
208
+ dominated = true;
209
+ break;
210
+ }
211
+ }
212
+ if (!dominated && !frontier.some((c) => c.prompt === cur.prompt)) {
213
+ frontier.push(cur);
214
+ }
215
+ }
216
+ return frontier;
217
+ }
218
/**
 * Cheap token estimate: the number of whitespace-separated words plus half
 * (rounded down) the number of punctuation characters from the set
 * . , ; : ! ? ( ) { } [ ] " ' `
 *
 * @param text Text to measure.
 * @returns The estimate; 0 for an empty string.
 */
function approxTokenCount(text) {
  if (text.length === 0) return 0;

  // Each maximal run of non-whitespace characters is one word.
  const wordCount = (text.match(/\S+/g) ?? []).length;

  const punctuation = new Set(".,;:!?(){}[]\"'`");
  let punctCount = 0;
  for (const ch of text) {
    if (punctuation.has(ch)) punctCount += 1;
  }

  return wordCount + Math.floor(punctCount / 2);
}
224
/**
 * Shorten `text` to at most `max` UTF-16 code units, ending with an ellipsis
 * when anything was cut.
 *
 * @param text Text to shorten.
 * @param max Maximum length of the result, ellipsis included.
 * @returns `text` unchanged when it already fits; "" when `max` is zero or
 *   negative; otherwise a prefix of `text` followed by "…".
 */
function truncate(text, max) {
  if (text.length <= max) return text;
  // A non-positive budget leaves no room even for the ellipsis. Without this
  // guard, slice() would treat the negative end index as "from the end".
  if (max <= 0) return "";

  let cut = max - 1;
  // Do not cut between the halves of a surrogate pair: a lone high surrogate
  // in front of the ellipsis is not valid text.
  const lastKept = text.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    cut -= 1;
  }
  return `${text.slice(0, cut)}\u2026`;
}
228
+ export {
229
+ paretoFrontier,
230
+ runGepa
231
+ };
232
+ //# sourceMappingURL=gepa.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/optimizers/gepa.ts"],"sourcesContent":["/**\n * Formal GEPA optimizer (Goyal et al. 2024, https://arxiv.org/abs/2407.10718).\n *\n * Distinct from `prompt-evolution.ts` (plain genetic mutation):\n * 1. Reflective feedback — each generation the LLM is shown\n * (prompt, predicted, expected) and asked WHY it failed. The diagnostic\n * text feeds the next mutation step.\n * 2. Pareto-frontier selection over (score, prompt_token_count). Survivors\n * are the non-dominated set, not the score top-half.\n * 3. Two mutation flavors per survivor: feedback-guided rewrite and\n * token-compression rewrite.\n * 4. Crossover step — top two by score merged via LLM.\n *\n * Returns the best-score candidate from the final Pareto frontier. Ties on\n * score broken by fewer tokens. Lineage records (round, variant, score,\n * feedback excerpt) so OptimizedPromptArtifact consumes it transparently.\n */\n\nimport { subsample } from \"./scoring.js\";\nimport type {\n LlmAdapter,\n OptimizationExample,\n OptimizerLineageEntry,\n OptimizerResult,\n PromptScorer,\n} from \"./types.js\";\n\nexport interface GepaOptions {\n /** Population size. Defaults to 12. */\n population?: number;\n /** Generations. Defaults to 8. */\n generations?: number;\n /** Held-out examples scored per candidate. Defaults to all examples. */\n scoringSubset?: number;\n /** Examples shown per reflection call. Defaults to 3. */\n reflectionBatchSize?: number;\n /** Mutation sampling temperature. Defaults to 0.8. */\n temperature?: number;\n /** Reflection sampling temperature. Defaults to 0.4. */\n reflectionTemperature?: number;\n /** Mutation completion max tokens. Defaults to 1024. */\n maxTokens?: number;\n /** Reflection completion max tokens. Defaults to 512. */\n reflectionMaxTokens?: number;\n /** Enable crossover step. Defaults to true. */\n crossover?: boolean;\n /** Deterministic RNG (tests). Defaults to Math.random. 
*/\n rng?: () => number;\n}\n\nexport interface GepaInput {\n baselinePrompt: string;\n dataset: OptimizationExample[];\n scorer: PromptScorer;\n llm: LlmAdapter;\n options?: GepaOptions;\n}\n\nconst SYS_FEEDBACK = `Revise the SYSTEM PROMPT below based on observed failure analysis.\n\nYou will receive the current prompt and a short feedback note explaining what went wrong. Produce a revised prompt that addresses the feedback. Preserve the task contract (inputs, outputs, format) and every literal placeholder ({{agentName}}, {{providers}}, etc.) byte-identical. Output only the revised prompt body. No commentary, no fenced code blocks.`;\n\nconst SYS_COMPRESS = `Reduce the SYSTEM PROMPT below to its essentials.\n\nRewrite it shorter while preserving every contract guarantee. Drop redundant phrasing, collapse parallel rules, remove decorative bullets and meta-commentary. Keep every literal placeholder byte-identical. Output only the revised prompt body. No commentary, no fenced code blocks.`;\n\nconst SYS_CROSSOVER = `Merge two candidate SYSTEM PROMPTS into one.\n\nYou will receive PROMPT A and PROMPT B. Produce a single prompt that takes the strongest guidance from each. Preserve the task contract and every literal placeholder. Do not exceed 1.2x the longer parent's character count. Output only the merged prompt body. No commentary, no fenced code blocks.`;\n\nconst SYS_REFLECT = `You are diagnosing why a SYSTEM PROMPT is failing.\n\nYou will receive the current prompt and a small batch of examples: each shows the user input, the model's actual output, and the expected output. Write a SHORT diagnostic (max 4 sentences) naming the concrete failure mode and a specific change to the prompt that would fix it. No filler. No restatement of the prompt. No fenced code blocks. 
Output plain text only.`;\n\ninterface Candidate {\n prompt: string;\n score: number;\n tokens: number;\n feedback: string;\n origin: string;\n}\n\ninterface Ctx {\n llm: LlmAdapter;\n scorer: PromptScorer;\n heldOut: OptimizationExample[];\n reflectionBatchSize: number;\n temperature: number;\n reflectionTemperature: number;\n maxTokens: number;\n reflectionMaxTokens: number;\n}\n\nexport async function runGepa(input: GepaInput): Promise<OptimizerResult> {\n const population = Math.max(2, input.options?.population ?? 12);\n const generations = input.options?.generations ?? 8;\n const rng = input.options?.rng ?? Math.random;\n const lineage: OptimizerLineageEntry[] = [];\n const heldOut =\n typeof input.options?.scoringSubset === \"number\"\n ? subsample(input.dataset, input.options.scoringSubset, rng)\n : input.dataset;\n const ctx: Ctx = {\n llm: input.llm,\n scorer: input.scorer,\n heldOut,\n reflectionBatchSize: Math.max(1, input.options?.reflectionBatchSize ?? 3),\n temperature: input.options?.temperature ?? 0.8,\n reflectionTemperature: input.options?.reflectionTemperature ?? 0.4,\n maxTokens: input.options?.maxTokens ?? 1024,\n reflectionMaxTokens: input.options?.reflectionMaxTokens ?? 512,\n };\n const enableCrossover = input.options?.crossover ?? true;\n\n const baseline = await scoreCandidate(\n ctx,\n input.baselinePrompt,\n \"baseline\",\n 0,\n 0,\n lineage,\n );\n let pool: Candidate[] = [baseline];\n\n // Seed: alternate feedback-guided and compression mutations of the baseline.\n for (let i = 1; i < population; i += 1) {\n const mode: \"feedback\" | \"compress\" = i % 2 === 0 ? 
\"compress\" : \"feedback\";\n const seed = await mutate(\n ctx,\n input.baselinePrompt,\n baseline.feedback,\n mode,\n );\n pool.push(await scoreCandidate(ctx, seed, `seed-${mode}`, 0, i, lineage));\n }\n\n for (let gen = 1; gen <= generations; gen += 1) {\n const frontier = paretoFrontier(pool);\n const next: Candidate[] = [...frontier];\n let variantIdx = next.length;\n\n // K=2 children per survivor: one feedback-guided, one compression.\n for (const parent of frontier) {\n if (next.length >= population) break;\n const child = await mutate(\n ctx,\n parent.prompt,\n parent.feedback,\n \"feedback\",\n );\n next.push(\n await scoreCandidate(\n ctx,\n child,\n \"feedback-mut\",\n gen,\n variantIdx++,\n lineage,\n ),\n );\n if (next.length >= population) break;\n const comp = await mutate(ctx, parent.prompt, \"\", \"compress\");\n next.push(\n await scoreCandidate(\n ctx,\n comp,\n \"compress-mut\",\n gen,\n variantIdx++,\n lineage,\n ),\n );\n }\n\n // Crossover: merge the top two on the frontier when budget remains.\n if (enableCrossover && next.length < population && frontier.length >= 2) {\n const [a, b] = [...frontier].sort((x, y) => y.score - x.score);\n if (a && b && a.prompt !== b.prompt) {\n const merged = await llmCall(\n ctx.llm,\n SYS_CROSSOVER,\n `PROMPT A:\\n${a.prompt}\\n\\nPROMPT B:\\n${b.prompt}`,\n ctx.temperature,\n ctx.maxTokens,\n a.prompt,\n );\n next.push(\n await scoreCandidate(\n ctx,\n merged,\n \"crossover\",\n gen,\n variantIdx++,\n lineage,\n ),\n );\n }\n }\n\n pool = next;\n }\n\n const finalFrontier = paretoFrontier(pool);\n const best = finalFrontier.reduce<Candidate>((acc, cur) => {\n if (cur.score > acc.score) return cur;\n if (cur.score === acc.score && cur.tokens < acc.tokens) return cur;\n return acc;\n }, finalFrontier[0] ?? 
pool[0]!);\n\n return {\n optimizedPrompt: best.prompt,\n score: best.score,\n baseline: baseline.score,\n lineage,\n };\n}\n\n/**\n * Score a candidate prompt, run reflection on it, push lineage, and return\n * the populated Candidate. Centralized so seed / generation / crossover\n * paths share identical bookkeeping.\n */\nasync function scoreCandidate(\n ctx: Ctx,\n prompt: string,\n origin: string,\n round: number,\n variant: number,\n lineage: OptimizerLineageEntry[],\n): Promise<Candidate> {\n const score = await ctx.scorer(prompt, ctx.heldOut);\n const feedback = await reflect(ctx, prompt);\n const tokens = approxTokenCount(prompt);\n const note =\n origin === \"baseline\"\n ? \"baseline\"\n : origin === \"compress-mut\" || origin === \"seed-compress\"\n ? `${origin} | tokens=${tokens}`\n : `${origin} | ${truncate(feedback, 120)}`;\n lineage.push({ round, variant, score, notes: note });\n return { prompt, score, tokens, feedback, origin };\n}\n\nasync function mutate(\n ctx: Ctx,\n prompt: string,\n feedback: string,\n mode: \"feedback\" | \"compress\",\n): Promise<string> {\n if (mode === \"compress\") {\n return llmCall(\n ctx.llm,\n SYS_COMPRESS,\n prompt,\n ctx.temperature,\n ctx.maxTokens,\n prompt,\n );\n }\n const user = `Current prompt:\\n${prompt}\\n\\nFailure analysis:\\n${feedback || \"(none provided — explore a phrasing change)\"}`;\n return llmCall(\n ctx.llm,\n SYS_FEEDBACK,\n user,\n ctx.temperature,\n ctx.maxTokens,\n prompt,\n );\n}\n\n/**\n * Run the prompt against a small batch of examples, then ask the LLM to\n * diagnose what went wrong. 
The diagnostic feeds the next mutation step —\n * this is the \"reflective evolution\" half of GEPA.\n */\nasync function reflect(ctx: Ctx, prompt: string): Promise<string> {\n if (ctx.heldOut.length === 0) return \"\";\n const batch = ctx.heldOut.slice(0, ctx.reflectionBatchSize);\n const transcripts: string[] = [];\n for (let i = 0; i < batch.length; i += 1) {\n const ex = batch[i]!;\n const actual = await ctx.llm.complete({\n system: prompt,\n user: ex.input.user,\n temperature: 0,\n maxTokens: 256,\n });\n transcripts.push(\n `Example ${i + 1}:\\nUser: ${truncate(ex.input.user, 400)}\\nActual: ${truncate(actual, 400)}\\nExpected: ${truncate(ex.expectedOutput, 400)}`,\n );\n }\n const user = `Prompt:\\n${prompt}\\n\\n${transcripts.join(\"\\n\\n\")}`;\n return llmCall(\n ctx.llm,\n SYS_REFLECT,\n user,\n ctx.reflectionTemperature,\n ctx.reflectionMaxTokens,\n \"\",\n );\n}\n\nasync function llmCall(\n llm: LlmAdapter,\n system: string,\n user: string,\n temperature: number,\n maxTokens: number,\n fallback: string,\n): Promise<string> {\n const result = await llm.complete({ system, user, temperature, maxTokens });\n const cleaned = result.trim();\n return cleaned.length > 0 ? 
cleaned : fallback;\n}\n\n/**\n * Pareto frontier over (score asc, tokens asc): a candidate is dominated when\n * another has strictly higher score AND fewer-or-equal tokens, or\n * higher-or-equal score AND strictly fewer tokens.\n */\nexport function paretoFrontier(pool: Candidate[]): Candidate[] {\n const frontier: Candidate[] = [];\n for (const cur of pool) {\n let dominated = false;\n for (const other of pool) {\n if (other === cur) continue;\n const strictlyBetterScore = other.score > cur.score;\n const strictlyFewerTokens = other.tokens < cur.tokens;\n const noWorseScore = other.score >= cur.score;\n const noMoreTokens = other.tokens <= cur.tokens;\n if (\n (strictlyBetterScore && noMoreTokens) ||\n (noWorseScore && strictlyFewerTokens)\n ) {\n dominated = true;\n break;\n }\n }\n if (!dominated && !frontier.some((c) => c.prompt === cur.prompt)) {\n frontier.push(cur);\n }\n }\n return frontier;\n}\n\n/**\n * Cheap token-count proxy: whitespace word count + half the punctuation\n * count. Good enough for relative Pareto comparisons; never persisted.\n */\nfunction approxTokenCount(text: string): number {\n if (text.length === 0) return 0;\n const words = text\n .trim()\n .split(/\\s+/)\n .filter((s) => s.length > 0).length;\n const puncts = (text.match(/[.,;:!?(){}[\\]\"'`]/g) ?? 
[]).length;\n return words + Math.floor(puncts / 2);\n}\n\nfunction truncate(text: string, max: number): string {\n if (text.length <= max) return text;\n return `${text.slice(0, max - 1)}…`;\n}\n"],"mappings":"AAkBA,SAAS,iBAAiB;AAwC1B,MAAM,eAAe;AAAA;AAAA;AAIrB,MAAM,eAAe;AAAA;AAAA;AAIrB,MAAM,gBAAgB;AAAA;AAAA;AAItB,MAAM,cAAc;AAAA;AAAA;AAuBpB,eAAsB,QAAQ,OAA4C;AACxE,QAAM,aAAa,KAAK,IAAI,GAAG,MAAM,SAAS,cAAc,EAAE;AAC9D,QAAM,cAAc,MAAM,SAAS,eAAe;AAClD,QAAM,MAAM,MAAM,SAAS,OAAO,KAAK;AACvC,QAAM,UAAmC,CAAC;AAC1C,QAAM,UACJ,OAAO,MAAM,SAAS,kBAAkB,WACpC,UAAU,MAAM,SAAS,MAAM,QAAQ,eAAe,GAAG,IACzD,MAAM;AACZ,QAAM,MAAW;AAAA,IACf,KAAK,MAAM;AAAA,IACX,QAAQ,MAAM;AAAA,IACd;AAAA,IACA,qBAAqB,KAAK,IAAI,GAAG,MAAM,SAAS,uBAAuB,CAAC;AAAA,IACxE,aAAa,MAAM,SAAS,eAAe;AAAA,IAC3C,uBAAuB,MAAM,SAAS,yBAAyB;AAAA,IAC/D,WAAW,MAAM,SAAS,aAAa;AAAA,IACvC,qBAAqB,MAAM,SAAS,uBAAuB;AAAA,EAC7D;AACA,QAAM,kBAAkB,MAAM,SAAS,aAAa;AAEpD,QAAM,WAAW,MAAM;AAAA,IACrB;AAAA,IACA,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,MAAI,OAAoB,CAAC,QAAQ;AAGjC,WAAS,IAAI,GAAG,IAAI,YAAY,KAAK,GAAG;AACtC,UAAM,OAAgC,IAAI,MAAM,IAAI,aAAa;AACjE,UAAM,OAAO,MAAM;AAAA,MACjB;AAAA,MACA,MAAM;AAAA,MACN,SAAS;AAAA,MACT;AAAA,IACF;AACA,SAAK,KAAK,MAAM,eAAe,KAAK,MAAM,QAAQ,IAAI,IAAI,GAAG,GAAG,OAAO,CAAC;AAAA,EAC1E;AAEA,WAAS,MAAM,GAAG,OAAO,aAAa,OAAO,GAAG;AAC9C,UAAM,WAAW,eAAe,IAAI;AACpC,UAAM,OAAoB,CAAC,GAAG,QAAQ;AACtC,QAAI,aAAa,KAAK;AAGtB,eAAW,UAAU,UAAU;AAC7B,UAAI,KAAK,UAAU,WAAY;AAC/B,YAAM,QAAQ,MAAM;AAAA,QAClB;AAAA,QACA,OAAO;AAAA,QACP,OAAO;AAAA,QACP;AAAA,MACF;AACA,WAAK;AAAA,QACH,MAAM;AAAA,UACJ;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AACA,UAAI,KAAK,UAAU,WAAY;AAC/B,YAAM,OAAO,MAAM,OAAO,KAAK,OAAO,QAAQ,IAAI,UAAU;AAC5D,WAAK;AAAA,QACH,MAAM;AAAA,UACJ;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,QAAI,mBAAmB,KAAK,SAAS,cAAc,SAAS,UAAU,GAAG;AACvE,YAAM,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,QAAQ,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC7D,UAAI,KAAK,KAAK,EAAE,WAAW,EAAE,QAAQ;AACnC,cAAM,SAAS,MAAM;AAAA,UA
CnB,IAAI;AAAA,UACJ;AAAA,UACA;AAAA,EAAc,EAAE,MAAM;AAAA;AAAA;AAAA,EAAkB,EAAE,MAAM;AAAA,UAChD,IAAI;AAAA,UACJ,IAAI;AAAA,UACJ,EAAE;AAAA,QACJ;AACA,aAAK;AAAA,UACH,MAAM;AAAA,YACJ;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAEA,QAAM,gBAAgB,eAAe,IAAI;AACzC,QAAM,OAAO,cAAc,OAAkB,CAAC,KAAK,QAAQ;AACzD,QAAI,IAAI,QAAQ,IAAI,MAAO,QAAO;AAClC,QAAI,IAAI,UAAU,IAAI,SAAS,IAAI,SAAS,IAAI,OAAQ,QAAO;AAC/D,WAAO;AAAA,EACT,GAAG,cAAc,CAAC,KAAK,KAAK,CAAC,CAAE;AAE/B,SAAO;AAAA,IACL,iBAAiB,KAAK;AAAA,IACtB,OAAO,KAAK;AAAA,IACZ,UAAU,SAAS;AAAA,IACnB;AAAA,EACF;AACF;AAOA,eAAe,eACb,KACA,QACA,QACA,OACA,SACA,SACoB;AACpB,QAAM,QAAQ,MAAM,IAAI,OAAO,QAAQ,IAAI,OAAO;AAClD,QAAM,WAAW,MAAM,QAAQ,KAAK,MAAM;AAC1C,QAAM,SAAS,iBAAiB,MAAM;AACtC,QAAM,OACJ,WAAW,aACP,aACA,WAAW,kBAAkB,WAAW,kBACtC,GAAG,MAAM,aAAa,MAAM,KAC5B,GAAG,MAAM,MAAM,SAAS,UAAU,GAAG,CAAC;AAC9C,UAAQ,KAAK,EAAE,OAAO,SAAS,OAAO,OAAO,KAAK,CAAC;AACnD,SAAO,EAAE,QAAQ,OAAO,QAAQ,UAAU,OAAO;AACnD;AAEA,eAAe,OACb,KACA,QACA,UACA,MACiB;AACjB,MAAI,SAAS,YAAY;AACvB,WAAO;AAAA,MACL,IAAI;AAAA,MACJ;AAAA,MACA;AAAA,MACA,IAAI;AAAA,MACJ,IAAI;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AACA,QAAM,OAAO;AAAA,EAAoB,MAAM;AAAA;AAAA;AAAA,EAA0B,YAAY,kDAA6C;AAC1H,SAAO;AAAA,IACL,IAAI;AAAA,IACJ;AAAA,IACA;AAAA,IACA,IAAI;AAAA,IACJ,IAAI;AAAA,IACJ;AAAA,EACF;AACF;AAOA,eAAe,QAAQ,KAAU,QAAiC;AAChE,MAAI,IAAI,QAAQ,WAAW,EAAG,QAAO;AACrC,QAAM,QAAQ,IAAI,QAAQ,MAAM,GAAG,IAAI,mBAAmB;AAC1D,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,GAAG;AACxC,UAAM,KAAK,MAAM,CAAC;AAClB,UAAM,SAAS,MAAM,IAAI,IAAI,SAAS;AAAA,MACpC,QAAQ;AAAA,MACR,MAAM,GAAG,MAAM;AAAA,MACf,aAAa;AAAA,MACb,WAAW;AAAA,IACb,CAAC;AACD,gBAAY;AAAA,MACV,WAAW,IAAI,CAAC;AAAA,QAAY,SAAS,GAAG,MAAM,MAAM,GAAG,CAAC;AAAA,UAAa,SAAS,QAAQ,GAAG,CAAC;AAAA,YAAe,SAAS,GAAG,gBAAgB,GAAG,CAAC;AAAA,IAC3I;AAAA,EACF;AACA,QAAM,OAAO;AAAA,EAAY,MAAM;AAAA;AAAA,EAAO,YAAY,KAAK,MAAM,CAAC;AAC9D,SAAO;AAAA,IACL,IAAI;AAAA,IACJ;AAAA,IACA;AAAA,IACA,IAAI;AAAA,IACJ,IAAI;AAAA,IACJ;AAAA,EACF;AACF;AAEA,eAAe,QACb,KACA,QACA,MACA,aACA,WACA,UACiB;AA
CjB,QAAM,SAAS,MAAM,IAAI,SAAS,EAAE,QAAQ,MAAM,aAAa,UAAU,CAAC;AAC1E,QAAM,UAAU,OAAO,KAAK;AAC5B,SAAO,QAAQ,SAAS,IAAI,UAAU;AACxC;AAOO,SAAS,eAAe,MAAgC;AAC7D,QAAM,WAAwB,CAAC;AAC/B,aAAW,OAAO,MAAM;AACtB,QAAI,YAAY;AAChB,eAAW,SAAS,MAAM;AACxB,UAAI,UAAU,IAAK;AACnB,YAAM,sBAAsB,MAAM,QAAQ,IAAI;AAC9C,YAAM,sBAAsB,MAAM,SAAS,IAAI;AAC/C,YAAM,eAAe,MAAM,SAAS,IAAI;AACxC,YAAM,eAAe,MAAM,UAAU,IAAI;AACzC,UACG,uBAAuB,gBACvB,gBAAgB,qBACjB;AACA,oBAAY;AACZ;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,aAAa,CAAC,SAAS,KAAK,CAAC,MAAM,EAAE,WAAW,IAAI,MAAM,GAAG;AAChE,eAAS,KAAK,GAAG;AAAA,IACnB;AAAA,EACF;AACA,SAAO;AACT;AAMA,SAAS,iBAAiB,MAAsB;AAC9C,MAAI,KAAK,WAAW,EAAG,QAAO;AAC9B,QAAM,QAAQ,KACX,KAAK,EACL,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE;AAC/B,QAAM,UAAU,KAAK,MAAM,qBAAqB,KAAK,CAAC,GAAG;AACzD,SAAO,QAAQ,KAAK,MAAM,SAAS,CAAC;AACtC;AAEA,SAAS,SAAS,MAAc,KAAqB;AACnD,MAAI,KAAK,UAAU,IAAK,QAAO;AAC/B,SAAO,GAAG,KAAK,MAAM,GAAG,MAAM,CAAC,CAAC;AAClC;","names":[]}
@@ -0,0 +1,7 @@
1
+ export { type BootstrapFewshotInput, type BootstrapFewshotOptions, renderDemonstrations, runBootstrapFewshot, withDemonstrations, } from "./bootstrap-fewshot.js";
2
+ export { type GepaInput, type GepaOptions, runGepa, } from "./gepa.js";
3
+ export { type InstructionSearchInput, type InstructionSearchOptions, runInstructionSearch, } from "./instruction-search.js";
4
+ export { type PromptEvolutionInput, type PromptEvolutionOptions, runPromptEvolution, } from "./prompt-evolution.js";
5
+ export { createPromptScorer, createRuntimeAdapter, extractPlannerAction, extractPlannerView, LIFEOPS_SCORER_TASKS, LIFEOPS_STRUCTURED_SCORER_TASKS, scoreActionSet, scoreAgreement, scoreLifeOpsTask, scorePlannerAction, scoreStructuredFields, scoreViewSelection, subsample, type UseModelHandler, } from "./scoring.js";
6
+ export type { LlmAdapter, OptimizationExample, OptimizedPromptArtifact, OptimizerLineageEntry, OptimizerName, OptimizerResult, PromptScorer, } from "./types.js";
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/optimizers/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,qBAAqB,EAC1B,KAAK,uBAAuB,EAC5B,oBAAoB,EACpB,mBAAmB,EACnB,kBAAkB,GACnB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,KAAK,SAAS,EACd,KAAK,WAAW,EAChB,OAAO,GACR,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,KAAK,sBAAsB,EAC3B,KAAK,wBAAwB,EAC7B,oBAAoB,GACrB,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,KAAK,oBAAoB,EACzB,KAAK,sBAAsB,EAC3B,kBAAkB,GACnB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,kBAAkB,EAClB,oBAAoB,EACpB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,+BAA+B,EAC/B,cAAc,EACd,cAAc,EACd,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,EACrB,kBAAkB,EAClB,SAAS,EACT,KAAK,eAAe,GACrB,MAAM,cAAc,CAAC;AACtB,YAAY,EACV,UAAU,EACV,mBAAmB,EACnB,uBAAuB,EACvB,qBAAqB,EACrB,aAAa,EACb,eAAe,EACf,YAAY,GACb,MAAM,YAAY,CAAC"}
@@ -0,0 +1,51 @@
1
+ import {
2
+ renderDemonstrations,
3
+ runBootstrapFewshot,
4
+ withDemonstrations
5
+ } from "./bootstrap-fewshot.js";
6
+ import {
7
+ runGepa
8
+ } from "./gepa.js";
9
+ import {
10
+ runInstructionSearch
11
+ } from "./instruction-search.js";
12
+ import {
13
+ runPromptEvolution
14
+ } from "./prompt-evolution.js";
15
+ import {
16
+ createPromptScorer,
17
+ createRuntimeAdapter,
18
+ extractPlannerAction,
19
+ extractPlannerView,
20
+ LIFEOPS_SCORER_TASKS,
21
+ LIFEOPS_STRUCTURED_SCORER_TASKS,
22
+ scoreActionSet,
23
+ scoreAgreement,
24
+ scoreLifeOpsTask,
25
+ scorePlannerAction,
26
+ scoreStructuredFields,
27
+ scoreViewSelection,
28
+ subsample
29
+ } from "./scoring.js";
30
+ export {
31
+ LIFEOPS_SCORER_TASKS,
32
+ LIFEOPS_STRUCTURED_SCORER_TASKS,
33
+ createPromptScorer,
34
+ createRuntimeAdapter,
35
+ extractPlannerAction,
36
+ extractPlannerView,
37
+ renderDemonstrations,
38
+ runBootstrapFewshot,
39
+ runGepa,
40
+ runInstructionSearch,
41
+ runPromptEvolution,
42
+ scoreActionSet,
43
+ scoreAgreement,
44
+ scoreLifeOpsTask,
45
+ scorePlannerAction,
46
+ scoreStructuredFields,
47
+ scoreViewSelection,
48
+ subsample,
49
+ withDemonstrations
50
+ };
51
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/optimizers/index.ts"],"sourcesContent":["export {\n type BootstrapFewshotInput,\n type BootstrapFewshotOptions,\n renderDemonstrations,\n runBootstrapFewshot,\n withDemonstrations,\n} from \"./bootstrap-fewshot.js\";\nexport {\n type GepaInput,\n type GepaOptions,\n runGepa,\n} from \"./gepa.js\";\nexport {\n type InstructionSearchInput,\n type InstructionSearchOptions,\n runInstructionSearch,\n} from \"./instruction-search.js\";\nexport {\n type PromptEvolutionInput,\n type PromptEvolutionOptions,\n runPromptEvolution,\n} from \"./prompt-evolution.js\";\nexport {\n createPromptScorer,\n createRuntimeAdapter,\n extractPlannerAction,\n extractPlannerView,\n LIFEOPS_SCORER_TASKS,\n LIFEOPS_STRUCTURED_SCORER_TASKS,\n scoreActionSet,\n scoreAgreement,\n scoreLifeOpsTask,\n scorePlannerAction,\n scoreStructuredFields,\n scoreViewSelection,\n subsample,\n type UseModelHandler,\n} from \"./scoring.js\";\nexport type {\n LlmAdapter,\n OptimizationExample,\n OptimizedPromptArtifact,\n OptimizerLineageEntry,\n OptimizerName,\n OptimizerResult,\n PromptScorer,\n} from \"./types.js\";\n"],"mappings":"AAAA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EAGE;AAAA,OACK;AACP;AAAA,EAGE;AAAA,OACK;AACP;AAAA,EAGE;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;","names":[]}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * MIPRO-style instruction search optimizer.
3
+ *
4
+ * Iterative improvement loop:
5
+ * 1. Ask the LLM to propose N rewrites of the current baseline prompt that
6
+ * preserve the task contract but tighten language, add guardrails, or
7
+ * reorder for clarity.
8
+ * 2. Score each candidate (plus the current baseline) on a held-out subset
9
+ * of the dataset.
10
+ * 3. Keep the highest-scoring candidate as the next round's baseline.
11
+ * 4. Repeat for `rounds` iterations.
12
+ *
13
+ * Returns the best prompt observed across all rounds along with full lineage
14
+ * (every (round, variant) -> score), so the caller can render an audit trail.
15
+ */
16
+ import type { LlmAdapter, OptimizationExample, OptimizerResult, PromptScorer } from "./types.js";
17
+ export interface InstructionSearchOptions {
18
+ /** Variants to propose per round. Defaults to 8. */
19
+ variants?: number;
20
+ /** Search rounds. Defaults to 3. */
21
+ rounds?: number;
22
+ /** Held-out examples scored per candidate. Defaults to all examples. */
23
+ scoringSubset?: number;
24
+ /** Sampling temperature for variant generation. Defaults to 0.7. */
25
+ temperature?: number;
26
+ /** Max tokens for the rewrite completion. Defaults to 1024. */
27
+ maxTokens?: number;
28
+ /** Deterministic RNG override (tests). Defaults to Math.random. */
29
+ rng?: () => number;
30
+ }
31
+ export interface InstructionSearchInput {
32
+ baselinePrompt: string;
33
+ dataset: OptimizationExample[];
34
+ scorer: PromptScorer;
35
+ llm: LlmAdapter;
36
+ options?: InstructionSearchOptions;
37
+ }
38
+ export declare function runInstructionSearch(input: InstructionSearchInput): Promise<OptimizerResult>;
39
+ //# sourceMappingURL=instruction-search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"instruction-search.d.ts","sourceRoot":"","sources":["../../src/optimizers/instruction-search.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,KAAK,EACV,UAAU,EACV,mBAAmB,EAEnB,eAAe,EACf,YAAY,EACb,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,wBAAwB;IACvC,oDAAoD;IACpD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mEAAmE;IACnE,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,cAAc,EAAE,MAAM,CAAC;IACvB,OAAO,EAAE,mBAAmB,EAAE,CAAC;IAC/B,MAAM,EAAE,YAAY,CAAC;IACrB,GAAG,EAAE,UAAU,CAAC;IAChB,OAAO,CAAC,EAAE,wBAAwB,CAAC;CACpC;AAuBD,wBAAsB,oBAAoB,CACxC,KAAK,EAAE,sBAAsB,GAC5B,OAAO,CAAC,eAAe,CAAC,CAgG1B"}