@ax-llm/ax 19.0.45 → 20.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ax-llm/ax",
3
- "version": "19.0.45",
3
+ "version": "20.0.0",
4
4
  "type": "module",
5
5
  "description": "The best library to work with LLMs",
6
6
  "repository": {
@@ -25,20 +25,6 @@
25
25
  "optional": true
26
26
  }
27
27
  },
28
- "ava": {
29
- "failFast": true,
30
- "timeout": "180s",
31
- "concurrency": 1,
32
- "extensions": {
33
- "ts": "module"
34
- },
35
- "nodeArguments": [
36
- "--import=tsimp"
37
- ],
38
- "files": [
39
- "!dist/**/*"
40
- ]
41
- },
42
28
  "tsd": {
43
29
  "directory": "./"
44
30
  },
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-agent-optimize
3
3
  description: This skill helps an LLM generate correct AxAgent tuning and evaluation code using @ax-llm/ax. Use when the user asks about agent.optimize(...), judgeOptions, eval datasets, optimization targets, saved optimizedProgram artifacts, or recursive optimization guidance.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxAgent Optimize Codegen Rules (@ax-llm/ax)
@@ -18,21 +18,27 @@ Your job is to help the model choose a good optimization setup for the user's ac
18
18
  ## Use These Defaults
19
19
 
20
20
  - Use `agent.optimize(...)` only after the agent is already configured and runnable.
21
- - Prefer a deterministic custom `metric` when success is easy to score from the prediction and task record.
22
- - Prefer the built-in judge path for open-ended assistant tasks: `judgeAI` plus `judgeOptions`.
21
+ - Prefer the built-in judge path first for normal agent tuning. Most users should start with tasks that include `input` and `criteria`, then let `agent.optimize(...)` use its default actor target and judge-based metric.
22
+ - Prefer a deterministic custom `metric` only when success is easy to score from the prediction and task record.
23
+ - Add `judgeAI` plus `judgeOptions` when the judge should run on a stronger or separate model than the agent runtime model.
23
24
  - Only reach for a plain typed `AxGen` evaluator when the user needs LLM-as-judge behavior outside the built-in `agent.optimize(...)` flow.
24
- - Default optimize target is `root.actor`; use `target: 'responder'` or explicit program IDs only when the user clearly asks for that.
25
+ - Default optimize target is the actor path; do not surface `target` unless the user clearly wants responder-only tuning or explicit program IDs.
25
26
  - Use eval-safe tools or in-memory mocks because optimization replays tasks many times.
26
27
  - Prefer precise tool return schemas such as `f.object(...)` over vague `f.json(...)` whenever the agent must reason about returned fields.
27
28
  - Prefer task wording with canonical entity names like "the Atlas project" instead of ambiguous labels like "Atlas" when ambiguity could trigger pointless clarification.
28
- - Save `result.optimizedProgram`, then restore with `new AxOptimizedProgramImpl(...)` and `agent.applyOptimization(...)`.
29
+ - Save artifacts with `axSerializeOptimizedProgram(result.optimizedProgram!)`, then restore with `axDeserializeOptimizedProgram(saved)` and `agent.applyOptimization(...)`.
30
+ - For browser-safe persistence, let the caller store the serialized JSON anywhere they want such as localStorage, IndexedDB, or a backend.
31
+ - If `bootstrap` is enabled, bootstrapped demos are persisted inside `result.optimizedProgram.demos`; raw failed traces are not saved in v1.
32
+ - For first examples, pass a plain task array instead of splitting into `train` and `validation` unless the user already has a holdout set.
33
+ - GEPA-backed `agent.optimize(...)` now optimizes generic components exposed by the selected target programs; `target: 'actor'` only tunes actor components, `target: 'responder'` only tunes responder components, and `target: 'all'` broadens the component set.
34
+ - `result.optimizedProgram.componentMap` is the canonical saved artifact for agent GEPA runs. It may include actor instructions, descriptions, tool descriptions/names, templates, or runtime primitives depending on what the selected target exposes.
29
35
  - When recursive behavior matters, keep `mode: 'advanced'` on the agent and tune against realistic `recursionOptions`.
30
36
 
31
37
  ## Decision Guide
32
38
 
33
39
  Pick the optimization shape from the user's need:
34
40
 
35
- - "Make the agent use tools correctly" -> optimize `root.actor` with `expectedActions` and `forbiddenActions`.
41
+ - "Make the agent use tools correctly" -> keep the default actor target and use `expectedActions` and `forbiddenActions`.
36
42
  - "Make final answers read better" -> consider `target: 'responder'`, but only if the task is not mostly tool-selection or clarification behavior.
37
43
  - "Make the whole agent better" -> use the default actor target first; only broaden target selection when the user clearly wants that extra scope.
38
44
  - "Tune recursive delegation" -> keep `mode: 'advanced'` and use tasks that actually exercise recursion depth, fan-out, and termination choices.
@@ -101,12 +107,13 @@ Important:
101
107
  import {
102
108
  AxAIGoogleGeminiModel,
103
109
  AxJSRuntime,
104
- AxOptimizedProgramImpl,
105
110
  axDefaultOptimizerLogger,
106
111
  agent,
107
112
  ai,
108
113
  f,
109
114
  fn,
115
+ axDeserializeOptimizedProgram,
116
+ axSerializeOptimizedProgram,
110
117
  } from '@ax-llm/ax';
111
118
 
112
119
  const tools = [
@@ -159,22 +166,38 @@ const tasks = [
159
166
  ];
160
167
 
161
168
  const result = await assistant.optimize(tasks, {
162
- target: 'actor',
163
169
  maxMetricCalls: 12,
164
170
  verbose: true,
165
- optimizerLogger: axDefaultOptimizerLogger,
166
- onProgress: (progress) => {
167
- console.log(
168
- `round ${progress.round}/${progress.totalRounds} current=${progress.currentScore} best=${progress.bestScore}`
169
- );
170
- },
171
171
  });
172
172
 
173
- const saved = JSON.stringify(result.optimizedProgram, null, 2);
174
- const restored = new AxOptimizedProgramImpl(JSON.parse(saved));
173
+ const saved = axSerializeOptimizedProgram(result.optimizedProgram!);
174
+ const restored = axDeserializeOptimizedProgram(saved);
175
175
  assistant.applyOptimization(restored);
176
176
  ```
177
177
 
178
+ ## Minimal Normal-User Pattern
179
+
180
+ Start here unless the user clearly needs a hand-built scorer:
181
+
182
+ ```typescript
183
+ const tasks = [
184
+ {
185
+ input: { query: 'Send an email to Jim saying good morning.' },
186
+ criteria: 'Use the email tool and send the message to Jim.',
187
+ expectedActions: ['email.sendEmail'],
188
+ },
189
+ ];
190
+
191
+ const result = await assistant.optimize(tasks);
192
+ assistant.applyOptimization(result.optimizedProgram!);
193
+ ```
194
+
195
+ - `target` defaults to actor optimization.
196
+ - `metric` defaults to the built-in LLM judge.
197
+ - `judgeAI` is optional; if omitted, the agent falls back to its configured judge model or runtime model.
198
+ - `bootstrap: true` is a good next step for tool-heavy agents when you want GEPA to start from successful traces from the provided tasks.
199
+ - The one thing users still need is realistic task records with clear `criteria`.
200
+
178
201
  ## Deterministic Metric Pattern
179
202
 
180
203
  Use this when the task has crisp correctness and cost/behavior tradeoffs:
@@ -319,12 +342,14 @@ Decision rules:
319
342
 
320
343
  - Save `result.optimizedProgram` if the user wants portable artifacts.
321
344
  - Restore artifacts with `axDeserializeOptimizedProgram(...)`, then call `agent.applyOptimization(...)`.
345
+ - Preserve the full optimized program when saving GEPA artifacts; `componentMap` reapplies the learned strings.
322
346
  - For demonstrations, use fresh eval-safe tool state for baseline, optimize, and restored replay so side effects do not leak across phases.
323
347
  - If the user wants to show improvement, run a held-out task before optimization, then replay it on a freshly restored optimized agent.
324
348
 
325
349
  ## Examples
326
350
 
327
351
  - [RLM Agent Optimize](https://raw.githubusercontent.com/ax-llm/ax/refs/heads/main/src/examples/rlm-agent-optimize.ts) — Gemini office-assistant tuning with save/load
352
+ - [AxAgent GEPA Component Optimization](https://raw.githubusercontent.com/ax-llm/ax/refs/heads/main/src/examples/axagent-gepa-optimization.ts) — compact support-agent GEPA run with deterministic metric and artifact replay
328
353
  - [RLM Agent Recursive Optimize](https://raw.githubusercontent.com/ax-llm/ax/refs/heads/main/src/examples/rlm-agent-recursive-optimize.ts) — recursive-slot optimization artifacts
329
354
 
330
355
  ## Do Not Generate
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-agent
3
3
  description: This skill helps an LLM generate correct AxAgent code using @ax-llm/ax. Use when the user asks about agent(), child agents, namespaced functions, discovery mode, shared fields, llmQuery(...), RLM code execution, recursionOptions, or agent runtime behavior. For tuning and eval with agent.optimize(...), use ax-agent-optimize.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxAgent Codegen Rules (@ax-llm/ax)
package/skills/ax-ai.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-ai
3
3
  description: This skill helps an LLM generate correct AI provider setup and configuration code using @ax-llm/ax. Use when the user asks about ai(), providers, models, presets, embeddings, extended thinking, context caching, or mentions OpenAI/Anthropic/Google/Azure/Groq/DeepSeek/Mistral/Cohere/Together/Ollama/HuggingFace/Reka/OpenRouter with @ax-llm/ax.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AI Provider Codegen Rules (@ax-llm/ax)
package/skills/ax-flow.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-flow
3
3
  description: This skill helps an LLM generate correct AxFlow workflow code using @ax-llm/ax. Use when the user asks about flow(), AxFlow, workflow orchestration, parallel execution, DAG workflows, conditional routing, map/reduce patterns, or multi-node AI pipelines.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxFlow Codegen Rules (@ax-llm/ax)
package/skills/ax-gen.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-gen
3
3
  description: This skill helps an LLM generate correct AxGen code using @ax-llm/ax. Use when the user asks about ax(), AxGen, generators, forward(), streamingForward(), assertions, field processors, step hooks, self-tuning, or structured outputs.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxGen Codegen Rules (@ax-llm/ax)
package/skills/ax-gepa.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-gepa
3
3
  description: This skill helps an LLM generate correct AxGEPA optimization code using @ax-llm/ax. Use when the user asks about AxGEPA, GEPA, Pareto optimization, multi-objective prompt tuning, reflective prompt evolution, validationExamples, maxMetricCalls, or optimizing a generator, flow, or agent tree.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxGEPA Codegen Rules (@ax-llm/ax)
@@ -17,20 +17,22 @@ Use this skill to generate direct `AxGEPA` optimization code. Prefer short, mode
17
17
  - Always set `maxMetricCalls` to bound optimizer cost.
18
18
  - Use scalar metrics for one objective and object metrics for Pareto optimization.
19
19
  - Apply results with `program.applyOptimization(result.optimizedProgram!)`.
20
- - For tree-wide runs, expect `optimizedProgram.instructionMap`.
20
+ - For tree-wide runs, expect `optimizedProgram.componentMap`.
21
+ - Persist artifacts with `axSerializeOptimizedProgram(...)` and restore them with `axDeserializeOptimizedProgram(...)` so the same flow works in browsers and Node.
21
22
 
22
23
  ## Critical Rules
23
24
 
24
- - `AxGEPA.compile()` works for a single generator and for tree-aware roots such as flows or agents with registered instruction-bearing descendants.
25
+ - `AxGEPA.compile()` works for a single generator and for tree-aware roots such as flows or agents with registered optimizable descendants.
25
26
  - There is no separate flow-only GEPA optimizer. Use `AxGEPA` for flows too.
26
27
  - The metric may return either `number` or `Record<string, number>`.
27
28
  - Keep metrics deterministic and cheap by default.
28
29
  - Avoid extra LLM calls inside the metric unless the user explicitly wants judge-based evaluation.
29
30
  - If the user needs LLM-as-judge scoring for a non-agent GEPA run, prefer a plain typed `AxGen` evaluator instead of writing a custom judge abstraction.
30
31
  - `maxMetricCalls` must be large enough to cover the initial validation pass over `validationExamples`.
31
- - GEPA optimizes instructions. If a tree has no instruction-bearing nodes, optimization will fail.
32
+ - GEPA optimizes generic string components exposed by `getOptimizableComponents()`. If a tree exposes no components, optimization will fail.
32
33
  - Use held-out validation examples for selection. Do not reuse the training set as `validationExamples`.
33
34
  - `result.optimizedProgram` is the easy-to-apply best candidate. `result.paretoFront` is the full trade-off set for multi-objective runs.
35
+ - `bootstrap: true` can seed GEPA with demos collected from successful runs on the provided training tasks.
34
36
 
35
37
  ## Metric Selection
36
38
 
@@ -39,12 +41,12 @@ Choose the evaluation path deliberately:
39
41
  - Prefer a deterministic metric when correctness can be read directly from `prediction` and `example`.
40
42
  - Prefer a deterministic metric when cost, latency, recursion depth, or tool count matters.
41
43
  - Use a plain typed `AxGen` evaluator only when the task is genuinely qualitative and hard to score exactly.
42
- - For `agent.optimize(...)`, prefer the built-in judge path instead of manually wrapping a judge metric.
44
+ - For `agent.optimize(...)`, prefer the built-in judge path instead of manually wrapping a judge metric. Normal agent users usually do not need to set `target` or `metric` at all.
43
45
 
44
46
  Rule of thumb:
45
47
 
46
48
  - `AxGEPA` on `AxGen` or flow: use a metric first, optionally a plain typed `AxGen` evaluator if needed.
47
- - `agent.optimize(...)`: use custom `metric` for crisp scoring, otherwise `judgeAI` plus `judgeOptions`.
49
+ - `agent.optimize(...)`: use custom `metric` for crisp scoring, otherwise let the built-in judge handle scoring. Add `judgeAI` plus `judgeOptions` only when you want a stronger or separate judge model.
48
50
 
49
51
  ## Canonical Scalar Pattern
50
52
 
@@ -169,7 +171,7 @@ for (const point of result.paretoFront) {
169
171
  }
170
172
 
171
173
  wf.applyOptimization(result.optimizedProgram!);
172
- console.log(result.optimizedProgram?.instructionMap);
174
+ console.log(result.optimizedProgram?.componentMap);
173
175
  ```
174
176
 
175
177
  ## Metric Patterns
@@ -209,9 +211,9 @@ const loaded = JSON.parse(saved);
209
211
  program.applyOptimization(loaded);
210
212
  ```
211
213
 
212
- - Single-target runs usually populate both `optimizedProgram.instruction` and `optimizedProgram.instructionMap`.
213
- - Tree-wide runs rely on `instructionMap`, keyed by full program ID.
214
- - Pareto points expose candidate configs under `point.configuration.instructionMap`.
214
+ - Single-target runs usually populate both `optimizedProgram.instruction` and `optimizedProgram.componentMap`.
215
+ - Tree-wide runs rely on `componentMap`, keyed by full component key.
216
+ - Pareto points expose candidate configs under `point.configuration.componentMap`.
215
217
 
216
218
  ## Useful Options
217
219
 
@@ -244,13 +246,16 @@ const optimizer = new AxGEPA({
244
246
  - Size `maxMetricCalls` for at least one full validation pass plus several rounds.
245
247
  - If the user wants a strict budget, say so explicitly and set `maxMetricCalls`.
246
248
  - For expensive trees, start with `auto: 'light'` or fewer `numTrials`, then scale up.
249
+ - GEPA selects among exposed components using measured accept/reject history, not LLM-generated numeric scores. The LLM proposes component text; metrics decide whether to keep it.
250
+ - Function/tool trace reflection is keyed by stable component IDs where available, so function renames do not break saved candidate maps.
247
251
 
248
252
  ## Troubleshooting
249
253
 
250
254
  - Error about `maxMetricCalls` being too small: increase it until the initial validation pass fits.
251
255
  - Empty or poor Pareto front: verify the metric returns numbers for every example.
252
- - No tree optimization effect: ensure child programs are registered under the root and have instructions to mutate.
253
- - Saved optimization applies only partly: use `program.applyOptimization(...)`, not just `setInstruction(...)`, so `instructionMap` reaches the full tree.
256
+ - No tree optimization effect: ensure child programs are registered under the root and expose optimizable components.
257
+ - Saved optimization applies only partly: use `program.applyOptimization(...)`, not just `setInstruction(...)`, so `componentMap` reaches the full tree.
258
+ - Agent target seems too broad: when using `agent.optimize(...)`, narrow it with `target: 'actor'`, `target: 'responder'`, or explicit program IDs; `target: 'all'` widens the component set instead. The wrapper filters GEPA components to the selected target.
254
259
 
255
260
  ## Good Example Targets
256
261
 
@@ -258,3 +263,4 @@ const optimizer = new AxGEPA({
258
263
  - `src/examples/gepa-flow.ts`
259
264
  - `src/examples/gepa-train-inference.ts`
260
265
  - `src/examples/gepa-quality-vs-speed-optimization.ts`
266
+ - `src/examples/axagent-gepa-optimization.ts`
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-learn
3
3
  description: This skill helps an LLM generate correct AxLearn code using @ax-llm/ax. Use when the user asks about self-improving agents, trace-backed learning, feedback-aware updates, or AxLearn modes.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # AxLearn Codegen Rules (@ax-llm/ax)
package/skills/ax-llm.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax
3
3
  description: This skill helps with using the @ax-llm/ax TypeScript library for building LLM applications. Use when the user asks about ax(), ai(), f(), s(), agent(), flow(), AxGen, AxAgent, AxFlow, signatures, streaming, or mentions @ax-llm/ax.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # Ax Library (@ax-llm/ax) Quick Reference
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: ax-signature
3
3
  description: This skill helps an LLM generate correct DSPy signature code using @ax-llm/ax. Use when the user asks about signatures, s(), f(), field types, string syntax, fluent builder API, validation constraints, or type-safe inputs/outputs.
4
- version: "19.0.45"
4
+ version: "20.0.0"
5
5
  ---
6
6
 
7
7
  # Ax Signature Reference