@sebastiantuyu/agest 0.2.1 → 0.2.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -110,12 +110,28 @@ npx tsx examples/openrouter.test.ts
110
110
 
111
111
  ## Roadmap
112
112
 
113
- - [ ] Multi-run support: `.runs(n)` per scene for statistical significance
114
- - [ ] Suite-level runs: `agent(exec, { runs: 3 }, () => { ... })` for overall stability benchmarks
115
- - [ ] Additional matchers: `toBe.semanticallySimilarTo(text, threshold)`, `toBe.matchingSchema(zodSchema)`
116
- - [ ] JSON/file reporters for persisting reports to disk
117
- - [ ] Snapshot comparison: diff reports across runs to track agent regression
118
- - [ ] More adapters: Vercel AI SDK, OpenAI Agents SDK, raw API calls
113
+ ### Shipped
114
+ - [x] Multi-turn support: `.turns(n)` per scene
115
+ - [x] LLM-as-judge: `.judgedBy({ criteria, failWhen })`
116
+ - [x] Remote HTTP adapter for framework-agnostic testing
117
+ - [x] Report persistence to `.reports/` with YAML format
118
+ - [x] Stats CLI with multi-model comparison and dimension analysis
119
+
120
+ ### Up next
121
+ - [ ] Schema validation: `toBe.matchingSchema(zodSchema)`
122
+ - [ ] Semantic similarity: `toBe.semanticallySimilarTo(text, threshold)`
123
+ - [ ] Statistical runs: `.runs(n)` per scene with mean/stddev reporting
124
+ - [ ] Vercel AI SDK adapter
125
+ - [ ] Snapshot regression: diff current run against a saved baseline
126
+
127
+ ### Planned
128
+ - [ ] Cost estimation per scene (token count to dollar cost)
129
+ - [ ] CI/CD reporter (GitHub Actions PR comments)
130
+ - [ ] Tool-call trajectory assertions
131
+ - [ ] Watch mode for TDD-style iteration
132
+ - [ ] OpenAI Agents SDK adapter
133
+ - [ ] Webhook/n8n adapter for no-code agent sources
134
+ - [ ] Jest/Vitest custom matcher export
119
135
 
120
136
  ## Development requirements
121
137
  - Node 22+
@@ -4,6 +4,12 @@ export interface RemoteAdapterOptions {
4
4
  headers?: Record<string, string>;
5
5
  /** HTTP method, defaults to POST */
6
6
  method?: "POST" | "PUT" | "GET";
7
+ /**
8
+ * Extra fields merged into the request body.
9
+ * Merged *under* the output of `buildRequest`, so `buildRequest` wins on conflicts.
10
+ * Ignored when method is GET.
11
+ */
12
+ body?: Record<string, unknown>;
7
13
  /**
8
14
  * Build the request body from the input prompt.
9
15
  * Defaults to `{ prompt: input }`.
@@ -23,7 +23,7 @@
23
23
  * ```
24
24
  */
25
25
  export function remote(endpoint, options = {}) {
26
- const { headers = {}, method = "POST", buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
26
+ const { headers = {}, method = "POST", body: extraBody, buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
27
27
  return async (input) => {
28
28
  let res;
29
29
  try {
@@ -32,7 +32,13 @@ export function remote(endpoint, options = {}) {
32
32
  headers: { "Content-Type": "application/json", ...headers },
33
33
  };
34
34
  if (method !== "GET") {
35
- fetchOptions.body = JSON.stringify(buildRequest(input));
35
+ const built = buildRequest(input);
36
+ const merged = extraBody && typeof built === "object" && built !== null
37
+ ? { ...extraBody, ...built }
38
+ : extraBody && typeof built !== "object"
39
+ ? { ...extraBody, prompt: built }
40
+ : built;
41
+ fetchOptions.body = JSON.stringify(merged);
36
42
  }
37
43
  res = await fetch(endpoint, fetchOptions);
38
44
  }
package/dist/index.d.ts CHANGED
@@ -12,4 +12,6 @@ export interface AgentOptions {
12
12
  name?: string;
13
13
  }
14
14
  export declare function scene(prompt: string): SceneBuilder;
15
+ /** @internal reset auto-run state between tests */
16
+ export declare function _resetAutoRun(): void;
15
17
  export declare function agent(executor: AgentExecutor, fn: () => void, options?: AgentOptions): Promise<AgentReport>;
package/dist/index.js CHANGED
@@ -5,14 +5,36 @@ export { defineConfig } from "./config";
5
5
  export function scene(prompt) {
6
6
  return getContext().registerScene(prompt);
7
7
  }
8
- export async function agent(executor, fn, options) {
8
+ const pendingAgents = [];
9
+ let autoRunScheduled = false;
10
+ /** @internal reset auto-run state between tests */
11
+ export function _resetAutoRun() {
12
+ pendingAgents.length = 0;
13
+ autoRunScheduled = false;
14
+ }
15
+ export function agent(executor, fn, options) {
9
16
  const ctx = new AgentContext(executor, options?.name);
10
17
  setContext(ctx);
11
18
  try {
12
19
  fn();
13
20
  }
14
- finally {
21
+ catch (err) {
15
22
  setContext(null);
23
+ return Promise.reject(err);
24
+ }
25
+ setContext(null);
26
+ const promise = ctx.execute();
27
+ pendingAgents.push(promise);
28
+ if (!autoRunScheduled) {
29
+ autoRunScheduled = true;
30
+ process.nextTick(async () => {
31
+ try {
32
+ await Promise.all(pendingAgents);
33
+ }
34
+ catch {
35
+ process.exitCode = 1;
36
+ }
37
+ });
16
38
  }
17
- return ctx.execute();
39
+ return promise;
18
40
  }
package/dist/runner.js CHANGED
@@ -22,9 +22,12 @@ export async function executeScene(executor, scene, globalTimeout, judgeConfig,
22
22
  const start = performance.now();
23
23
  let input = scene.prompt;
24
24
  for (let t = 0; t < turns; t++) {
25
+ let timer;
25
26
  response = await Promise.race([
26
- executor(input),
27
- new Promise((_, reject) => setTimeout(() => reject(new Error(`Scene timed out after ${timeoutMs}ms`)), timeoutMs)),
27
+ executor(input).finally(() => clearTimeout(timer)),
28
+ new Promise((_, reject) => {
29
+ timer = setTimeout(() => reject(new Error(`Scene timed out after ${timeoutMs}ms`)), timeoutMs);
30
+ }),
28
31
  ]);
29
32
  if (response.executionError)
30
33
  break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sebastiantuyu/agest",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "A testing library for agents",
5
5
  "repository": {
6
6
  "type": "git",