@sebastiantuyu/agest 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -6
- package/dist/adapters/remote.d.ts +7 -1
- package/dist/adapters/remote.js +8 -2
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +43 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +25 -3
- package/dist/preview.d.ts +2 -1
- package/dist/preview.js +1 -4
- package/dist/runner.js +5 -2
- package/dist/stats.d.ts +2 -1
- package/dist/stats.js +1 -4
- package/package.json +6 -3
package/README.md
CHANGED
|
@@ -110,12 +110,28 @@ npx tsx examples/openrouter.test.ts
|
|
|
110
110
|
|
|
111
111
|
## Roadmap
|
|
112
112
|
|
|
113
|
-
|
|
114
|
-
- [
|
|
115
|
-
- [
|
|
116
|
-
- [
|
|
117
|
-
- [
|
|
118
|
-
- [
|
|
113
|
+
### Shipped
|
|
114
|
+
- [x] Multi-turn support: `.turns(n)` per scene
|
|
115
|
+
- [x] LLM-as-judge: `.judgedBy({ criteria, failWhen })`
|
|
116
|
+
- [x] Remote HTTP adapter for framework-agnostic testing
|
|
117
|
+
- [x] Report persistence to `.reports/` with YAML format
|
|
118
|
+
- [x] Stats CLI with multi-model comparison and dimension analysis
|
|
119
|
+
|
|
120
|
+
### Up next
|
|
121
|
+
- [ ] Schema validation: `toBe.matchingSchema(zodSchema)`
|
|
122
|
+
- [ ] Semantic similarity: `toBe.semanticallySimilarTo(text, threshold)`
|
|
123
|
+
- [ ] Statistical runs: `.runs(n)` per scene with mean/stddev reporting
|
|
124
|
+
- [ ] Vercel AI SDK adapter
|
|
125
|
+
- [ ] Snapshot regression: diff current run against a saved baseline
|
|
126
|
+
|
|
127
|
+
### Planned
|
|
128
|
+
- [ ] Cost estimation per scene (token count to dollar cost)
|
|
129
|
+
- [ ] CI/CD reporter (GitHub Actions PR comments)
|
|
130
|
+
- [ ] Tool-call trajectory assertions
|
|
131
|
+
- [ ] Watch mode for TDD-style iteration
|
|
132
|
+
- [ ] OpenAI Agents SDK adapter
|
|
133
|
+
- [ ] Webhook/n8n adapter for no-code agent sources
|
|
134
|
+
- [ ] Jest/Vitest custom matcher export
|
|
119
135
|
|
|
120
136
|
## Development requirements
|
|
121
137
|
- Node 22+
|
|
@@ -4,6 +4,12 @@ export interface RemoteAdapterOptions {
|
|
|
4
4
|
headers?: Record<string, string>;
|
|
5
5
|
/** HTTP method, defaults to POST */
|
|
6
6
|
method?: "POST" | "PUT" | "GET";
|
|
7
|
+
/**
|
|
8
|
+
* Extra fields merged into the request body.
|
|
9
|
+
* Merged *under* the output of `buildRequest`, so `buildRequest` wins on conflicts.
|
|
10
|
+
* Ignored when method is GET.
|
|
11
|
+
*/
|
|
12
|
+
body?: Record<string, unknown>;
|
|
7
13
|
/**
|
|
8
14
|
* Build the request body from the input prompt.
|
|
9
15
|
* Defaults to `{ prompt: input }`.
|
|
@@ -14,7 +20,7 @@ export interface RemoteAdapterOptions {
|
|
|
14
20
|
* When omitted the adapter tries common shapes:
|
|
15
21
|
* - `{ text }` / `{ response }` / `{ output }` / `{ message }` / plain string
|
|
16
22
|
*/
|
|
17
|
-
parseResponse?: (body:
|
|
23
|
+
parseResponse?: <TBody = unknown>(body: TBody) => AgentResponse;
|
|
18
24
|
/**
|
|
19
25
|
* Static metadata for this remote agent.
|
|
20
26
|
* Because the remote endpoint is opaque, metadata like model name,
|
package/dist/adapters/remote.js
CHANGED
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
* ```
|
|
24
24
|
*/
|
|
25
25
|
export function remote(endpoint, options = {}) {
|
|
26
|
-
const { headers = {}, method = "POST", buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
|
|
26
|
+
const { headers = {}, method = "POST", body: extraBody, buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
|
|
27
27
|
return async (input) => {
|
|
28
28
|
let res;
|
|
29
29
|
try {
|
|
@@ -32,7 +32,13 @@ export function remote(endpoint, options = {}) {
|
|
|
32
32
|
headers: { "Content-Type": "application/json", ...headers },
|
|
33
33
|
};
|
|
34
34
|
if (method !== "GET") {
|
|
35
|
-
|
|
35
|
+
const built = buildRequest(input);
|
|
36
|
+
const merged = extraBody && typeof built === "object" && built !== null
|
|
37
|
+
? { ...extraBody, ...built }
|
|
38
|
+
: extraBody && typeof built !== "object"
|
|
39
|
+
? { ...extraBody, prompt: built }
|
|
40
|
+
: built;
|
|
41
|
+
fetchOptions.body = JSON.stringify(merged);
|
|
36
42
|
}
|
|
37
43
|
res = await fetch(endpoint, fetchOptions);
|
|
38
44
|
}
|
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawn } from "child_process";
|
|
3
|
+
import { main as stats } from "./stats.js";
|
|
4
|
+
import { main as preview } from "./preview.js";
|
|
5
|
+
const command = process.argv[2];
|
|
6
|
+
async function run() {
|
|
7
|
+
const files = process.argv.slice(3);
|
|
8
|
+
if (files.length === 0) {
|
|
9
|
+
console.error(" Usage: agest run <file...>");
|
|
10
|
+
process.exit(1);
|
|
11
|
+
}
|
|
12
|
+
for (const file of files) {
|
|
13
|
+
const child = spawn("npx", ["tsx", file], {
|
|
14
|
+
stdio: "inherit",
|
|
15
|
+
shell: true,
|
|
16
|
+
});
|
|
17
|
+
const code = await new Promise((resolve) => child.on("close", (c) => resolve(c ?? 1)));
|
|
18
|
+
if (code !== 0)
|
|
19
|
+
process.exit(code);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
const commands = {
|
|
23
|
+
stats,
|
|
24
|
+
preview,
|
|
25
|
+
run,
|
|
26
|
+
};
|
|
27
|
+
if (!command || !commands[command]) {
|
|
28
|
+
console.log(`
|
|
29
|
+
Usage: agest <command>
|
|
30
|
+
|
|
31
|
+
Commands:
|
|
32
|
+
run Run test file(s) agest run tests/*.test.ts
|
|
33
|
+
stats Show aggregated test statistics
|
|
34
|
+
preview Generate an HTML report preview
|
|
35
|
+
`);
|
|
36
|
+
process.exit(command ? 1 : 0);
|
|
37
|
+
}
|
|
38
|
+
// Forward remaining args so subcommands see them at process.argv[2+]
|
|
39
|
+
process.argv = [process.argv[0], process.argv[1], ...process.argv.slice(3)];
|
|
40
|
+
commands[command]().catch((err) => {
|
|
41
|
+
console.error("Error:", err.message);
|
|
42
|
+
process.exit(1);
|
|
43
|
+
});
|
package/dist/index.d.ts
CHANGED
|
@@ -12,4 +12,6 @@ export interface AgentOptions {
|
|
|
12
12
|
name?: string;
|
|
13
13
|
}
|
|
14
14
|
export declare function scene(prompt: string): SceneBuilder;
|
|
15
|
+
/** @internal reset auto-run state between tests */
|
|
16
|
+
export declare function _resetAutoRun(): void;
|
|
15
17
|
export declare function agent(executor: AgentExecutor, fn: () => void, options?: AgentOptions): Promise<AgentReport>;
|
package/dist/index.js
CHANGED
|
@@ -5,14 +5,36 @@ export { defineConfig } from "./config";
|
|
|
5
5
|
export function scene(prompt) {
|
|
6
6
|
return getContext().registerScene(prompt);
|
|
7
7
|
}
|
|
8
|
-
|
|
8
|
+
const pendingAgents = [];
|
|
9
|
+
let autoRunScheduled = false;
|
|
10
|
+
/** @internal reset auto-run state between tests */
|
|
11
|
+
export function _resetAutoRun() {
|
|
12
|
+
pendingAgents.length = 0;
|
|
13
|
+
autoRunScheduled = false;
|
|
14
|
+
}
|
|
15
|
+
export function agent(executor, fn, options) {
|
|
9
16
|
const ctx = new AgentContext(executor, options?.name);
|
|
10
17
|
setContext(ctx);
|
|
11
18
|
try {
|
|
12
19
|
fn();
|
|
13
20
|
}
|
|
14
|
-
|
|
21
|
+
catch (err) {
|
|
15
22
|
setContext(null);
|
|
23
|
+
return Promise.reject(err);
|
|
24
|
+
}
|
|
25
|
+
setContext(null);
|
|
26
|
+
const promise = ctx.execute();
|
|
27
|
+
pendingAgents.push(promise);
|
|
28
|
+
if (!autoRunScheduled) {
|
|
29
|
+
autoRunScheduled = true;
|
|
30
|
+
process.nextTick(async () => {
|
|
31
|
+
try {
|
|
32
|
+
await Promise.all(pendingAgents);
|
|
33
|
+
}
|
|
34
|
+
catch {
|
|
35
|
+
process.exitCode = 1;
|
|
36
|
+
}
|
|
37
|
+
});
|
|
16
38
|
}
|
|
17
|
-
return ctx.execute();
|
|
39
|
+
return promise;
|
|
18
40
|
}
|
package/dist/preview.d.ts
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
declare function main(): Promise<void>;
|
|
2
|
+
export { main };
|
package/dist/preview.js
CHANGED
package/dist/runner.js
CHANGED
|
@@ -22,9 +22,12 @@ export async function executeScene(executor, scene, globalTimeout, judgeConfig,
|
|
|
22
22
|
const start = performance.now();
|
|
23
23
|
let input = scene.prompt;
|
|
24
24
|
for (let t = 0; t < turns; t++) {
|
|
25
|
+
let timer;
|
|
25
26
|
response = await Promise.race([
|
|
26
|
-
executor(input),
|
|
27
|
-
new Promise((_, reject) =>
|
|
27
|
+
executor(input).finally(() => clearTimeout(timer)),
|
|
28
|
+
new Promise((_, reject) => {
|
|
29
|
+
timer = setTimeout(() => reject(new Error(`Scene timed out after ${timeoutMs}ms`)), timeoutMs);
|
|
30
|
+
}),
|
|
28
31
|
]);
|
|
29
32
|
if (response.executionError)
|
|
30
33
|
break;
|
package/dist/stats.d.ts
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
declare function main(): Promise<void>;
|
|
2
|
+
export { main };
|
package/dist/stats.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sebastiantuyu/agest",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "A testing library for agents",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -10,6 +10,9 @@
|
|
|
10
10
|
"files": [
|
|
11
11
|
"dist"
|
|
12
12
|
],
|
|
13
|
+
"bin": {
|
|
14
|
+
"agest": "dist/cli.js"
|
|
15
|
+
},
|
|
13
16
|
"main": "dist/index.js",
|
|
14
17
|
"types": "dist/index.d.ts",
|
|
15
18
|
"exports": {
|
|
@@ -29,8 +32,8 @@
|
|
|
29
32
|
"test:coverage": "vitest run --coverage",
|
|
30
33
|
"dev": "tsx examples/basic.test.ts",
|
|
31
34
|
"test:examples": "tsx examples/basic.test.ts && tsx examples/agent.test.ts",
|
|
32
|
-
"stats": "tsx src/stats.ts",
|
|
33
|
-
"preview": "tsx src/preview.ts",
|
|
35
|
+
"stats": "tsx src/cli.ts stats",
|
|
36
|
+
"preview": "tsx src/cli.ts preview",
|
|
34
37
|
"site:preview": "npx serve site -p 3000",
|
|
35
38
|
"release:patch": "npm version patch && git push && git push --tags",
|
|
36
39
|
"release:minor": "npm version minor && git push && git push --tags",
|