@sebastiantuyu/agest 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -6
- package/dist/adapters/remote.d.ts +6 -0
- package/dist/adapters/remote.js +8 -2
- package/dist/index.d.ts +2 -0
- package/dist/index.js +25 -3
- package/dist/runner.js +5 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -110,12 +110,28 @@ npx tsx examples/openrouter.test.ts
|
|
|
110
110
|
|
|
111
111
|
## Roadmap
|
|
112
112
|
|
|
113
|
-
|
|
114
|
-
- [
|
|
115
|
-
- [
|
|
116
|
-
- [
|
|
117
|
-
- [
|
|
118
|
-
- [
|
|
113
|
+
### Shipped
|
|
114
|
+
- [x] Multi-turn support: `.turns(n)` per scene
|
|
115
|
+
- [x] LLM-as-judge: `.judgedBy({ criteria, failWhen })`
|
|
116
|
+
- [x] Remote HTTP adapter for framework-agnostic testing
|
|
117
|
+
- [x] Report persistence to `.reports/` with YAML format
|
|
118
|
+
- [x] Stats CLI with multi-model comparison and dimension analysis
|
|
119
|
+
|
|
120
|
+
### Up next
|
|
121
|
+
- [ ] Schema validation: `toBe.matchingSchema(zodSchema)`
|
|
122
|
+
- [ ] Semantic similarity: `toBe.semanticallySimilarTo(text, threshold)`
|
|
123
|
+
- [ ] Statistical runs: `.runs(n)` per scene with mean/stddev reporting
|
|
124
|
+
- [ ] Vercel AI SDK adapter
|
|
125
|
+
- [ ] Snapshot regression: diff current run against a saved baseline
|
|
126
|
+
|
|
127
|
+
### Planned
|
|
128
|
+
- [ ] Cost estimation per scene (token count to dollar cost)
|
|
129
|
+
- [ ] CI/CD reporter (GitHub Actions PR comments)
|
|
130
|
+
- [ ] Tool-call trajectory assertions
|
|
131
|
+
- [ ] Watch mode for TDD-style iteration
|
|
132
|
+
- [ ] OpenAI Agents SDK adapter
|
|
133
|
+
- [ ] Webhook/n8n adapter for no-code agent sources
|
|
134
|
+
- [ ] Jest/Vitest custom matcher export
|
|
119
135
|
|
|
120
136
|
## Development requirements
|
|
121
137
|
- Node 22+
|
package/dist/adapters/remote.d.ts
CHANGED
|
@@ -4,6 +4,12 @@ export interface RemoteAdapterOptions {
|
|
|
4
4
|
headers?: Record<string, string>;
|
|
5
5
|
/** HTTP method, defaults to POST */
|
|
6
6
|
method?: "POST" | "PUT" | "GET";
|
|
7
|
+
/**
|
|
8
|
+
* Extra fields merged into the request body.
|
|
9
|
+
* Merged *under* the output of `buildRequest`, so `buildRequest` wins on conflicts.
|
|
10
|
+
* Ignored when method is GET.
|
|
11
|
+
*/
|
|
12
|
+
body?: Record<string, unknown>;
|
|
7
13
|
/**
|
|
8
14
|
* Build the request body from the input prompt.
|
|
9
15
|
* Defaults to `{ prompt: input }`.
|
package/dist/adapters/remote.js
CHANGED
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
* ```
|
|
24
24
|
*/
|
|
25
25
|
export function remote(endpoint, options = {}) {
|
|
26
|
-
const { headers = {}, method = "POST", buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
|
|
26
|
+
const { headers = {}, method = "POST", body: extraBody, buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
|
|
27
27
|
return async (input) => {
|
|
28
28
|
let res;
|
|
29
29
|
try {
|
|
@@ -32,7 +32,13 @@ export function remote(endpoint, options = {}) {
|
|
|
32
32
|
headers: { "Content-Type": "application/json", ...headers },
|
|
33
33
|
};
|
|
34
34
|
if (method !== "GET") {
|
|
35
|
-
|
|
35
|
+
const built = buildRequest(input);
|
|
36
|
+
const merged = extraBody && typeof built === "object" && built !== null
|
|
37
|
+
? { ...extraBody, ...built }
|
|
38
|
+
: extraBody && typeof built !== "object"
|
|
39
|
+
? { ...extraBody, prompt: built }
|
|
40
|
+
: built;
|
|
41
|
+
fetchOptions.body = JSON.stringify(merged);
|
|
36
42
|
}
|
|
37
43
|
res = await fetch(endpoint, fetchOptions);
|
|
38
44
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -12,4 +12,6 @@ export interface AgentOptions {
|
|
|
12
12
|
name?: string;
|
|
13
13
|
}
|
|
14
14
|
export declare function scene(prompt: string): SceneBuilder;
|
|
15
|
+
/** @internal reset auto-run state between tests */
|
|
16
|
+
export declare function _resetAutoRun(): void;
|
|
15
17
|
export declare function agent(executor: AgentExecutor, fn: () => void, options?: AgentOptions): Promise<AgentReport>;
|
package/dist/index.js
CHANGED
|
@@ -5,14 +5,36 @@ export { defineConfig } from "./config";
|
|
|
5
5
|
export function scene(prompt) {
|
|
6
6
|
return getContext().registerScene(prompt);
|
|
7
7
|
}
|
|
8
|
-
|
|
8
|
+
const pendingAgents = [];
|
|
9
|
+
let autoRunScheduled = false;
|
|
10
|
+
/** @internal reset auto-run state between tests */
|
|
11
|
+
export function _resetAutoRun() {
|
|
12
|
+
pendingAgents.length = 0;
|
|
13
|
+
autoRunScheduled = false;
|
|
14
|
+
}
|
|
15
|
+
export function agent(executor, fn, options) {
|
|
9
16
|
const ctx = new AgentContext(executor, options?.name);
|
|
10
17
|
setContext(ctx);
|
|
11
18
|
try {
|
|
12
19
|
fn();
|
|
13
20
|
}
|
|
14
|
-
|
|
21
|
+
catch (err) {
|
|
15
22
|
setContext(null);
|
|
23
|
+
return Promise.reject(err);
|
|
24
|
+
}
|
|
25
|
+
setContext(null);
|
|
26
|
+
const promise = ctx.execute();
|
|
27
|
+
pendingAgents.push(promise);
|
|
28
|
+
if (!autoRunScheduled) {
|
|
29
|
+
autoRunScheduled = true;
|
|
30
|
+
process.nextTick(async () => {
|
|
31
|
+
try {
|
|
32
|
+
await Promise.all(pendingAgents);
|
|
33
|
+
}
|
|
34
|
+
catch {
|
|
35
|
+
process.exitCode = 1;
|
|
36
|
+
}
|
|
37
|
+
});
|
|
16
38
|
}
|
|
17
|
-
return
|
|
39
|
+
return promise;
|
|
18
40
|
}
|
package/dist/runner.js
CHANGED
|
@@ -22,9 +22,12 @@ export async function executeScene(executor, scene, globalTimeout, judgeConfig,
|
|
|
22
22
|
const start = performance.now();
|
|
23
23
|
let input = scene.prompt;
|
|
24
24
|
for (let t = 0; t < turns; t++) {
|
|
25
|
+
let timer;
|
|
25
26
|
response = await Promise.race([
|
|
26
|
-
executor(input),
|
|
27
|
-
new Promise((_, reject) =>
|
|
27
|
+
executor(input).finally(() => clearTimeout(timer)),
|
|
28
|
+
new Promise((_, reject) => {
|
|
29
|
+
timer = setTimeout(() => reject(new Error(`Scene timed out after ${timeoutMs}ms`)), timeoutMs);
|
|
30
|
+
}),
|
|
28
31
|
]);
|
|
29
32
|
if (response.executionError)
|
|
30
33
|
break;
|