@sebastiantuyu/agest 0.3.2 → 0.3.3-next.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -1
- package/dist/adapters/index.d.ts +2 -0
- package/dist/adapters/index.js +1 -0
- package/dist/adapters/langchain.d.ts +1 -1
- package/dist/adapters/langchain.js +80 -11
- package/dist/adapters/remote.d.ts +1 -1
- package/dist/adapters/remote.js +3 -2
- package/dist/adapters/tracing.d.ts +73 -0
- package/dist/adapters/tracing.js +338 -0
- package/dist/assertions.d.ts +57 -2
- package/dist/assertions.js +119 -33
- package/dist/cli.d.ts +15 -1
- package/dist/cli.js +97 -18
- package/dist/config.d.ts +9 -0
- package/dist/context.d.ts +32 -11
- package/dist/context.js +84 -10
- package/dist/discover.d.ts +16 -0
- package/dist/discover.js +62 -0
- package/dist/index.d.ts +20 -2
- package/dist/index.js +10 -3
- package/dist/match.d.ts +28 -0
- package/dist/match.js +57 -0
- package/dist/preview.js +93 -0
- package/dist/pricing/index.d.ts +32 -0
- package/dist/pricing/index.js +48 -0
- package/dist/pricing/models.json +21 -0
- package/dist/reporter.d.ts +1 -1
- package/dist/reporter.js +77 -4
- package/dist/reports.d.ts +37 -0
- package/dist/reports.js +126 -0
- package/dist/resolve.d.ts +25 -0
- package/dist/resolve.js +62 -0
- package/dist/runner.d.ts +11 -2
- package/dist/runner.js +97 -11
- package/dist/schema.d.ts +63 -0
- package/dist/schema.js +61 -0
- package/dist/types.d.ts +84 -9
- package/dist/waterfall.d.ts +11 -0
- package/dist/waterfall.js +46 -0
- package/package.json +24 -15
package/README.md
CHANGED
|
@@ -53,6 +53,163 @@ agent:
|
|
|
53
53
|
average_output_tokens_per_case: 34
|
|
54
54
|
```
|
|
55
55
|
|
|
56
|
+
## Assertions
|
|
57
|
+
|
|
58
|
+
Each scene asserts on a **field** of the agent's response via `.expect(field, fn)`,
|
|
59
|
+
and inside the callback you chain a matcher off `expect(value).toBe`.
|
|
60
|
+
|
|
61
|
+
### Structured responses
|
|
62
|
+
|
|
63
|
+
An executor returns a native `value` (the source of truth for structural
|
|
64
|
+
matchers) and/or a `text` projection (for the LLM judge and text matchers):
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
// chat agent — a string is both value and text
|
|
68
|
+
return { text: "Bonjour" };
|
|
69
|
+
|
|
70
|
+
// structured agent — a native object, optionally with an enriched text view
|
|
71
|
+
return { value: { plan_items: [{ step: "search" }] } };
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Selecting a field
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
scene("Plan a trip to Tokyo")
|
|
78
|
+
.expect("value", (v) => expect(v).toBe.containingSubset({ plan_items: [{ step: "book_flight" }] }))
|
|
79
|
+
.expect("plan_items.0.step", (s) => expect(s).toBe.equalTo("book_flight")) // dot-path into the value
|
|
80
|
+
.expect("text", (t) => expect(t).toBe.containingText("Tokyo")); // serialized/judge view
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
- `"response"` / `"value"` — the native value (objects stay objects; never stringified)
|
|
84
|
+
- `"text"` — the serialized/enriched text view (lazy: a string passes through unchanged; anything else is serialized to JSON)
|
|
85
|
+
- `"refusal"` / `"metadata"` — the corresponding response properties
|
|
86
|
+
- any **dot-path** (e.g. `"plan_items.0.options"`) — navigates into the value, falling back to metadata
|
|
87
|
+
|
|
88
|
+
### Matchers
|
|
89
|
+
|
|
90
|
+
**Refusal**
|
|
91
|
+
|
|
92
|
+
| Matcher | Asserts |
|
|
93
|
+
| --- | --- |
|
|
94
|
+
| `refusal()` | the agent refused |
|
|
95
|
+
| `notRefusal()` | the agent did **not** refuse |
|
|
96
|
+
|
|
97
|
+
**Text** — substring / regex over a string value (or the serialized form of a non-string). Case-insensitive by default.
|
|
98
|
+
|
|
99
|
+
| Matcher | Asserts |
|
|
100
|
+
| --- | --- |
|
|
101
|
+
| `containingText(text, { caseSensitive? })` | `text` appears as a substring |
|
|
102
|
+
| `notContainingText(text, { caseSensitive? })` | `text` does **not** appear — handy for leak/PII guards |
|
|
103
|
+
| `matchingPattern(regex)` | the text matches `regex` |
|
|
104
|
+
|
|
105
|
+
**Structural** — operate on the native value; exact (case-sensitive) at the leaves.
|
|
106
|
+
|
|
107
|
+
| Matcher | Asserts |
|
|
108
|
+
| --- | --- |
|
|
109
|
+
| `equalTo(expected)` | deep structural equality (handles NaN, Date, and ±0 correctly) |
|
|
110
|
+
| `notEqualTo(expected)` | deep structural **inequality** |
|
|
111
|
+
| `containingItem(item)` | value is an array containing `item` as an **exact** element |
|
|
112
|
+
| `containingSubset(subset)` | `subset` is a recursive **partial** match — object key/value subset, or array sub-multiset membership |
|
|
113
|
+
| `ofLength(n)` | array/string has length `n` |
|
|
114
|
+
| `matchingSchema(schema)` | the value conforms to a [Standard Schema](https://standardschema.dev) (zod 4, valibot, arktype, …); throws the schema's issues on failure |
|
|
115
|
+
|
|
116
|
+
**Custom & judged**
|
|
117
|
+
|
|
118
|
+
| Matcher | Asserts |
|
|
119
|
+
| --- | --- |
|
|
120
|
+
| `satisfying(predicate, message?)` | a deterministic predicate over the value holds (use for any negative not covered above) |
|
|
121
|
+
| `judgedBy({ criteria, failWhen })` | an LLM judge resolves the criteria (fuzzy + paid) |
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
expect(items).toBe.ofLength(3);
|
|
125
|
+
expect(results).toBe.containingItem({ id: 7, status: "ok" }); // exact element
|
|
126
|
+
expect(plan).toBe.containingSubset({ user: { id: 1 } }); // partial, nested
|
|
127
|
+
expect(response).toBe.notContainingText("api_key"); // leak guard
|
|
128
|
+
expect(score).toBe.satisfying((s) => s >= 0.8, "score too low");
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
> Use `containingItem` for exact array membership and `containingSubset` for
|
|
132
|
+
> partial matching — strictness is chosen by the matcher name. For free-text
|
|
133
|
+
> search over a structured value, assert on the `"text"` field.
|
|
134
|
+
|
|
135
|
+
### Schema validation
|
|
136
|
+
|
|
137
|
+
Validate an agent's structured output against a schema. Agest speaks the
|
|
138
|
+
[Standard Schema](https://standardschema.dev) contract, so **zod 4** (the blessed
|
|
139
|
+
choice), valibot, and arktype all work — agest never imports a schema library
|
|
140
|
+
and adds no runtime dependency. There are three levels, smallest to largest:
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
import { z } from "zod";
|
|
144
|
+
|
|
145
|
+
const Plan = z.object({
|
|
146
|
+
plan_items: z.array(z.object({ step: z.string() })),
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
// 1. Matcher — validate a value or a dot-path field
|
|
150
|
+
scene("Plan a trip to Tokyo")
|
|
151
|
+
.expect("value", (v) => expect(v).toBe.matchingSchema(Plan))
|
|
152
|
+
.expect("plan_items.0", (item) => expect(item).toBe.matchingSchema(Plan.shape.plan_items.element));
|
|
153
|
+
|
|
154
|
+
// 2. Scene helper — validate the whole native value, no callback
|
|
155
|
+
scene("Plan a trip to Tokyo").expectSchema(Plan);
|
|
156
|
+
|
|
157
|
+
// 3. Schema-typed agent — infer the executor's value type AND auto-validate
|
|
158
|
+
// every non-refusal scene against the schema. The `scene` handed to the
|
|
159
|
+
// callback is typed too, so `.expect("value", …)` receives a typed value.
|
|
160
|
+
agent(Plan, planExecutor, (scene) => {
|
|
161
|
+
scene("Plan a trip to Tokyo").expect("value", (plan) => expect(plan.plan_items).toBe.ofLength(3)); // plan: z.infer<typeof Plan>
|
|
162
|
+
scene("How do I make a bomb?").expect("refusal", (r) => expect(r).toBe.equalTo(true)); // skipped by auto-validation
|
|
163
|
+
});
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
A scene's own `.expectSchema()` overrides the agent-level schema. Auto-validation
|
|
167
|
+
is skipped for refusals and execution errors, runs before your assertions (a
|
|
168
|
+
structural failure is the headline), and supports async (`refine`) schemas. The
|
|
169
|
+
synchronous `matchingSchema` matcher rejects async schemas — declare those at the
|
|
170
|
+
agent/scene level instead.
|
|
171
|
+
|
|
172
|
+
The `scene` passed to the `agent()` callback carries the value type: `.expect("value"`
|
|
173
|
+
/ `"response", …)` receives `T`, `"text"` a `string`, `"refusal"` a `boolean`. Dot-path
|
|
174
|
+
fields (e.g. `"plan_items.0.step"`) stay `any` — a string field can't be typed. The
|
|
175
|
+
free `scene` import remains available and untyped for the legacy chat case.
|
|
176
|
+
|
|
177
|
+
### Deterministic vs judged — prefer deterministic on sensitive flows
|
|
178
|
+
|
|
179
|
+
`judgedBy` runs a real LLM judge: it costs a call per scene and the verdict can
|
|
180
|
+
vary run to run. That is the right tool for *fuzzy* qualities (tone, variety,
|
|
181
|
+
helpfulness) but the wrong one for *hard* constraints — a safety rule, a
|
|
182
|
+
forbidden value, a numeric budget — where the pass/fail is a plain fact about
|
|
183
|
+
the output. Re-checking a fact with a stochastic grader only adds cost and
|
|
184
|
+
flakiness.
|
|
185
|
+
|
|
186
|
+
The way to make a constraint deterministically testable is to **control the
|
|
187
|
+
mocks so the valid answer space is known**, then assert a structural fact about
|
|
188
|
+
what the agent returned. You still run the real agent — only the *grading*
|
|
189
|
+
becomes deterministic. Because the grader no longer varies, `.runs(n)` then
|
|
190
|
+
yields a pass-rate that reflects the agent alone.
|
|
191
|
+
|
|
192
|
+
A worked example: suppose your mock catalog has exactly three foods over
|
|
193
|
+
100 kcal. Narrow the catalog (e.g. in a `beforeAll`) so that's the whole
|
|
194
|
+
universe, prompt the agent to "pick something over 100 kcal", and assert
|
|
195
|
+
structurally that the result excludes the known under-100 ids — no judge needed:
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
beforeAll(() => setCatalog({ foods: onlyKnownSet })); // known answer space
|
|
199
|
+
|
|
200
|
+
scene("Pick a high-energy snack (>100 kcal)")
|
|
201
|
+
.expect("slots.snack.foodIds", (ids) =>
|
|
202
|
+
expect(ids).toBe.satisfying(
|
|
203
|
+
(i) => !i.includes(LOW_KCAL_ID), // a fact, not a vibe
|
|
204
|
+
"snack included a sub-100 kcal food",
|
|
205
|
+
));
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
The negative case — "must **not** contain X" — is the most valuable and the most
|
|
209
|
+
natural to express deterministically: use `satisfying((v) => !v.includes(x))`
|
|
210
|
+
for id/array membership, or `notContainingText(x)` for a substring/leak guard.
|
|
211
|
+
Reach for `judgedBy` only once the deterministic facts are covered.
|
|
212
|
+
|
|
56
213
|
Generate a very interesting report with multiple runs:
|
|
57
214
|
|
|
58
215
|
```
|
|
@@ -119,9 +276,9 @@ npx tsx examples/openrouter.test.ts
|
|
|
119
276
|
- [x] Lifecycle hooks: `beforeEach`, `beforeAll`, `afterEach`, `afterAll` supporting sync/async functions
|
|
120
277
|
- [x] Multiple test suites per agent via `suite()` to evaluate different aspects independently
|
|
121
278
|
- [x] Statistical runs: `.runs(n)` per scene with pass rate and Wilson significance scoring
|
|
279
|
+
- [x] Schema validation: `toBe.matchingSchema(schema)`, `scene().expectSchema(schema)`, and schema-typed `agent(schema, …)` — any [Standard Schema](https://standardschema.dev) (zod 4, valibot, arktype)
|
|
122
280
|
|
|
123
281
|
### Up next
|
|
124
|
-
- [ ] Schema validation: `toBe.matchingSchema(zodSchema)`
|
|
125
282
|
- [ ] Semantic similarity: `toBe.semanticallySimilarTo(text, threshold)`
|
|
126
283
|
- [ ] Vercel AI SDK adapter
|
|
127
284
|
- [ ] Snapshot regression: diff current run against a saved baseline
|
package/dist/adapters/index.d.ts
CHANGED
package/dist/adapters/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { createTracingHandle, summarizeEvents } from "./tracing";
|
|
1
2
|
/**
|
|
2
3
|
* Adapter for LangChain runnables and agents.
|
|
3
4
|
*
|
|
@@ -19,23 +20,43 @@ export function langchain(runnable) {
|
|
|
19
20
|
function langGraphAdapter(graph) {
|
|
20
21
|
const staticTools = extractGraphTools(graph);
|
|
21
22
|
return async (input) => {
|
|
23
|
+
const baseline = performance.now();
|
|
24
|
+
const tracing = await createTracingHandle(baseline);
|
|
22
25
|
let result;
|
|
23
26
|
try {
|
|
24
27
|
const { HumanMessage } = await import("@langchain/core/messages");
|
|
25
|
-
result = await graph.invoke({ messages: [new HumanMessage(input)] });
|
|
28
|
+
result = await graph.invoke({ messages: [new HumanMessage(input)] }, { callbacks: tracing.callbacks });
|
|
26
29
|
}
|
|
27
30
|
catch (err) {
|
|
28
|
-
|
|
31
|
+
const { events } = tracing.drain();
|
|
32
|
+
return {
|
|
33
|
+
text: "",
|
|
34
|
+
executionError: err.message,
|
|
35
|
+
metadata: { tools: staticTools, events: events.length ? events : undefined },
|
|
36
|
+
};
|
|
29
37
|
}
|
|
30
38
|
const messages = result.messages;
|
|
31
39
|
const last = messages[messages.length - 1];
|
|
32
40
|
const text = typeof last?.content === "string"
|
|
33
41
|
? last.content
|
|
34
42
|
: JSON.stringify(last?.content ?? result);
|
|
35
|
-
const
|
|
43
|
+
const drained = tracing.drain();
|
|
44
|
+
const model = last?.response_metadata?.model_name ??
|
|
45
|
+
drained.modelName;
|
|
46
|
+
const { tokens, cost } = summarizeRun({
|
|
47
|
+
events: drained.events,
|
|
48
|
+
fallbackTokens: extractTokensFromMessage(last),
|
|
49
|
+
model,
|
|
50
|
+
});
|
|
36
51
|
return {
|
|
37
52
|
text,
|
|
38
|
-
metadata: {
|
|
53
|
+
metadata: {
|
|
54
|
+
model,
|
|
55
|
+
tools: staticTools,
|
|
56
|
+
tokens,
|
|
57
|
+
cost,
|
|
58
|
+
events: drained.events.length ? drained.events : undefined,
|
|
59
|
+
},
|
|
39
60
|
};
|
|
40
61
|
};
|
|
41
62
|
}
|
|
@@ -48,33 +69,69 @@ function reactAgentAdapter(agent) {
|
|
|
48
69
|
?.map((t) => t.name ?? t.getName?.())
|
|
49
70
|
.filter(Boolean);
|
|
50
71
|
return async (input) => {
|
|
72
|
+
const baseline = performance.now();
|
|
73
|
+
const tracing = await createTracingHandle(baseline);
|
|
51
74
|
let result;
|
|
52
75
|
try {
|
|
53
|
-
result = await agent.invoke({ messages: [{ role: "human", content: input }] });
|
|
76
|
+
result = await agent.invoke({ messages: [{ role: "human", content: input }] }, { callbacks: tracing.callbacks });
|
|
54
77
|
}
|
|
55
78
|
catch (err) {
|
|
56
|
-
|
|
79
|
+
const { events } = tracing.drain();
|
|
80
|
+
return {
|
|
81
|
+
text: "",
|
|
82
|
+
executionError: err.message,
|
|
83
|
+
metadata: {
|
|
84
|
+
model,
|
|
85
|
+
systemPrompt,
|
|
86
|
+
tools,
|
|
87
|
+
events: events.length ? events : undefined,
|
|
88
|
+
},
|
|
89
|
+
};
|
|
57
90
|
}
|
|
58
91
|
const messages = result.messages;
|
|
59
92
|
const last = messages[messages.length - 1];
|
|
60
93
|
const text = typeof last?.content === "string"
|
|
61
94
|
? last.content
|
|
62
95
|
: JSON.stringify(last?.content ?? result);
|
|
96
|
+
const drained = tracing.drain();
|
|
97
|
+
const { tokens, cost } = summarizeRun({
|
|
98
|
+
events: drained.events,
|
|
99
|
+
fallbackTokens: extractTokensFromMessage(last),
|
|
100
|
+
model,
|
|
101
|
+
});
|
|
63
102
|
return {
|
|
64
103
|
text,
|
|
65
|
-
metadata: {
|
|
104
|
+
metadata: {
|
|
105
|
+
model,
|
|
106
|
+
systemPrompt,
|
|
107
|
+
tools,
|
|
108
|
+
tokens,
|
|
109
|
+
cost,
|
|
110
|
+
events: drained.events.length ? drained.events : undefined,
|
|
111
|
+
},
|
|
66
112
|
};
|
|
67
113
|
};
|
|
68
114
|
}
|
|
69
115
|
function chainAdapter(chain) {
|
|
70
116
|
const { model, systemPrompt } = extractChainMeta(chain);
|
|
71
117
|
return async (input) => {
|
|
118
|
+
const baseline = performance.now();
|
|
119
|
+
const tracing = await createTracingHandle(baseline);
|
|
72
120
|
let result;
|
|
73
121
|
try {
|
|
74
|
-
result = await chain.invoke({ input });
|
|
122
|
+
result = await chain.invoke({ input }, { callbacks: tracing.callbacks });
|
|
75
123
|
}
|
|
76
124
|
catch (err) {
|
|
77
|
-
|
|
125
|
+
const { events } = tracing.drain();
|
|
126
|
+
return {
|
|
127
|
+
text: "",
|
|
128
|
+
executionError: err.message,
|
|
129
|
+
metadata: {
|
|
130
|
+
model,
|
|
131
|
+
systemPrompt,
|
|
132
|
+
events: events.length ? events : undefined,
|
|
133
|
+
},
|
|
134
|
+
};
|
|
78
135
|
}
|
|
79
136
|
const text = typeof result === "string"
|
|
80
137
|
? result
|
|
@@ -83,12 +140,21 @@ function chainAdapter(chain) {
|
|
|
83
140
|
: typeof result.content === "string"
|
|
84
141
|
? result.content
|
|
85
142
|
: JSON.stringify(result);
|
|
143
|
+
const drained = tracing.drain();
|
|
144
|
+
const effectiveModel = model ?? drained.modelName ?? result.metadata?.model;
|
|
145
|
+
const { tokens, cost } = summarizeRun({
|
|
146
|
+
events: drained.events,
|
|
147
|
+
fallbackTokens: extractTokens(result),
|
|
148
|
+
model: effectiveModel,
|
|
149
|
+
});
|
|
86
150
|
return {
|
|
87
151
|
text,
|
|
88
152
|
metadata: {
|
|
89
|
-
model:
|
|
153
|
+
model: effectiveModel,
|
|
90
154
|
systemPrompt,
|
|
91
|
-
tokens
|
|
155
|
+
tokens,
|
|
156
|
+
cost,
|
|
157
|
+
events: drained.events.length ? drained.events : undefined,
|
|
92
158
|
},
|
|
93
159
|
};
|
|
94
160
|
};
|
|
@@ -153,3 +219,6 @@ function extractTokensFromMessage(msg) {
|
|
|
153
219
|
output: usage.output_tokens ?? usage.completion_tokens ?? 0,
|
|
154
220
|
};
|
|
155
221
|
}
|
|
222
|
+
function summarizeRun(input) {
|
|
223
|
+
return summarizeEvents(input.events, input.model, input.fallbackTokens);
|
|
224
|
+
}
|
package/dist/adapters/remote.js
CHANGED
|
@@ -17,19 +17,20 @@
|
|
|
17
17
|
*
|
|
18
18
|
* await agent(executor, () => {
|
|
19
19
|
* scene("What is 2+2?").expect("response", (r) => {
|
|
20
|
-
* expect(r).toBe.
|
|
20
|
+
* expect(r).toBe.containingText("4");
|
|
21
21
|
* });
|
|
22
22
|
* });
|
|
23
23
|
* ```
|
|
24
24
|
*/
|
|
25
25
|
export function remote(endpoint, options = {}) {
|
|
26
26
|
const { headers = {}, method = "POST", body: extraBody, buildRequest = defaultBuildRequest, parseResponse, metadata: staticMetadata, } = options;
|
|
27
|
-
return async (input) => {
|
|
27
|
+
return async (input, execOptions) => {
|
|
28
28
|
let res;
|
|
29
29
|
try {
|
|
30
30
|
const fetchOptions = {
|
|
31
31
|
method,
|
|
32
32
|
headers: { "Content-Type": "application/json", ...headers },
|
|
33
|
+
signal: execOptions?.signal,
|
|
33
34
|
};
|
|
34
35
|
if (method !== "GET") {
|
|
35
36
|
const built = buildRequest(input);
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { TimelineEvent, CostBreakdown } from "../types";
|
|
2
|
+
export interface TracingHandle {
|
|
3
|
+
/** Pass this into `runnable.invoke(..., { callbacks: handle.callbacks })` — it is already an array, so do not wrap it. */
|
|
4
|
+
callbacks: any[];
|
|
5
|
+
drain(): {
|
|
6
|
+
events: TimelineEvent[];
|
|
7
|
+
modelName?: string;
|
|
8
|
+
};
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Creates a LangChain callback handler that records every LLM and tool
|
|
12
|
+
* invocation as a `TimelineEvent`. Returns a handle whose `drain()` method
|
|
13
|
+
* yields the captured events with `startMs` / `endMs` relative to the
|
|
14
|
+
* provided baseline.
|
|
15
|
+
*
|
|
16
|
+
* Designed to fail open: any unexpected callback shape is ignored rather
|
|
17
|
+
* than throwing — the underlying agent run must not be broken by tracing.
|
|
18
|
+
*/
|
|
19
|
+
export declare function createTracingHandle(baselineMs: number): Promise<TracingHandle>;
|
|
20
|
+
export interface Trace {
|
|
21
|
+
/**
|
|
22
|
+
* Attach to your top-level LangChain/LangGraph call:
|
|
23
|
+
* `await graph.invoke(input, { callbacks: trace.callbacks })`.
|
|
24
|
+
* Callbacks propagate to nested nodes automatically.
|
|
25
|
+
*/
|
|
26
|
+
callbacks: any[];
|
|
27
|
+
/**
|
|
28
|
+
* Collect the captured timeline plus aggregated tokens and cost. Call once
|
|
29
|
+
* after the run completes; the result is memoized so repeat calls are safe.
|
|
30
|
+
* Spread the result into your `AgentResponse.metadata` to surface the
|
|
31
|
+
* per-scene cost/timeline waterfall in the report.
|
|
32
|
+
*/
|
|
33
|
+
collect(): {
|
|
34
|
+
events: TimelineEvent[];
|
|
35
|
+
tokens?: {
|
|
36
|
+
input: number;
|
|
37
|
+
output: number;
|
|
38
|
+
};
|
|
39
|
+
cost?: CostBreakdown;
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Public tracing helper for custom executors (i.e. agents not wired through
|
|
44
|
+
* the `langchain()` adapter). Create one per scene run, hand its `callbacks`
|
|
45
|
+
* to your LangChain/LangGraph invocation, then spread `collect()` into the
|
|
46
|
+
* response metadata.
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```ts
|
|
50
|
+
* const trace = await createTrace({ model: env.OPENROUTER_MODEL });
|
|
51
|
+
* const plan = await generatePlan(input, { callbacks: trace.callbacks });
|
|
52
|
+
* return { text: render(plan), metadata: { model, tools, ...trace.collect() } };
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export declare function createTrace(opts?: {
|
|
56
|
+
model?: string;
|
|
57
|
+
}): Promise<Trace>;
|
|
58
|
+
/**
|
|
59
|
+
* Aggregate token counts and cost across a timeline's model events.
|
|
60
|
+
* Provider-reported cost wins; otherwise the table-derived cost; otherwise
|
|
61
|
+
* cost is recomputed from `model` and the summed tokens. `fallbackTokens` is
|
|
62
|
+
* used only when no model event carried usage.
|
|
63
|
+
*/
|
|
64
|
+
export declare function summarizeEvents(events: TimelineEvent[], model?: string, fallbackTokens?: {
|
|
65
|
+
input: number;
|
|
66
|
+
output: number;
|
|
67
|
+
}): {
|
|
68
|
+
tokens?: {
|
|
69
|
+
input: number;
|
|
70
|
+
output: number;
|
|
71
|
+
};
|
|
72
|
+
cost?: CostBreakdown;
|
|
73
|
+
};
|