@mishri/ai-term 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -0
- package/dist/agent/context/compaction.d.ts +3 -0
- package/dist/agent/context/compaction.d.ts.map +1 -0
- package/dist/agent/context/compaction.js +39 -0
- package/dist/agent/context/compaction.js.map +1 -0
- package/dist/agent/context/index.d.ts +7 -0
- package/dist/agent/context/index.d.ts.map +1 -0
- package/dist/agent/context/index.js +24 -0
- package/dist/agent/context/index.js.map +1 -0
- package/dist/agent/context/modelLimits.d.ts +7 -0
- package/dist/agent/context/modelLimits.d.ts.map +1 -0
- package/dist/agent/context/modelLimits.js +31 -0
- package/dist/agent/context/modelLimits.js.map +1 -0
- package/dist/agent/context/tokenEstimator.d.ts +3 -0
- package/dist/agent/context/tokenEstimator.d.ts.map +1 -0
- package/dist/agent/context/tokenEstimator.js +28 -0
- package/dist/agent/context/tokenEstimator.js.map +1 -0
- package/dist/agent/evals/agent-multiturn.eval.d.ts +2 -0
- package/dist/agent/evals/agent-multiturn.eval.d.ts.map +1 -0
- package/dist/agent/evals/agent-multiturn.eval.js +32 -0
- package/dist/agent/evals/agent-multiturn.eval.js.map +1 -0
- package/dist/agent/evals/data/agent-multiturn.json +111 -0
- package/dist/agent/evals/data/tool-calls.json +28 -0
- package/dist/agent/evals/evaluators.d.ts +6 -0
- package/dist/agent/evals/evaluators.d.ts.map +1 -0
- package/dist/agent/evals/evaluators.js +59 -0
- package/dist/agent/evals/evaluators.js.map +1 -0
- package/dist/agent/evals/executors.d.ts +31 -0
- package/dist/agent/evals/executors.d.ts.map +1 -0
- package/dist/agent/evals/executors.js +107 -0
- package/dist/agent/evals/executors.js.map +1 -0
- package/dist/agent/evals/file-tools.eval.d.ts +2 -0
- package/dist/agent/evals/file-tools.eval.d.ts.map +1 -0
- package/dist/agent/evals/file-tools.eval.js +27 -0
- package/dist/agent/evals/file-tools.eval.js.map +1 -0
- package/dist/agent/evals/mocks/tools.d.ts +32 -0
- package/dist/agent/evals/mocks/tools.d.ts.map +1 -0
- package/dist/agent/evals/mocks/tools.js +58 -0
- package/dist/agent/evals/mocks/tools.js.map +1 -0
- package/dist/agent/evals/types.d.ts +55 -0
- package/dist/agent/evals/types.d.ts.map +1 -0
- package/dist/agent/evals/types.js +2 -0
- package/dist/agent/evals/types.js.map +1 -0
- package/dist/agent/evals/utils.d.ts +4 -0
- package/dist/agent/evals/utils.d.ts.map +1 -0
- package/dist/agent/evals/utils.js +20 -0
- package/dist/agent/evals/utils.js.map +1 -0
- package/dist/agent/executeTool.d.ts +42 -0
- package/dist/agent/executeTool.d.ts.map +1 -0
- package/dist/agent/executeTool.js +14 -0
- package/dist/agent/executeTool.js.map +1 -0
- package/dist/agent/run.d.ts +4 -0
- package/dist/agent/run.d.ts.map +1 -0
- package/dist/agent/run.js +134 -0
- package/dist/agent/run.js.map +1 -0
- package/dist/agent/system/filterMessages.d.ts +3 -0
- package/dist/agent/system/filterMessages.d.ts.map +1 -0
- package/dist/agent/system/filterMessages.js +25 -0
- package/dist/agent/system/filterMessages.js.map +1 -0
- package/dist/agent/system/system_prompt.d.ts +2 -0
- package/dist/agent/system/system_prompt.d.ts.map +1 -0
- package/dist/agent/system/system_prompt.js +8 -0
- package/dist/agent/system/system_prompt.js.map +1 -0
- package/dist/agent/system_prompt.d.ts +2 -0
- package/dist/agent/system_prompt.d.ts.map +1 -0
- package/dist/agent/system_prompt.js +8 -0
- package/dist/agent/system_prompt.js.map +1 -0
- package/dist/agent/tools/execCode.d.ts +5 -0
- package/dist/agent/tools/execCode.d.ts.map +1 -0
- package/dist/agent/tools/execCode.js +58 -0
- package/dist/agent/tools/execCode.js.map +1 -0
- package/dist/agent/tools/file.d.ts +29 -0
- package/dist/agent/tools/file.d.ts.map +1 -0
- package/dist/agent/tools/file.js +92 -0
- package/dist/agent/tools/file.js.map +1 -0
- package/dist/agent/tools/index.d.ts +44 -0
- package/dist/agent/tools/index.d.ts.map +1 -0
- package/dist/agent/tools/index.js +14 -0
- package/dist/agent/tools/index.js.map +1 -0
- package/dist/agent/tools/shell.d.ts +4 -0
- package/dist/agent/tools/shell.d.ts.map +1 -0
- package/dist/agent/tools/shell.js +30 -0
- package/dist/agent/tools/shell.js.map +1 -0
- package/dist/agent/tools/websearch.d.ts +22 -0
- package/dist/agent/tools/websearch.d.ts.map +1 -0
- package/dist/agent/tools/websearch.js +3 -0
- package/dist/agent/tools/websearch.js.map +1 -0
- package/dist/agent/type.d.ts +27 -0
- package/dist/agent/type.d.ts.map +1 -0
- package/dist/agent/type.js +2 -0
- package/dist/agent/type.js.map +1 -0
- package/dist/config.d.ts +4 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +6 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/dist/ui/App.d.ts +2 -0
- package/dist/ui/App.d.ts.map +1 -0
- package/dist/ui/App.js +89 -0
- package/dist/ui/App.js.map +1 -0
- package/dist/ui/components/Input.d.ts +7 -0
- package/dist/ui/components/Input.d.ts.map +1 -0
- package/dist/ui/components/Input.js +26 -0
- package/dist/ui/components/Input.js.map +1 -0
- package/dist/ui/components/MessageList.d.ts +10 -0
- package/dist/ui/components/MessageList.d.ts.map +1 -0
- package/dist/ui/components/MessageList.js +7 -0
- package/dist/ui/components/MessageList.js.map +1 -0
- package/dist/ui/components/TokenUsage.d.ts +8 -0
- package/dist/ui/components/TokenUsage.d.ts.map +1 -0
- package/dist/ui/components/TokenUsage.js +20 -0
- package/dist/ui/components/TokenUsage.js.map +1 -0
- package/dist/ui/components/ToolApproval.d.ts +6 -0
- package/dist/ui/components/ToolApproval.d.ts.map +1 -0
- package/dist/ui/components/ToolApproval.js +19 -0
- package/dist/ui/components/ToolApproval.js.map +1 -0
- package/dist/ui/components/ToolCall.d.ts +8 -0
- package/dist/ui/components/ToolCall.d.ts.map +1 -0
- package/dist/ui/components/ToolCall.js +13 -0
- package/dist/ui/components/ToolCall.js.map +1 -0
- package/package.json +58 -0
package/README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# ai-term
|
|
2
|
+
|
|
3
|
+
A terminal AI agent with tool use — read/write files, run shell commands, execute code, and search the web. Built with [Ink](https://github.com/vadimdemedes/ink) and the [Vercel AI SDK](https://sdk.vercel.ai/).
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
- Node.js 20+
|
|
8
|
+
- An [OpenAI API key](https://platform.openai.com/api-keys)
|
|
9
|
+
|
|
10
|
+
## Quick start (from source)
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
git clone https://github.com/Mishragini/ai-term.git
|
|
14
|
+
cd ai-term
|
|
15
|
+
npm install
|
|
16
|
+
cp .env.example .env
|
|
17
|
+
# Add your OPENAI_API_KEY to .env
|
|
18
|
+
npm start
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Type your prompt at the `>` prompt. Type `exit` or `quit` to leave.
|
|
22
|
+
|
|
23
|
+
## Install globally (after publishing to npm)
|
|
24
|
+
|
|
25
|
+
Once published:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm install -g @mishri/ai-term
|
|
29
|
+
export OPENAI_API_KEY=your-key-here
|
|
30
|
+
ai-term
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or run without installing:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
npx @mishri/ai-term
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Set `OPENAI_API_KEY` in your environment or in a `.env` file in the directory where you run the command.
|
|
40
|
+
|
|
41
|
+
## Environment variables
|
|
42
|
+
|
|
43
|
+
| Variable | Required | Description |
|
|
44
|
+
|----------|----------|-------------|
|
|
45
|
+
| `OPENAI_API_KEY` | Yes | OpenAI API key for the agent |
|
|
46
|
+
| `LMNR_PROJECT_API_KEY` | No | [Laminar](https://lmnr.ai/) project key for tracing |
|
|
47
|
+
|
|
48
|
+
## Tools
|
|
49
|
+
|
|
50
|
+
The agent can use these tools (shell and code execution require your approval before running):
|
|
51
|
+
|
|
52
|
+
- **readFile / writeFile / listFiles / deleteFile** — file operations in the working directory
|
|
53
|
+
- **shellCommand** — run shell commands (approval required)
|
|
54
|
+
- **execCode** — execute JavaScript/TypeScript snippets (approval required)
|
|
55
|
+
- **webSearch** — search the web via OpenAI
|
|
56
|
+
|
|
57
|
+
## Development
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
npm run build # compile TypeScript to dist/
|
|
61
|
+
npm start # build and run
|
|
62
|
+
npm run eval-file # run file-tool evals
|
|
63
|
+
npm run eval-multiturn # run multi-turn agent evals
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Publishing to npm
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
npm login
|
|
70
|
+
npm publish
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The package ships the compiled `dist/` output and exposes the `ai-term` CLI command.
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
ISC
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compaction.d.ts","sourceRoot":"","sources":["../../../src/agent/context/compaction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,YAAY,EAAE,MAAM,IAAI,CAAC;AAqBrD,wBAAsB,mBAAmB,CACrC,QAAQ,EAAE,YAAY,EAAE,EACxB,KAAK,GAAE,MAAqB,2BA2B/B"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import { extractMessageText } from "./tokenEstimator.js";
|
|
3
|
+
import { openai } from "@ai-sdk/openai";
|
|
4
|
+
const SUMMARIZATION_PROMPT = `You are a conversation summarizer. Your task is to create a concise summary of the conversation so far that preserves:
|
|
5
|
+
|
|
6
|
+
1. Key decisions and conclusions reached
|
|
7
|
+
2. Important context and facts mentioned
|
|
8
|
+
3. Any pending tasks or questions
|
|
9
|
+
4. The overall goal of the conversation
|
|
10
|
+
|
|
11
|
+
Be concise but complete. The summary should allow the conversation to continue naturally.
|
|
12
|
+
|
|
13
|
+
Conversation to summarize:
|
|
14
|
+
`;
|
|
15
|
+
/**
 * Renders a list of messages as a plain-text transcript.
 *
 * Each message becomes one `[role]: text` entry (text obtained through
 * `extractMessageText`), and entries are separated by a blank line.
 *
 * @param messages - Messages to render.
 * @returns The transcript as a single string.
 */
function messagesToText(messages) {
    const entries = messages.map((message) => {
        const speaker = message.role;
        const body = extractMessageText(message);
        return `[${speaker}]: ${body}`;
    });
    return entries.join("\n\n");
}
|
|
18
|
+
/**
 * Replaces a conversation with a short two-message summary exchange.
 *
 * System messages are dropped first. If nothing remains, an empty array is
 * returned and the model is not called. Otherwise the remaining messages are
 * rendered to text, summarized with `generateText`, and the summary is wrapped
 * in a synthetic user message followed by a fixed assistant acknowledgement.
 *
 * @param messages - Full message history.
 * @param model - Model id passed to `openai(...)`; defaults to "gpt-5-mini".
 * @returns The compacted message list (empty, or exactly two messages).
 */
export async function compactConversation(messages, model = "gpt-5-mini") {
    const withoutSystem = messages.filter(({ role }) => role !== "system");
    if (!withoutSystem.length) {
        return [];
    }

    const transcript = messagesToText(withoutSystem);
    const response = await generateText({
        model: openai(model),
        prompt: `${SUMMARIZATION_PROMPT} ${transcript}`
    });
    const summary = response.text;

    const summaryTurn = {
        role: "user",
        content: `[CONVERSATION SUMMARY]\nThe following is a summary of our conversation so far:\n\n${summary}\n\nPlease continue from where we left off.`
    };
    const acknowledgementTurn = {
        role: "assistant",
        content: "I understand. I've reviewed the summary of our conversation and I'm ready to continue. How can I help you next?",
    };
    return [summaryTurn, acknowledgementTurn];
}
|
|
39
|
+
//# sourceMappingURL=compaction.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compaction.js","sourceRoot":"","sources":["../../../src/agent/context/compaction.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAqB,MAAM,IAAI,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AACzD,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAExC,MAAM,oBAAoB,GAAG;;;;;;;;;;CAU5B,CAAC;AAGF,SAAS,cAAc,CAAC,QAAwB;IAC5C,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,MAAM,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AAC1F,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACrC,QAAwB,EACxB,QAAgB,YAAY;IAG5B,MAAM,oBAAoB,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAA;IAE5E,IAAI,oBAAoB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAEhD,MAAM,gBAAgB,GAAG,cAAc,CAAC,oBAAoB,CAAC,CAAA;IAE7D,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,MAAM,YAAY,CAAC;QACzC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;QACpB,MAAM,EAAE,GAAG,oBAAoB,IAAI,gBAAgB,EAAE;KACxD,CAAC,CAAA;IAGF,MAAM,eAAe,GAAmB;QACpC;YACI,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,qFAAqF,OAAO,6CAA6C;SACrJ;QACD;YACI,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,iHAAiH;SAC7H;KACJ,CAAA;IAED,OAAO,eAAe,CAAA;AAC1B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agent/context/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AASvC,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,YAAY,EAAE;;;;EAoB9D"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { extractMessageText } from "./tokenEstimator.js";
|
|
2
|
+
/**
 * Roughly estimates how many tokens a piece of text occupies.
 *
 * No tokenizer is used: the estimate is the character count divided by an
 * assumed characters-per-token ratio, rounded up. A real tokenizer would be
 * more accurate; this heuristic avoids that cost.
 *
 * @param text - Text to measure.
 * @param charsPerToken - Assumed average characters per token (default 3.75).
 * @returns The estimated token count (0 for an empty string).
 */
function estimateTokens(text, charsPerToken = 3.75) {
    return Math.ceil(text.length / charsPerToken);
}
|
|
6
|
+
/**
 * Estimates token usage for a list of messages, split by direction.
 *
 * Each message's text is extracted and passed through `estimateTokens`.
 * Messages whose role is "assistant" count as output; every other role
 * counts as input.
 *
 * @param messages - Messages to measure.
 * @returns `{ input, output, total }` where `total` is `input + output`.
 */
export function estimateMessagesTokens(messages) {
    const totals = { input: 0, output: 0 };
    for (const message of messages) {
        const count = estimateTokens(extractMessageText(message));
        const bucket = message.role === "assistant" ? "output" : "input";
        totals[bucket] += count;
    }
    return {
        input: totals.input,
        output: totals.output,
        total: totals.input + totals.output
    };
}
|
|
24
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/agent/context/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAC;AAGzD,oHAAoH;AACpH,SAAS,cAAc,CAAC,IAAY;IAChC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,CAAA;AACxC,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAwB;IAE3D,IAAI,KAAK,GAAG,CAAC,EAAE,MAAM,GAAG,CAAC,CAAA;IACzB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAA;QACpC,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;QAEnC,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC3B,MAAM,IAAI,MAAM,CAAA;QACpB,CAAC;aAAM,CAAC;YACJ,KAAK,IAAI,MAAM,CAAA;QACnB,CAAC;IACL,CAAC;IAED,OAAO;QACH,KAAK;QACL,MAAM;QACN,KAAK,EAAE,KAAK,GAAG,MAAM;KACxB,CAAA;AAEL,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ModelLimits } from "../type.js";
|
|
2
|
+
export declare const DEFAULT_THRESHOLD = 0.8;
|
|
3
|
+
export declare const MODEL_LIMITS: Record<string, ModelLimits>;
|
|
4
|
+
export declare function getModelLimits(model: string): ModelLimits;
|
|
5
|
+
export declare function isOverThreshold(totalTokens: number, contextWindow: number, threshold?: number): boolean;
|
|
6
|
+
export declare function calculateUsagePercentage(totalTokens: number, contextWindow: number): number;
|
|
7
|
+
//# sourceMappingURL=modelLimits.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modelLimits.d.ts","sourceRoot":"","sources":["../../../src/agent/context/modelLimits.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAE7C,eAAO,MAAM,iBAAiB,MAAM,CAAA;AAEpC,eAAO,MAAM,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,WAAW,CAYpD,CAAA;AASD,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,eAO3C;AAED,wBAAgB,eAAe,CAC3B,WAAW,EAAE,MAAM,EACnB,aAAa,EAAE,MAAM,EACrB,SAAS,GAAE,MAA0B,WAGxC;AAGD,wBAAgB,wBAAwB,CACpC,WAAW,EAAE,MAAM,EACnB,aAAa,EAAE,MAAM,UAGxB"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/** Default fraction of the context window above which usage counts as "over threshold". */
export const DEFAULT_THRESHOLD = 0.8;

/** Token limits for the models this module knows by name. */
export const MODEL_LIMITS = {
    "gpt-5": {
        inputLimit: 272000,
        outputLimit: 128000,
        contextWindow: 400000
    },
    "gpt-5-mini": {
        inputLimit: 272000,
        outputLimit: 128000,
        contextWindow: 400000,
    },
};

/** Limits returned for any model that has no entry in MODEL_LIMITS. */
const DEFAULT_LIMITS = {
    inputLimit: 128000,
    outputLimit: 16000,
    contextWindow: 128000,
};

/**
 * Looks up the token limits for a model.
 *
 * @param model - Model id, e.g. "gpt-5-mini".
 * @returns The matching MODEL_LIMITS entry, or DEFAULT_LIMITS when the model is unknown.
 */
export function getModelLimits(model) {
    // Own-property check: a plain truthy lookup would also match inherited
    // Object.prototype members such as "constructor" or "toString".
    if (Object.prototype.hasOwnProperty.call(MODEL_LIMITS, model)) {
        return MODEL_LIMITS[model];
    }
    return DEFAULT_LIMITS;
}

/**
 * Tells whether token usage has passed a fraction of the context window.
 *
 * @param totalTokens - Tokens currently in use.
 * @param contextWindow - Size of the context window in tokens.
 * @param threshold - Fraction of the window that triggers (default DEFAULT_THRESHOLD).
 * @returns `true` when `totalTokens` is strictly greater than `contextWindow * threshold`.
 */
export function isOverThreshold(totalTokens, contextWindow, threshold = DEFAULT_THRESHOLD) {
    return totalTokens > contextWindow * threshold;
}

/**
 * Expresses token usage as a percentage of the context window.
 *
 * @param totalTokens - Tokens currently in use.
 * @param contextWindow - Size of the context window in tokens.
 * @returns `(totalTokens / contextWindow) * 100`; the result is not clamped or rounded.
 */
export function calculateUsagePercentage(totalTokens, contextWindow) {
    return (totalTokens / contextWindow) * 100;
}
|
|
31
|
+
//# sourceMappingURL=modelLimits.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modelLimits.js","sourceRoot":"","sources":["../../../src/agent/context/modelLimits.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,iBAAiB,GAAG,GAAG,CAAA;AAEpC,MAAM,CAAC,MAAM,YAAY,GAAgC;IACrD,OAAO,EAAE;QACL,UAAU,EAAE,MAAM;QAClB,WAAW,EAAE,MAAM;QACnB,aAAa,EAAE,MAAM;KACxB;IAED,YAAY,EAAE;QACV,UAAU,EAAE,MAAM;QAClB,WAAW,EAAE,MAAM;QACnB,aAAa,EAAE,MAAM;KACxB;CACJ,CAAA;AAED,MAAM,cAAc,GAAgB;IAChC,UAAU,EAAE,MAAM;IAClB,WAAW,EAAE,KAAK;IAClB,aAAa,EAAE,MAAM;CACxB,CAAC;AAGF,MAAM,UAAU,cAAc,CAAC,KAAa;IACxC,IAAI,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,YAAY,CAAC,KAAK,CAAC,CAAA;IAC9B,CAAC;IAED,OAAO,cAAc,CAAA;AAEzB,CAAC;AAED,MAAM,UAAU,eAAe,CAC3B,WAAmB,EACnB,aAAqB,EACrB,YAAoB,iBAAiB;IAErC,OAAO,WAAW,GAAG,aAAa,GAAG,SAAS,CAAA;AAClD,CAAC;AAGD,MAAM,UAAU,wBAAwB,CACpC,WAAmB,EACnB,aAAqB;IAErB,OAAO,CAAC,WAAW,GAAG,aAAa,CAAC,GAAG,GAAG,CAAA;AAC9C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenEstimator.d.ts","sourceRoot":"","sources":["../../../src/agent/context/tokenEstimator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAEvC,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,YAAY,UA2BnD"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Serializes a value as JSON, always yielding a string.
 * `JSON.stringify` returns `undefined` for inputs such as `undefined` or
 * functions; those become the empty string here.
 */
function toJsonText(value) {
    const json = JSON.stringify(value);
    return typeof json === "string" ? json : "";
}

/**
 * Flattens a message's `content` into a single plain string.
 *
 * - String content is returned unchanged.
 * - Array content is converted part by part and joined with single spaces:
 *   string parts as-is; parts with a `text` property yield that property;
 *   parts with a string `value` yield it; parts whose `output` object has a
 *   string `value` yield that; anything else is JSON-serialized.
 * - Any other content is JSON-serialized.
 *
 * @param msg - Message object; only its `content` property is read.
 * @returns The extracted text. Always a string, even when `content` is missing.
 */
export function extractMessageText(msg) {
    const content = msg.content;
    if (typeof content === "string") {
        return content;
    }
    if (!Array.isArray(content)) {
        return toJsonText(content);
    }
    return content.map((part) => {
        if (typeof part === "string") {
            return part;
        }
        // The `in` operator throws on null and primitives, so serialize those directly.
        if (part === null || typeof part !== "object") {
            return toJsonText(part);
        }
        // Parts that carry text directly.
        if ("text" in part)
            return part.text;
        // Parts that carry a string `value`.
        if ("value" in part && typeof part.value === "string")
            return part.value;
        // Parts that wrap their payload in an `output` object.
        if ("output" in part && typeof part.output === "object" && part.output) {
            const output = part.output;
            if ("value" in output && typeof output.value === "string") {
                return output.value;
            }
        }
        return toJsonText(part);
    }).join(" ");
}
|
|
28
|
+
//# sourceMappingURL=tokenEstimator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenEstimator.js","sourceRoot":"","sources":["../../../src/agent/context/tokenEstimator.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,kBAAkB,CAAC,GAAiB;IAChD,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QAClC,OAAO,GAAG,CAAC,OAAO,CAAA;IACtB,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC7B,OAAO,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC3B,OAAO,IAAI,CAAA;YACf,CAAC;YACD,6BAA6B;YAC7B,IAAI,MAAM,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC,IAAI,CAAA;YACpC,mBAAmB;YACnB,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ;gBAAE,OAAO,IAAI,CAAC,KAAK,CAAA;YACxE,qBAAqB;YACrB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACrE,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;gBAC1B,IAAI,OAAO,IAAI,MAAM,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;oBACxD,OAAO,MAAM,CAAC,KAAK,CAAA;gBACvB,CAAC;YACL,CAAC;YAED,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IAChB,CAAC;IAED,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;AACtC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-multiturn.eval.d.ts","sourceRoot":"","sources":["../../../src/agent/evals/agent-multiturn.eval.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { evaluate } from "@lmnr-ai/lmnr";
|
|
2
|
+
import dataset from "./data/agent-multiturn.json" with { type: "json" };
|
|
3
|
+
import { LMNR_PROJECT_API_KEY } from "../../config.js";
|
|
4
|
+
import { multiturnWithMocks } from "./executors.js";
|
|
5
|
+
import { llmJudge, toolCallOrder, toolsAvoided } from "./evaluators.js";
|
|
6
|
+
/**
 * Executor handed to `evaluate`: forwards a single dataset entry to
 * `multiturnWithMocks` and returns whatever it returns.
 */
const executor = (data) => multiturnWithMocks(data);
|
|
9
|
+
// Runs the multi-turn evaluation over the JSON dataset.
// Each evaluator returns 1 when the target carries nothing for it to check,
// and otherwise delegates to the matching scorer from ./evaluators.js.
evaluate({
    data: dataset,
    executor,
    evaluators: {
        // Nothing forbidden for this case -> full score.
        toolsAvoided: (output, target) => (target?.forbiddenTools?.length ? toolsAvoided(output, target) : 1),
        // No expected order for this case -> full score.
        toolOrder: (output, target) => (target?.expectedToolOrder?.length ? toolCallOrder(output, target) : 1),
        // No target at all -> full score; otherwise ask the LLM judge.
        outputQuality: (output, target) => (target ? llmJudge(output, target) : 1)
    },
    config: { projectApiKey: LMNR_PROJECT_API_KEY },
    groupName: "multiturn-eval"
});
|
|
32
|
+
//# sourceMappingURL=agent-multiturn.eval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-multiturn.eval.js","sourceRoot":"","sources":["../../../src/agent/evals/agent-multiturn.eval.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,OAAO,MAAM,6BAA6B,CAAE,OAAM,IAAI,EAAE,MAAM,EAAC,CAAA;AACtE,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAEvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAExE,MAAM,QAAQ,GAAG,CAAC,IAAuB,EAAE,EAAE;IACzC,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;AACnC,CAAC,CAAA;AACD,QAAQ,CAAC;IACL,IAAI,EAAE,OAA0E;IAChF,QAAQ;IACR,UAAU,EAAE;QACR,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE;YAC7B,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM;gBAAE,OAAO,CAAC,CAAA;YAC7C,OAAO,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QACvC,CAAC;QACD,SAAS,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE;YAC1B,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAAE,MAAM;gBAAE,OAAO,CAAC,CAAA;YAChD,OAAO,aAAa,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QACxC,CAAC;QACD,aAAa,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE;YAC9B,IAAI,CAAC,MAAM;gBAAE,OAAO,CAAC,CAAA;YACrB,OAAO,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;QACnC,CAAC;KACJ;IACD,MAAM,EAAE,EAAE,aAAa,EAAE,oBAAoB,EAAE;IAC/C,SAAS,EAAE,gBAAgB;CAC9B,CAAC,CAAA"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"data": {
|
|
4
|
+
"prompt": "Read package.json and tell me the project name",
|
|
5
|
+
"mockTools": {
|
|
6
|
+
"readFile": {
|
|
7
|
+
"description": "Read the contents of a file at the specified path.",
|
|
8
|
+
"parameters": { "path": "string" },
|
|
9
|
+
"mockReturn": "{ \"name\": \"ai-agent-course\", \"version\": \"1.0.0\", \"description\": \"Build an AI agent from scratch\" }"
|
|
10
|
+
},
|
|
11
|
+
"shell": {
|
|
12
|
+
"description": "Execute the shell command and return its output",
|
|
13
|
+
"parameters": { "command": "string" },
|
|
14
|
+
"mockReturn": "command output here"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"config": {
|
|
18
|
+
"maxSteps": 5
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"target": {
|
|
22
|
+
"originalTask": "Read package.json and tell me the project name.",
|
|
23
|
+
"expectedToolOrder": ["readFile"],
|
|
24
|
+
"forbiddenTools": ["shell"],
|
|
25
|
+
"mockToolResults": {
|
|
26
|
+
"readFile": "{ \"name\": \"ai-agent-course\", \"version\": \"1.0.0\", \"description\": \"Build an AI agent from scratch\" }"
|
|
27
|
+
},
|
|
28
|
+
"category": "task-completion"
|
|
29
|
+
},
|
|
30
|
+
"metadata": {
|
|
31
|
+
"description": "Fresh task: User asks to read file and extract info. Should use readFile and not shell."
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"data": {
|
|
36
|
+
"messages": [
|
|
37
|
+
{
|
|
38
|
+
"role": "user",
|
|
39
|
+
"content": "I'm working on a Node.js project and need help understanding it"
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"role": "assistant",
|
|
43
|
+
"content": "I'd be happy to help you understand your Node.js project! What would you like to know about it?"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"role": "user",
|
|
47
|
+
"content": "List the files in the src directory and then read the main entry point"
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"mockTools": {
|
|
51
|
+
"listFiles": {
|
|
52
|
+
"description": "List all files and directories in the specified directory path",
|
|
53
|
+
"parameters": { "directory": "string" },
|
|
54
|
+
"mockReturn": "index.ts\nApp.tsx\ncomponents/\nutils/"
|
|
55
|
+
},
|
|
56
|
+
"readFile": {
|
|
57
|
+
"description": "Read the contents of a file at the specified path",
|
|
58
|
+
"parameters": { "path": "string" },
|
|
59
|
+
"mockReturn": "import { App } from './App';\n\nconsole.log('Starting application...');\nApp.run();"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"config": {
|
|
63
|
+
"maxSteps": 10
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"target": {
|
|
67
|
+
"originalTask": "List the files in the src directory and then read the main entry point",
|
|
68
|
+
"expectedToolOrder": ["listFiles", "readFile"],
|
|
69
|
+
"mockToolResults": {
|
|
70
|
+
"listFiles": "index.ts\nApp.tsx\ncomponents/\nutils/",
|
|
71
|
+
"readFile": "import { App } from './App';\n\nconsole.log('Starting application...');\nApp.run();"
|
|
72
|
+
},
|
|
73
|
+
"category": "conversation-continuation"
|
|
74
|
+
},
|
|
75
|
+
"metadata": {
|
|
76
|
+
"description": "Mid-conversation: Pre-filled history, user asks multi-step task. Should list then read."
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"data": {
|
|
81
|
+
"prompt": "What files are in the src directory?",
|
|
82
|
+
"mockTools": {
|
|
83
|
+
"listFiles": {
|
|
84
|
+
"description": "List all files and directories in the specified directory path",
|
|
85
|
+
"parameters": { "directory": "string" },
|
|
86
|
+
"mockReturn": "index.ts\nApp.tsx\ncomponents/"
|
|
87
|
+
},
|
|
88
|
+
"shell": {
|
|
89
|
+
"description": "Execute a shell command and return its output",
|
|
90
|
+
"parameters": { "command": "string" },
|
|
91
|
+
"mockReturn": "index.ts App.tsx components/"
|
|
92
|
+
}
|
|
93
|
+
},
|
|
94
|
+
"config": {
|
|
95
|
+
"maxSteps": 5
|
|
96
|
+
}
|
|
97
|
+
},
|
|
98
|
+
"target": {
|
|
99
|
+
"originalTask": "What files are in the src directory?",
|
|
100
|
+
"expectedToolOrder": ["listFiles"],
|
|
101
|
+
"forbiddenTools": ["shell"],
|
|
102
|
+
"mockToolResults": {
|
|
103
|
+
"listFiles": "index.ts\nApp.tsx\ncomponents/"
|
|
104
|
+
},
|
|
105
|
+
"category": "negative"
|
|
106
|
+
},
|
|
107
|
+
"metadata": {
|
|
108
|
+
"description": "Negative: File listing task should use listFiles tool, NOT shell (ls command). Tests tool category preference."
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"data": {
|
|
4
|
+
"prompt": "Remove the old backup.txt file",
|
|
5
|
+
"tools": ["readFile", "writeFile", "listFiles", "deleteFile"]
|
|
6
|
+
},
|
|
7
|
+
"target": {
|
|
8
|
+
"expectedTools": ["deleteFile"],
|
|
9
|
+
"category": "golden"
|
|
10
|
+
},
|
|
11
|
+
"metadata": {
|
|
12
|
+
"description": "File deletion request - should use deleteFile"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"data": {
|
|
17
|
+
"prompt": "What's in this project? Show me around.",
|
|
18
|
+
"tools": ["readFile", "writeFile", "listFiles", "deleteFile"]
|
|
19
|
+
},
|
|
20
|
+
"target": {
|
|
21
|
+
"expectedTools": ["listFiles"],
|
|
22
|
+
"category": "secondary"
|
|
23
|
+
},
|
|
24
|
+
"metadata": {
|
|
25
|
+
"description": "Ambiguous exploration request - likely uses listFiles"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
]
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { EvalTarget, MultiturnEvalTarget, MultiturnRes, SingleTurnRes } from "./types.js";
|
|
2
|
+
export declare function toolsSelected(output: SingleTurnRes, expected: EvalTarget): 1 | 0;
|
|
3
|
+
export declare function toolsAvoided(output: SingleTurnRes | MultiturnRes, expected: EvalTarget | MultiturnEvalTarget): 1 | 0;
|
|
4
|
+
export declare const toolCallOrder: (output: MultiturnRes, expected: MultiturnEvalTarget) => number;
|
|
5
|
+
export declare const llmJudge: (output: MultiturnRes, target: MultiturnEvalTarget) => Promise<number>;
|
|
6
|
+
//# sourceMappingURL=evaluators.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluators.d.ts","sourceRoot":"","sources":["../../../src/agent/evals/evaluators.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,mBAAmB,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAI/F,wBAAgB,aAAa,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,SAOxE;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE,aAAa,GAAG,YAAY,EAAE,QAAQ,EAAE,UAAU,GAAG,mBAAmB,SAM5G;AAED,eAAO,MAAM,aAAa,GAAI,QAAQ,YAAY,EAAE,UAAU,mBAAmB,WAgBhF,CAAA;AAGD,eAAO,MAAM,QAAQ,GAAU,QAAQ,YAAY,EAAE,QAAQ,mBAAmB,oBA2B/E,CAAA"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { generateText, Output } from "ai";
|
|
2
|
+
import { openai } from "@ai-sdk/openai";
|
|
3
|
+
import z from "zod";
|
|
4
|
+
/**
 * Scores whether every expected tool was selected.
 *
 * @param output - Result object; its `toolNames` list is read.
 * @param expected - Target object; its optional `expectedTools` list is read.
 * @returns 1 when there are no expected tools or all of them appear in
 *          `output.toolNames`; otherwise 0.
 */
export function toolsSelected(output, expected) {
    const required = expected.expectedTools;
    if (!required?.length) {
        return 1;
    }
    const chosen = new Set(output.toolNames);
    const anyMissing = required.some((tool) => !chosen.has(tool));
    return anyMissing ? 0 : 1;
}
|
|
11
|
+
/**
 * Scores whether all forbidden tools were avoided.
 *
 * Accepts either result shape: the called tools are read from
 * `output.toolNames` when that property exists, otherwise from
 * `output.toolsUsed`.
 *
 * @param output - Result object carrying `toolNames` or `toolsUsed`.
 * @param expected - Target object; its optional `forbiddenTools` list is read.
 * @returns 1 when nothing is forbidden or no forbidden tool was called; otherwise 0.
 */
export function toolsAvoided(output, expected) {
    const banned = expected.forbiddenTools;
    if (!banned?.length) {
        return 1;
    }
    const calledNames = "toolNames" in output ? output.toolNames : output.toolsUsed;
    const called = new Set(calledNames);
    return banned.every((tool) => !called.has(tool)) ? 1 : 0;
}
|
|
17
|
+
/**
 * Scores how much of the expected tool sequence appears, in order, within
 * the actual call sequence.
 *
 * The actual calls are scanned once from the start. Each call equal to the
 * next still-unmatched expected tool advances the match, and other calls in
 * between are ignored.
 *
 * @param output - Result object; its `toolCallOrder` list is read.
 * @param expected - Target object; its optional `expectedToolOrder` list is read.
 * @returns 1 when no order is expected; otherwise matched count divided by
 *          expected count (a value from 0 to 1).
 */
export const toolCallOrder = (output, expected) => {
    const wanted = expected.expectedToolOrder;
    if (!wanted?.length) {
        return 1;
    }
    const calls = output.toolCallOrder;
    let matched = 0;
    // Stop as soon as the whole expected sequence has been matched.
    for (let i = 0; i < calls.length && matched < wanted.length; i += 1) {
        if (calls[i] === wanted[matched]) {
            matched += 1;
        }
    }
    return matched / wanted.length;
};
|
|
32
|
+
/**
 * Asks an LLM to grade the agent's final response for a task.
 *
 * The judge is given the original task, the tools the agent called, the mock
 * tool results from the target, and the agent's final text. It must answer
 * with a structured object `{ score, reason }` where `score` is 1-10.
 *
 * @param output - Agent run result; `toolsUsed` and `text` are read.
 * @param target - Eval target; `originalTask` and `mockToolResults` are read.
 * @returns The judge's score divided by 10 (so 0.1 to 1).
 */
export const llmJudge = async (output, target) => {
    // Prompts are assembled from explicit lines so their wording does not
    // depend on source indentation.
    const system = [
        "You are an eval agent whose job is to return scores from 1-10 for the task performed by an AI agent.",
        "Details provided by the user include what the task was, the tools called by the agent and the results of the tool calls, along with the final text response generated.",
        "",
        "Scoring criteria:",
        "- 10: Response fully addresses the task using tool results correctly",
        "- 7-9: Response is mostly correct with minor issues",
        "- 4-6: Response partially addresses the task",
        "- 1-3: Response is mostly incorrect or irrelevant",
        "",
    ].join("\n");
    const prompt = [
        `Task: ${target.originalTask}`,
        `Tools Called : ${JSON.stringify(output.toolsUsed)}`,
        `Tool Results Provided : ${JSON.stringify(target.mockToolResults)}`,
        "",
        `AI's final response: ${output.text}`,
        "",
    ].join("\n");
    const result = await generateText({
        model: openai("gpt-5-mini"),
        system,
        prompt,
        output: Output.object({
            schema: z.object({
                score: z.number().min(1).max(10).describe("Score from 1-10 where 10 is perfect"),
                reason: z.string().describe("Brief explanation for the score"),
            }),
        }),
    });
    // Normalize the 1-10 score to the 0-1 range used by the other evaluators.
    return result.output.score / 10;
};
|
|
59
|
+
//# sourceMappingURL=evaluators.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluators.js","sourceRoot":"","sources":["../../../src/agent/evals/evaluators.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,EAAqB,MAAM,IAAI,CAAC;AAE7D,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AACxC,OAAO,CAAC,MAAM,KAAK,CAAC;AAEpB,MAAM,UAAU,aAAa,CAAC,MAAqB,EAAE,QAAoB;IACrE,MAAM,aAAa,GAAG,QAAQ,CAAC,aAAa,CAAA;IAC5C,IAAI,CAAC,aAAa,EAAE,MAAM;QAAE,OAAO,CAAC,CAAA;IAEpC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;IAE1C,OAAO,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC9D,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,MAAoC,EAAE,QAA0C;IACzG,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM;QAAE,OAAO,CAAC,CAAA;IAE9C,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,WAAW,IAAI,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;IAErF,OAAO,QAAQ,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACvE,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,MAAoB,EAAE,QAA6B,EAAE,EAAE;IACjF,MAAM,aAAa,GAAG,QAAQ,CAAC,iBAAiB,CAAA;IAEhD,IAAI,CAAC,aAAa,EAAE,MAAM;QAAE,OAAO,CAAC,CAAC;IAErC,MAAM,WAAW,GAAG,MAAM,CAAC,aAAa,CAAA;IAExC,IAAI,aAAa,GAAG,CAAC,CAAA;IACrB,KAAK,MAAM,QAAQ,IAAI,WAAW,EAAE,CAAC;QACjC,IAAI,QAAQ,KAAK,aAAa,CAAC,aAAa,CAAC,EAAE,CAAC;YAC5C,aAAa,EAAE,CAAC;YAChB,IAAI,aAAa,KAAK,aAAa,CAAC,MAAM;gBAAE,MAAK;QACrD,CAAC;IACL,CAAC;IAED,OAAO,aAAa,GAAG,aAAa,CAAC,MAAM,CAAA;AAC/C,CAAC,CAAA;AAGD,MAAM,CAAC,MAAM,QAAQ,GAAG,KAAK,EAAE,MAAoB,EAAE,MAA2B,EAAE,EAAE;IAChF,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;QAC9B,KAAK,EAAE,MAAM,CAAC,YAAY,CAAC;QAC3B,MAAM,EAAE;;;;;;;;aAQH;QACL,MAAM,EAAE,SAAS,MAAM,CAAC,YAAY;6BACf,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC;sCACvB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC;;mCAEzC,MAAM,CAAC,IAAI;aACjC;QACL,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;YAClB,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;gBACb,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,qCAAqC,CAAC;gBAChF,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAA
C,iCAAiC,CAAC;aACjE,CAAC;SACL,CAAC;KACL,CAAC,CAAA;IAEF,OAAO,MAAM,CAAC,MAAM,CAAC,KAAK,GAAG,EAAE,CAAA;AACnC,CAAC,CAAA"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { EvalData, MultiturnEvalData } from "./types.js";
|
|
2
|
+
import z from "zod";
|
|
3
|
+
export declare const TOOL_DEFINITIONS: Record<string, {
|
|
4
|
+
description: string;
|
|
5
|
+
inputSchema: z.ZodObject<z.ZodRawShape>;
|
|
6
|
+
}>;
|
|
7
|
+
export declare const singleTurnWithMocks: (data: EvalData) => Promise<{
|
|
8
|
+
toolCalls: {
|
|
9
|
+
toolName: string;
|
|
10
|
+
args: unknown;
|
|
11
|
+
}[];
|
|
12
|
+
toolNames: string[];
|
|
13
|
+
selectedAny: boolean;
|
|
14
|
+
}>;
|
|
15
|
+
export declare const multiturnWithMocks: (data: MultiturnEvalData) => Promise<{
|
|
16
|
+
text: string;
|
|
17
|
+
steps: {
|
|
18
|
+
toolCalls: {
|
|
19
|
+
toolName: string;
|
|
20
|
+
args: unknown;
|
|
21
|
+
}[] | undefined;
|
|
22
|
+
toolResults: {
|
|
23
|
+
toolName: string;
|
|
24
|
+
result: unknown;
|
|
25
|
+
}[] | undefined;
|
|
26
|
+
text: string | undefined;
|
|
27
|
+
}[];
|
|
28
|
+
toolsUsed: string[];
|
|
29
|
+
toolCallOrder: string[];
|
|
30
|
+
}>;
|
|
31
|
+
//# sourceMappingURL=executors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"executors.d.ts","sourceRoot":"","sources":["../../../src/agent/evals/executors.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAE9D,OAAO,CAAC,MAAM,KAAK,CAAC;AAWpB,eAAO,MAAM,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,WAAW,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAA;CAAE,CA2B7G,CAAA;AAGD,eAAO,MAAM,mBAAmB,GAAU,MAAM,QAAQ;;;;;;;EAkCvD,CAAA;AAED,eAAO,MAAM,kBAAkB,GAAU,MAAM,iBAAiB;;;;;;;;;;;;;;;EAmD/D,CAAA"}
|