clawlet 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/package.json +3 -1
- package/src/agent.eval.test.ts +139 -132
- package/src/agent.ts +64 -1020
- package/src/cli.ts +3 -1
- package/src/evals/extend_agents_md.yaml +161 -0
- package/src/evals/external_data.yaml +5 -5
- package/src/evals/skill_sandbox_execution.yaml +7 -7
- package/src/llm.ts +35 -0
- package/src/logger.ts +39 -0
- package/src/memory.ts +104 -27
- package/src/storage.ts +152 -95
- package/src/tools.ts +1044 -0
- package/template/AGENTS.template +1 -1
package/README.md
CHANGED
|
@@ -80,6 +80,14 @@ $ pnpm start
|
|
|
80
80
|
- [ ] an *.app for mac
|
|
81
81
|
- [ ] an .exe for windows
|
|
82
82
|
|
|
83
|
+
# Similar projects
|
|
84
|
+
|
|
85
|
+
* Typescript
|
|
86
|
+
* <https://github.com/openclaw/openclaw>
|
|
87
|
+
* GO
|
|
88
|
+
* <https://github.com/sipeed/picoclaw>
|
|
89
|
+
* <https://github.com/HKUDS/nanobot>
|
|
90
|
+
|
|
83
91
|
# License
|
|
84
92
|
|
|
85
93
|
clawlet is copyright 2026 by DracoBlue and licensed under the MIT License.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "clawlet",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "A lightweight AI based personal assistant.",
|
|
5
5
|
"main": "src/cli.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -30,9 +30,11 @@
|
|
|
30
30
|
"@ai-sdk/openai": "^1.3.22",
|
|
31
31
|
"@ai-sdk/openai-compatible": "^2.0.28",
|
|
32
32
|
"@libsql/client": "^0.17.0",
|
|
33
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
33
34
|
"ai": "^6.0.58",
|
|
34
35
|
"dotenv": "^17.2.2",
|
|
35
36
|
"grammy": "^1.39.3",
|
|
37
|
+
"pino": "^10.3.1",
|
|
36
38
|
"tsx": "^4.21.0",
|
|
37
39
|
"turndown": "^7.2.2",
|
|
38
40
|
"unstorage": "^1.17.4",
|
package/src/agent.eval.test.ts
CHANGED
|
@@ -3,39 +3,15 @@ import fs from 'node:fs/promises';
|
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
import { fileURLToPath } from 'node:url';
|
|
5
5
|
import YAML from 'yaml';
|
|
6
|
-
import { createStorage } from 'unstorage';
|
|
7
|
-
import memoryDriver from 'unstorage/drivers/memory';
|
|
8
6
|
import { generateText } from 'ai';
|
|
9
|
-
import { Agent
|
|
7
|
+
import { Agent } from './agent.js';
|
|
10
8
|
import { AgentMemory } from './memory.js';
|
|
11
|
-
import {
|
|
12
|
-
import type { ModelMessage } from 'ai';
|
|
13
|
-
|
|
14
|
-
// --- MOCK SETUP ---
|
|
15
|
-
class TestAgentMemory extends AgentMemory {
|
|
16
|
-
constructor() {
|
|
17
|
-
super();
|
|
18
|
-
this.workspace = createStorage({ driver: memoryDriver() });
|
|
19
|
-
this.secrets = new LibSqlKeyValueStorage(':memory:');
|
|
20
|
-
this.history = new LibSqlListStorage<ModelMessage>(':memory:');
|
|
21
|
-
this.skillHistory = new SkillHistoryStorage<ModelMessage>(':memory:');
|
|
22
|
-
}
|
|
23
|
-
}
|
|
9
|
+
import { model } from './llm.js';
|
|
24
10
|
|
|
25
11
|
const __filename = fileURLToPath(import.meta.url);
|
|
26
12
|
const __dirname = path.dirname(__filename);
|
|
27
13
|
|
|
28
14
|
const evalDir = path.join(__dirname, 'evals');
|
|
29
|
-
const dirFiles = await fs.readdir(evalDir);
|
|
30
|
-
const yamlFiles = dirFiles.filter(f => f.endsWith('.yaml') || f.endsWith('.yml'));
|
|
31
|
-
|
|
32
|
-
const testCases = await Promise.all(yamlFiles.map(async (file) => {
|
|
33
|
-
const content = await fs.readFile(path.join(evalDir, file), 'utf-8');
|
|
34
|
-
return {
|
|
35
|
-
filename: file,
|
|
36
|
-
data: YAML.parse(content)
|
|
37
|
-
};
|
|
38
|
-
}));
|
|
39
15
|
|
|
40
16
|
/**
|
|
41
17
|
* Unstorage uses `:` as path separator internally.
|
|
@@ -55,7 +31,7 @@ async function runLlmJudge(
|
|
|
55
31
|
agentOutput: string
|
|
56
32
|
): Promise<{ pass: boolean; reasoning: string }> {
|
|
57
33
|
const { text } = await generateText({
|
|
58
|
-
model
|
|
34
|
+
model,
|
|
59
35
|
messages: [
|
|
60
36
|
{
|
|
61
37
|
role: 'system',
|
|
@@ -86,133 +62,164 @@ Reasoning: <brief explanation>`
|
|
|
86
62
|
// Default timeout for LLM-backed eval tests (2 minutes)
|
|
87
63
|
const EVAL_TIMEOUT = 120_000;
|
|
88
64
|
|
|
89
|
-
|
|
65
|
+
const runTestCaseFile = async (filename: string) => {
|
|
66
|
+
const content = await fs.readFile(path.join(evalDir, filename), 'utf-8');
|
|
67
|
+
const data = YAML.parse(content);
|
|
68
|
+
// 1. SETUP
|
|
69
|
+
const memory = await AgentMemory.createInMemory();
|
|
70
|
+
|
|
71
|
+
// Seed workspace files
|
|
72
|
+
if (data.setup?.files) {
|
|
73
|
+
for (const [name, content] of Object.entries(data.setup.files)) {
|
|
74
|
+
await memory.workspace.setItem(normalizeStorageKey(name), content as string);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
90
77
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
const
|
|
78
|
+
// Seed KV store
|
|
79
|
+
if (data.setup?.kv) {
|
|
80
|
+
for (const [key, value] of Object.entries(data.setup.kv)) {
|
|
81
|
+
await memory.secrets.set(key, value as string);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
94
84
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
85
|
+
// 2. EXECUTION
|
|
86
|
+
const agent = new Agent(memory, model);
|
|
87
|
+
let output = "";
|
|
98
88
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
89
|
+
// Output capture
|
|
90
|
+
agent.addOutput({
|
|
91
|
+
onAgentStart: () => {},
|
|
92
|
+
onResponseChunk: () => {},
|
|
93
|
+
onResponseEnd: (full) => { output = full; },
|
|
94
|
+
onError: (e) => { throw e; }
|
|
95
|
+
});
|
|
105
96
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
}
|
|
97
|
+
await (new Promise((resolve, reject) => {
|
|
98
|
+
agent.addInput({
|
|
99
|
+
onMessage: async (handler : (text: string, label: string) => Promise<void>) => {
|
|
100
|
+
await handler(data.input, 'test');
|
|
101
|
+
resolve(true);
|
|
102
|
+
},
|
|
103
|
+
start: () => {}
|
|
104
|
+
})
|
|
105
|
+
}));
|
|
112
106
|
|
|
113
|
-
|
|
114
|
-
const agent = new Agent(memory);
|
|
115
|
-
let output = "";
|
|
107
|
+
// 3. ASSERTIONS
|
|
116
108
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
});
|
|
109
|
+
// a) Response keywords (ALL must match)
|
|
110
|
+
if (data.validate?.response?.contains) {
|
|
111
|
+
data.validate.response.contains.forEach((keyword: string) => {
|
|
112
|
+
expect(output.toLowerCase()).toContain(keyword.toLowerCase());
|
|
113
|
+
});
|
|
114
|
+
}
|
|
124
115
|
|
|
125
|
-
|
|
126
|
-
|
|
116
|
+
// b) Response keywords (ALL must not match)
|
|
117
|
+
if (data.validate?.response?.must_not_contain) {
|
|
118
|
+
data.validate.response.must_not_contain.forEach((keyword: string) => {
|
|
119
|
+
expect(output.toLowerCase()).not.toContain(keyword.toLowerCase());
|
|
120
|
+
});
|
|
121
|
+
}
|
|
127
122
|
|
|
128
|
-
|
|
123
|
+
// c) Response keywords (ANY must match — at least one)
|
|
124
|
+
if (data.validate?.response?.contains_any) {
|
|
125
|
+
const matches = data.validate.response.contains_any.some(
|
|
126
|
+
(keyword: string) => output.toLowerCase().includes(keyword.toLowerCase())
|
|
127
|
+
);
|
|
128
|
+
expect(
|
|
129
|
+
matches,
|
|
130
|
+
`Expected response to contain at least one of: ${data.validate.response.contains_any.join(', ')}`
|
|
131
|
+
).toBe(true);
|
|
132
|
+
}
|
|
129
133
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
+
// d) File content check
|
|
135
|
+
if (data.validate?.files) {
|
|
136
|
+
for (const [filepath, rules] of Object.entries(data.validate.files as Record<string, any>)) {
|
|
137
|
+
const storageKey = normalizeStorageKey(filepath);
|
|
138
|
+
const content = await memory.workspace.getItem(storageKey);
|
|
139
|
+
// Unstorage memory driver may auto-parse JSON strings into objects
|
|
140
|
+
const textContent = content
|
|
141
|
+
? (typeof content === 'object' ? JSON.stringify(content, null, 2) : String(content))
|
|
142
|
+
: "";
|
|
143
|
+
|
|
144
|
+
// ALL must be present
|
|
145
|
+
if (rules.contains) {
|
|
146
|
+
rules.contains.forEach((str: string) => {
|
|
147
|
+
expect(textContent, `File "${filepath}" should contain "${str}"`).toContain(str);
|
|
134
148
|
});
|
|
135
149
|
}
|
|
136
150
|
|
|
137
|
-
//
|
|
138
|
-
if (
|
|
139
|
-
const matches =
|
|
140
|
-
(
|
|
151
|
+
// At least ONE must be present
|
|
152
|
+
if (rules.contains_any) {
|
|
153
|
+
const matches = rules.contains_any.some(
|
|
154
|
+
(str: string) => textContent.includes(str)
|
|
141
155
|
);
|
|
142
156
|
expect(
|
|
143
157
|
matches,
|
|
144
|
-
`
|
|
158
|
+
`File "${filepath}" should contain at least one of: ${rules.contains_any.join(', ')}`
|
|
145
159
|
).toBe(true);
|
|
146
160
|
}
|
|
147
161
|
|
|
148
|
-
//
|
|
149
|
-
if (
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
// Unstorage memory driver may auto-parse JSON strings into objects
|
|
154
|
-
const textContent = content
|
|
155
|
-
? (typeof content === 'object' ? JSON.stringify(content, null, 2) : String(content))
|
|
156
|
-
: "";
|
|
157
|
-
|
|
158
|
-
// ALL must be present
|
|
159
|
-
if (rules.contains) {
|
|
160
|
-
rules.contains.forEach((str: string) => {
|
|
161
|
-
expect(textContent, `File "${filepath}" should contain "${str}"`).toContain(str);
|
|
162
|
-
});
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// At least ONE must be present
|
|
166
|
-
if (rules.contains_any) {
|
|
167
|
-
const matches = rules.contains_any.some(
|
|
168
|
-
(str: string) => textContent.includes(str)
|
|
169
|
-
);
|
|
170
|
-
expect(
|
|
171
|
-
matches,
|
|
172
|
-
`File "${filepath}" should contain at least one of: ${rules.contains_any.join(', ')}`
|
|
173
|
-
).toBe(true);
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
// NONE must be present
|
|
177
|
-
if (rules.must_not_contain) {
|
|
178
|
-
rules.must_not_contain.forEach((str: string) => {
|
|
179
|
-
expect(textContent, `File "${filepath}" should NOT contain "${str}"`).not.toContain(str);
|
|
180
|
-
});
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// File must exist (non-empty)
|
|
184
|
-
if (rules.exists === true) {
|
|
185
|
-
expect(textContent.length, `File "${filepath}" should exist and not be empty`).toBeGreaterThan(0);
|
|
186
|
-
}
|
|
187
|
-
}
|
|
162
|
+
// NONE must be present
|
|
163
|
+
if (rules.must_not_contain) {
|
|
164
|
+
rules.must_not_contain.forEach((str: string) => {
|
|
165
|
+
expect(textContent, `File "${filepath}" should NOT contain "${str}"`).not.toContain(str);
|
|
166
|
+
});
|
|
188
167
|
}
|
|
189
168
|
|
|
190
|
-
//
|
|
191
|
-
if (
|
|
192
|
-
|
|
193
|
-
const value = await memory.secrets.get(key);
|
|
194
|
-
|
|
195
|
-
if (rules.exists === true) {
|
|
196
|
-
expect(value, `KV key "${key}" should exist`).not.toBeNull();
|
|
197
|
-
}
|
|
198
|
-
if (rules.contains) {
|
|
199
|
-
rules.contains.forEach((str: string) => {
|
|
200
|
-
expect(value ?? '', `KV key "${key}" should contain "${str}"`).toContain(str);
|
|
201
|
-
});
|
|
202
|
-
}
|
|
203
|
-
}
|
|
169
|
+
// File must exist (non-empty)
|
|
170
|
+
if (rules.exists === true) {
|
|
171
|
+
expect(textContent.length, `File "${filepath}" should exist and not be empty`).toBeGreaterThan(0);
|
|
204
172
|
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
205
175
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
);
|
|
213
|
-
|
|
176
|
+
// e) KV store assertions
|
|
177
|
+
if (data.validate?.kv) {
|
|
178
|
+
for (const [key, rules] of Object.entries(data.validate.kv as Record<string, any>)) {
|
|
179
|
+
const value = await memory.secrets.get(key);
|
|
180
|
+
|
|
181
|
+
if (rules.exists === true) {
|
|
182
|
+
expect(value, `KV key "${key}" should exist`).not.toBeNull();
|
|
183
|
+
}
|
|
184
|
+
if (rules.contains) {
|
|
185
|
+
rules.contains.forEach((str: string) => {
|
|
186
|
+
expect(value ?? '', `KV key "${key}" should contain "${str}"`).toContain(str);
|
|
187
|
+
});
|
|
214
188
|
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
215
191
|
|
|
216
|
-
|
|
217
|
-
|
|
192
|
+
// f) LLM judge evaluation using localModel
|
|
193
|
+
if (data.validate?.llm_eval) {
|
|
194
|
+
const { pass, reasoning } = await runLlmJudge(
|
|
195
|
+
data.validate.llm_eval,
|
|
196
|
+
data.input,
|
|
197
|
+
output
|
|
198
|
+
);
|
|
199
|
+
expect(pass, `LLM judge failed:\n${reasoning} (eval: ${data.validate.llm_eval}, output: ${output})`).toBe(true);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
describe('Agent Evals (LLM)', () => {
|
|
204
|
+
it(`bootstrap_trigger`, async () => runTestCaseFile('bootstrap_trigger.yaml'), EVAL_TIMEOUT);
|
|
205
|
+
it(`connection_auth`, async () => runTestCaseFile('connection_auth.yaml'), EVAL_TIMEOUT);
|
|
206
|
+
it(`create_python_file`, async () => runTestCaseFile('create_python_file.yaml'), EVAL_TIMEOUT);
|
|
207
|
+
it(`directory_traversal`, async () => runTestCaseFile('directory_traversal.yaml'), EVAL_TIMEOUT);
|
|
208
|
+
it(`empty_directory`, async () => runTestCaseFile('empty_directory.yaml'), EVAL_TIMEOUT);
|
|
209
|
+
it(`extend_agents_md`, async () => runTestCaseFile('extend_agents_md.yaml'), EVAL_TIMEOUT * 2);
|
|
210
|
+
it(`external_data`, async () => runTestCaseFile('external_data.yaml'), EVAL_TIMEOUT);
|
|
211
|
+
it(`file_not_found`, async () => runTestCaseFile('file_not_found.yaml'), EVAL_TIMEOUT);
|
|
212
|
+
it(`memory_persistence`, async () => runTestCaseFile('memory_persistence.yaml'), EVAL_TIMEOUT);
|
|
213
|
+
it(`move_and_rename`, async () => runTestCaseFile('move_and_rename.yaml'), EVAL_TIMEOUT);
|
|
214
|
+
it(`needle_in_haystack`, async () => runTestCaseFile('needle_in_haystack.yaml'), EVAL_TIMEOUT);
|
|
215
|
+
it(`persona_tone`, async () => runTestCaseFile('persona_tone.yaml'), EVAL_TIMEOUT);
|
|
216
|
+
it(`rag_user`, async () => runTestCaseFile('rag_user.yaml'), EVAL_TIMEOUT);
|
|
217
|
+
it(`reasoning_multi_step`, async () => runTestCaseFile('reasoning_multi_step.yaml'), EVAL_TIMEOUT);
|
|
218
|
+
it(`refactoring_edit`, async () => runTestCaseFile('refactoring_edit.yaml'), EVAL_TIMEOUT);
|
|
219
|
+
it(`skill_sandbox_execution`, async () => runTestCaseFile('skill_sandbox_execution.yaml'), EVAL_TIMEOUT);
|
|
220
|
+
it(`skill_system_installation`, async () => runTestCaseFile('skill_system_installation.yaml'), EVAL_TIMEOUT);
|
|
221
|
+
it(`soft_delete`, async () => runTestCaseFile('soft_delete.yaml'), EVAL_TIMEOUT);
|
|
222
|
+
it(`stat_check`, async () => runTestCaseFile('stat_check.yaml'), EVAL_TIMEOUT);
|
|
223
|
+
it(`workflow_cleanup`, async () => runTestCaseFile('workflow_cleanup.yaml'), EVAL_TIMEOUT);
|
|
224
|
+
it(`write_complex_json`, async () => runTestCaseFile('write_complex_json.yaml'), EVAL_TIMEOUT);
|
|
218
225
|
});
|