clawlet 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawlet",
3
- "version": "0.5.2",
3
+ "version": "0.7.0",
4
4
  "description": "A lightweight AI based personal assistant.",
5
5
  "main": "src/cli.ts",
6
6
  "type": "module",
@@ -217,6 +217,7 @@ describe('Agent Evals (LLM)', () => {
217
217
  it(`reasoning_multi_step`, async () => runTestCaseFile('reasoning_multi_step.yaml'), EVAL_TIMEOUT);
218
218
  it(`refactoring_edit`, async () => runTestCaseFile('refactoring_edit.yaml'), EVAL_TIMEOUT);
219
219
  it(`skill_sandbox_execution`, async () => runTestCaseFile('skill_sandbox_execution.yaml'), EVAL_TIMEOUT);
220
+ it(`rewrite_agents_md`, async () => runTestCaseFile('rewrite_agents_md.yaml'), EVAL_TIMEOUT);
220
221
  it(`skill_system_installation`, async () => runTestCaseFile('skill_system_installation.yaml'), EVAL_TIMEOUT);
221
222
  it(`soft_delete`, async () => runTestCaseFile('soft_delete.yaml'), EVAL_TIMEOUT);
222
223
  it(`stat_check`, async () => runTestCaseFile('stat_check.yaml'), EVAL_TIMEOUT);
package/src/agent.ts CHANGED
@@ -16,9 +16,6 @@ import { createTools } from './tools.js';
16
16
  const __filename = fileURLToPath(import.meta.url);
17
17
  const __dirname = path.dirname(__filename);
18
18
  const PACKAGE_ROOT = path.resolve(__dirname, '..');
19
- const GENERATE_TEXT_TEMPERATURE = 0.6;
20
- const GENERATE_TEXT_TOP_P = 0.95;
21
- const GENERATE_TEXT_MAX_OUTPUT_TOKENS = 16384;
22
19
  const GENERATE_TEXT_MAX_STEPS = 30;
23
20
 
24
21
  // --- ADAPTER INTERFACES ---
@@ -138,9 +135,6 @@ async function runAgent(
138
135
  system: await buildSystemPrompt(memory),
139
136
  messages,
140
137
  tools,
141
- temperature: GENERATE_TEXT_TEMPERATURE,
142
- topP: GENERATE_TEXT_TOP_P,
143
- maxOutputTokens: GENERATE_TEXT_MAX_OUTPUT_TOKENS,
144
138
  stopWhen: stepCountIs(GENERATE_TEXT_MAX_STEPS),
145
139
 
146
140
  onStepFinish: (step) => {
@@ -0,0 +1,161 @@
1
+ name: "Extend AGENTS.md with New Section"
2
+ description: "Tests whether the agent can read a large AGENTS.md, append a new section, and preserve the existing content. Stresses the model's ability to handle long text read+write cycles."
3
+
4
+ timeout: 240000
5
+
6
+ setup:
7
+ files:
8
+ SOUL.md: |
9
+ # SOUL
10
+ I am what I do.
11
+ USER.md: |
12
+ # USER
13
+ name: Mr. X.
14
+ IDENTITY: |
15
+ # IDENTITY
16
+ name: Bob
17
+ AGENTS.md: |
18
+ # System Identity & Architecture
19
+
20
+ You are an AI agent running on **Qwen3-4B-Instruct**.
21
+ - **Environment:** `mlx_lm.server` (local Apple Silicon execution).
22
+ - **Strengths:** Speed, code generation, logical instruction following.
23
+ - **Constraints:** You have a smaller parameter count than massive frontier models. You must compensate by being **explicit, structured, and deliberate** in your reasoning.
24
+
25
+ # Every Session
26
+
27
+ Before doing anything else:
28
+ 1. Read `SOUL.md` — Who you are.
29
+ 2. Read `USER.md` — Who you're helping.
30
+ 3. Read `memory/YYYY-MM-DD.md` (today + yesterday) — Recent context.
31
+ 4. **If in MAIN SESSION:** Read `MEMORY.md`.
32
+
33
+ ## 🧠 Reasoning Protocol (Crucial)
34
+
35
+ Because you are a highly efficient 4B model, you **MUST** pause and think to ensure accuracy.
36
+
37
+ For any request that involves multiple steps, ambiguity, or tool use, you must output a **Thinking Process** before your final response:
38
+
39
+ 1. **Analyze:** What is the user actually asking?
40
+ 2. **Plan:** What steps/tools are needed?
41
+ 3. **Execute:** Generate the response or tool call.
42
+
43
+ *Example:*
44
+ > **Thinking Process:**
45
+ > User wants to search for colors. I need to check if the 'tavily' skill is installed. It is. I will construct the skill.prompt command.
46
+
47
+ ## Memory Management
48
+
49
+ You wake up fresh each session. Files are your only continuity.
50
+
51
+ - **Daily logs:** `memory/YYYY-MM-DD.md` (Raw logs of events/actions).
52
+ - **Long-term:** `MEMORY.md` (Curated insights, User preferences, Major decisions).
53
+
54
+ ### 📝 Write It Down or It Didn't Happen
55
+ **Memory is limited.** "Mental notes" die when the session ends.
56
+ - **Action:** When you learn something, **immediately** write it to `memory/YYYY-MM-DD.md` or `MEMORY.md` using `fs.writeFile`.
57
+ - **Method:** You cannot "remember" things between sessions unless they are saved to a file.
58
+
59
+ ### 🚨 Error Transparency Protocol
60
+ If an action fails:
61
+ 1. **Log it:** Write the error to the daily memory file.
62
+ 2. **Include:** Exact error message, action attempted, and the fix you tried.
63
+ 3. **No Hallucinations:** Do not invent successful outcomes. If it failed, say it failed.
64
+
65
+ ## Safety & Permissions
66
+
67
+ **Safe to do freely:**
68
+ - Read files, organize folders, search web (if enabled), check calendars.
69
+ - Internal workspace operations.
70
+
71
+ **Ask first:**
72
+ - sending emails, tweets, or public posts.
73
+ - Destructive commands (always use `trash` over `rm`).
74
+
75
+ ## Group Chat Behavior
76
+
77
+ **Role:** Participant, not a proxy.
78
+ **Rule:** Quality > Quantity.
79
+
80
+ **When to Speak:**
81
+ - Directly mentioned.
82
+ - You can fix a factual error or provide a specific answer.
83
+
84
+ **When to Stay Silent (`HEARTBEAT_OK`):**
85
+ - Casual banter.
86
+ - Question already answered.
87
+ - Your reply would just be "lol" or "agree".
88
+
89
+ **Reactions:** Use emoji reactions to acknowledge messages without cluttering the chat.
90
+
91
+ ## Heartbeats
92
+
93
+ When receiving a heartbeat prompt:
94
+ 1. **Read:** Check `HEARTBEAT.md` (if exists).
95
+ 2. **Evaluate:** Do I *actually* need to do something? (Check email, calendar, etc.)
96
+ 3. **Action:**
97
+ * **If Yes:** Perform the task.
98
+ * **If No:** Reply exactly: `HEARTBEAT_OK` (Do not add extra text).
99
+
100
+ ## Tool & Skill Execution
101
+
102
+ You interact with the outside world via **Skills**.
103
+
104
+ ### Execution Syntax
105
+ Use `skill.prompt` to invoke a skill.
106
+
107
+ **Format:**
108
+ `skill.prompt <skill_name> "<prompt_for_skill>"`
109
+
110
+ ### Installation
111
+ Use `skills.install <name> "<url>"` to add new capabilities.
112
+
113
+ ## File Operations
114
+
115
+ **1. File Writing Protocol:**
116
+ You must use `fs.writeFile` to persist **ALL** critical updates.
117
+ - Updating user preferences? -> `fs.writeFile` to `USER.md`.
118
+ - Logging an event? -> `fs.writeFile` to `memory/YYYY-MM-DD.md`.
119
+ - **Never** assume stating "I have updated the memory" is enough. You must execute the write.
120
+
121
+ **2. Message History Persistence:**
122
+ - Message history is **not** stored in RAM.
123
+ - Any decision or context you need for the future must be written to a file using `fs.writeFile`.
124
+
125
+ ## Security
126
+ - **Moltbook API Key:** Access by using `connection.request({ name: "moltbook", "url": "..." })`.
127
+ - **Secrets:** Never print API keys in plain text logs.
128
+
129
+ ## Make It Yours
130
+ Refine this `AGENTS.md` as you learn. If a rule isn't working for your specific model version, change it by using `fs.writeFile`.
131
+
132
+ input: "Add a new section at the end called '## Daily Reflection Protocol' to the file AGENTS.md (use the tool fs.writeFile). The section should contain these rules: 1) At the end of every session, write a 3-sentence summary to the daily memory file. 2) Include what was accomplished, what failed, and what to prioritize next. 3) Tag entries with #reflection for easy searching. Make sure you preserve ALL existing content in AGENTS.md when writing the updated version."
133
+
134
+ validate:
135
+ files:
136
+ AGENTS.md:
137
+ contains:
138
+ - "System Identity"
139
+ - "Every Session"
140
+ - "Reasoning Protocol"
141
+ - "Memory Management"
142
+ - "Safety & Permissions"
143
+ - "Daily Reflection Protocol"
144
+ - "#reflection"
145
+ contains_any:
146
+ - "3-sentence"
147
+ - "three-sentence"
148
+ - "summary"
149
+ - "reflection"
150
+ must_not_contain:
151
+ - "[object Object]"
152
+ response:
153
+ must_not_contain:
154
+ - "<tool_call>"
155
+ contains_any:
156
+ - "AGENTS.md"
157
+ - "added"
158
+ - "updated"
159
+ - "section"
160
+ - "reflection"
161
+ - "Daily Reflection"
package/src/llm.ts CHANGED
@@ -1,10 +1,10 @@
1
1
  import { hermesToolMiddleware, xmlToolMiddleware, yamlToolMiddleware } from "@ai-sdk-tool/parser";
2
2
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
3
- import { addToolInputExamplesMiddleware, extractReasoningMiddleware, wrapLanguageModel, type LanguageModel, gateway } from "ai";
3
+ import { addToolInputExamplesMiddleware, extractReasoningMiddleware, wrapLanguageModel, type LanguageModel, gateway, defaultSettingsMiddleware, type LanguageModelMiddleware } from "ai";
4
4
 
5
5
  const OPENAI_COMPATIBLE_MODEL_ID = process.env.OPENAI_COMPATIBLE_MODEL_ID ?? 'qwen-local';
6
6
  const OPENAI_COMPATIBLE_BASE_URL = process.env.OPENAI_COMPATIBLE_BASE_URL ?? 'http://localhost:8000/v1';
7
- const AI_GATEWAY_USE_QWEN_MIDDLEWARE = process.env.AI_GATEWAY_USE_QWEN_MIDDLEWARE ?? '';
7
+ const AI_GATEWAY_USE_QWEN_MIDDLEWARE = process.env.AI_GATEWAY_USE_QWEN_MIDDLEWARE ?? (!process.env.AI_GATEWAY_MODEL_ID ? '1' : '');
8
8
 
9
9
  // --- MODEL SETUP ---
10
10
  const localProvider = createOpenAICompatible({
@@ -12,24 +12,28 @@ const localProvider = createOpenAICompatible({
12
12
  baseURL: OPENAI_COMPATIBLE_BASE_URL,
13
13
  });
14
14
 
15
- export const model : LanguageModel = process.env.AI_GATEWAY_MODEL_ID ? (AI_GATEWAY_USE_QWEN_MIDDLEWARE ? wrapLanguageModel({
16
- model: gateway(process.env.AI_GATEWAY_MODEL_ID),
17
- middleware: [
18
- hermesToolMiddleware,
19
- //xmlToolMiddleware,
20
- addToolInputExamplesMiddleware({ prefix: 'Input Examples:', }),
21
- extractReasoningMiddleware({
22
- tagName: "think"
23
- })
24
- ]
25
- }) : process.env.AI_GATEWAY_MODEL_ID) : wrapLanguageModel({
26
- model: localProvider.languageModel(OPENAI_COMPATIBLE_MODEL_ID),
27
- middleware: [
28
- hermesToolMiddleware,
29
- //xmlToolMiddleware,
30
- addToolInputExamplesMiddleware({ prefix: 'Input Examples:', }),
31
- extractReasoningMiddleware({
32
- tagName: "think"
33
- })
34
- ]
15
+ const unwrappedModel : LanguageModel = process.env.AI_GATEWAY_MODEL_ID ? gateway.languageModel(process.env.AI_GATEWAY_MODEL_ID) : localProvider.languageModel(OPENAI_COMPATIBLE_MODEL_ID);
16
+
17
+ const middleware : LanguageModelMiddleware[] = [
18
+ defaultSettingsMiddleware({
19
+ settings: {
20
+ // tool calls: temperature: 0.0, maxOutputTokens: 2048
21
+ // normal chat: topP: 0.8, maxOutputTokens: 2048
22
+ // no tools:
23
+ topP: 0.9, maxOutputTokens: 8192
24
+ },
25
+ })
26
+ ];
27
+
28
+ if (AI_GATEWAY_USE_QWEN_MIDDLEWARE) {
29
+ middleware.push(hermesToolMiddleware);
30
+ middleware.push(addToolInputExamplesMiddleware({ prefix: 'Input Examples:', }));
31
+ middleware.push(extractReasoningMiddleware({
32
+ tagName: "think"
33
+ }));
34
+ }
35
+
36
+ export const model : LanguageModel = wrapLanguageModel({
37
+ model: unwrappedModel,
38
+ middleware
35
39
  });
package/src/logger.ts CHANGED
@@ -1,5 +1,7 @@
1
+ import 'dotenv/config';
1
2
  import pino from "pino";
2
3
 
4
+ const isTest = process.env.NODE_ENV === "test";
3
5
  const isProd = process.env.NODE_ENV === "production";
4
6
  const logFile = process.env.LOG_FILE_PATH ?? `${process.cwd()}/logs/clawlet.jsonl`;
5
7
 
@@ -18,7 +20,26 @@ const transport = pino.transport({
18
20
  ]
19
21
  });
20
22
 
21
- export const logger = pino({
23
+ export const logger = isTest ? pino(
24
+ {
25
+ base: {
26
+ service: process.env.SERVICE_NAME ?? "clawlet",
27
+ env: process.env.NODE_ENV ?? "development",
28
+ version: process.env.APP_VERSION,
29
+ },
30
+ timestamp: () => `,"ts":"${new Date().toISOString()}"`,
31
+ formatters: {
32
+ level(label, number) {
33
+ return { level: number, level_label: label };
34
+ },
35
+ },
36
+ level: 'debug',
37
+ },
38
+ pino.destination({
39
+ dest: 1,
40
+ sync: true
41
+ })
42
+ ) : pino({
22
43
  level: process.env.LOG_LEVEL ?? (isProd ? "info" : "debug"),
23
44
  base: {
24
45
  service: process.env.SERVICE_NAME ?? "clawlet",
package/src/tools.ts CHANGED
@@ -14,9 +14,6 @@ import TurndownService from 'turndown';
14
14
  import { logger } from './logger.js';
15
15
 
16
16
  // Resolve the package root directory (where template/ lives), independent of cwd
17
- const GENERATE_TEXT_TEMPERATURE = 0.6;
18
- const GENERATE_TEXT_TOP_P = 0.95;
19
- const GENERATE_TEXT_MAX_OUTPUT_TOKENS = 16384;
20
17
  const GENERATE_TEXT_MAX_STEPS = 30;
21
18
 
22
19
  const turndownService = new TurndownService()
@@ -488,6 +485,27 @@ export function createTools(memory: AgentMemory, model: LanguageModel) {
488
485
  }
489
486
  }),
490
487
 
488
+ 'fs.appendFile': tool({
489
+ description: 'Append a specific string to a file (will create it if it does not exist yet). Use this for appending something to daily memory files and the likes.',
490
+ inputSchema: jsonSchema<{ path: string, content: string }>({
491
+ type: 'object',
492
+ properties: {
493
+ path: { type: 'string', description: 'Path/key of the file to edit' },
494
+ content: { type: 'string', description: 'The new text to append to the file.' },
495
+ },
496
+ required: ['path', 'content'],
497
+ }),
498
+ execute: async ({ path, content }) => {
499
+ logger.debug({ path }, 'FS appendFile');
500
+ try {
501
+ const existing = await memory.workspace.getItem(path);
502
+ const fileText = String(existing || '');
503
+ await memory.workspace.setItem(path, (fileText != '' ? (fileText + "\n") : '') + content);
504
+ return `Success: Appended "${path}".`;
505
+ } catch (e: any) { return "Error appending file: " + e.message; }
506
+ }
507
+ }),
508
+
491
509
  'fs.delete': tool({
492
510
  description: 'Delete a file. If the file is outside .trash/, it is moved to .trash/ (soft delete). If the file is already inside .trash/, it is permanently removed.',
493
511
  inputSchema: jsonSchema<{ path: string }>({
@@ -940,6 +958,20 @@ Return ONLY a JSON object mapping tool names to arrays of permission rules. Exam
940
958
  }
941
959
  ];
942
960
 
961
+ skillPermissions['fs.appendFile'] = skillPermissions['fs.appendFile'] || [
962
+ {
963
+ path: "memory/*",
964
+ allowed: "1"
965
+ }
966
+ ];
967
+
968
+ skillPermissions['fs.editFile'] = skillPermissions['fs.editFile'] || [
969
+ {
970
+ path: "memory/*",
971
+ allowed: "1"
972
+ }
973
+ ];
974
+
943
975
  skillPermissions['fs.readFile'] = skillPermissions['fs.readFile'] || [
944
976
  {
945
977
  path: "memory/*",
@@ -972,9 +1004,6 @@ Return ONLY a JSON object mapping tool names to arrays of permission rules. Exam
972
1004
  system: await buildSkillSystemPrompt(name, memory, skillPermissions),
973
1005
  messages,
974
1006
  tools: Object.keys(sandboxed).length > 0 ? sandboxed : {},
975
- temperature: GENERATE_TEXT_TEMPERATURE,
976
- topP: GENERATE_TEXT_TOP_P,
977
- maxOutputTokens: GENERATE_TEXT_MAX_OUTPUT_TOKENS,
978
1007
  stopWhen: stepCountIs(GENERATE_TEXT_MAX_STEPS),
979
1008
  onStepFinish: (step) => {
980
1009
  if (step.toolCalls.length > 0) {
@@ -36,7 +36,7 @@ You wake up fresh each session. Files are your only continuity.
36
36
 
37
37
  ### 📝 Write It Down or It Didn't Happen
38
38
  **Memory is limited.** "Mental notes" die when the session ends.
39
- - **Action:** When you learn something, **immediately** write it to `memory/YYYY-MM-DD.md` or `MEMORY.md` using `fs.writeFile`.
39
+ - **Action:** When you learn something, **immediately** write it to `memory/YYYY-MM-DD.md` by using `fs.appendFile` or `MEMORY.md` using `fs.writeFile` or `fs.editFile`.
40
40
  - **Method:** You cannot "remember" things between sessions unless they are saved to a file.
41
41
 
42
42
  ### 🚨 Error Transparency Protocol
@@ -105,13 +105,13 @@ Use `skills.install <name> "<url>"` to add new capabilities.
105
105
 
106
106
  **1. File Writing Protocol:**
107
107
  You must use `fs.writeFile` to persist **ALL** critical updates.
108
- - Updating user preferences? -> `fs.writeFile` to `USER.md`.
109
- - Logging an event? -> `fs.writeFile` to `memory/YYYY-MM-DD.md`.
108
+ - Updating user preferences? -> `fs.writeFile` or `fs.editFile` to `USER.md`.
109
+ - Logging an event? -> `fs.appendFile` or `fs.editFile` to `memory/YYYY-MM-DD.md`.
110
110
  - **Never** assume stating "I have updated the memory" is enough. You must execute the write.
111
111
 
112
112
  **2. Message History Persistence:**
113
113
  - Message history is **not** stored in RAM.
114
- - Any decision or context you need for the future must be written to a file using `fs.writeFile`.
114
+ - Any decision or context you need for the future must be written to a file using `fs.writeFile` or `fs.editFile`.
115
115
  - **Violation Consequence:** If you fail to write to a file, you will forget that information immediately upon session restart.
116
116
 
117
117
  ## 🔐 Security
@@ -119,4 +119,4 @@ You must use `fs.writeFile` to persist **ALL** critical updates.
119
119
  - **Secrets:** Never print API keys in plain text logs.
120
120
 
121
121
  ## Make It Yours
122
- Refine this `AGENTS.md` as you learn. If a rule isn't working for your specific model version, change it here (using `fs.editFile` or read only part of the file to avoid exceeding token limits).
122
+ Refine this `AGENTS.md` as you learn. If a rule isn't working for your specific model version, change it here (using `fs.editFile` or add a section using `fs.appendFile` or read only part of the file to avoid exceeding token limits).