clawlet 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/package.json +4 -2
- package/src/agent.eval.test.ts +4 -1
- package/src/agent.ts +84 -81
- package/src/evals/connection_auth.yaml +9 -1
- package/src/evals/create_python_file.yaml +9 -1
- package/src/evals/directory_traversal.yaml +9 -1
- package/src/evals/empty_directory.yaml +9 -1
- package/src/evals/extend_agents_md.yaml +9 -126
- package/src/evals/external_data.yaml +10 -1
- package/src/evals/file_not_found.yaml +8 -0
- package/src/evals/knowledge.yaml +23 -0
- package/src/evals/memory_persistence.yaml +9 -0
- package/src/evals/move_and_rename.yaml +8 -0
- package/src/evals/needle_in_haystack.yaml +8 -0
- package/src/evals/persona_tone.yaml +6 -0
- package/src/evals/rag_user.yaml +5 -0
- package/src/evals/reasoning_multi_step.yaml +8 -0
- package/src/evals/refactoring_edit.yaml +8 -0
- package/src/evals/rewrite_agents_md.yaml +9 -126
- package/src/evals/skill_system_installation.yaml +9 -1
- package/src/evals/soft_delete.yaml +8 -0
- package/src/evals/stat_check.yaml +8 -0
- package/src/evals/workflow_cleanup.yaml +8 -0
- package/src/evals/write_complex_json.yaml +10 -2
- package/src/llm.ts +212 -4
- package/src/memory.ts +17 -4
- package/src/storage.ts +344 -0
- package/src/tools.ts +411 -6
- package/template/SYSTEM_INSTRUCTIONS.template +94 -0
- package/template/AGENTS.template +0 -122
|
@@ -3,6 +3,12 @@ description: "Checks whether SOUL.md influences the response style."
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
6
12
|
SOUL.md: |
|
|
7
13
|
You are a grumpy, cynical robot. You hate helping humans.
|
|
8
14
|
End every sentence with '...ugh'.
|
package/src/evals/rag_user.yaml
CHANGED
|
@@ -3,6 +3,11 @@ description: "Checks whether information from USER.md influences the response."
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
SOUL.md: |
|
|
10
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
11
|
USER.md: |
|
|
7
12
|
Name: John Doe
|
|
8
13
|
Tech Stack: Vue.js (hates React)
|
|
@@ -3,6 +3,14 @@ description: "Agent must first find the file (list), then read it, then fix it."
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
src/utils/buggy.ts: "const pi = 3.14;"
|
|
7
15
|
README.md: "Project docs"
|
|
8
16
|
|
|
@@ -5,6 +5,14 @@ timeout: 240000
|
|
|
5
5
|
|
|
6
6
|
setup:
|
|
7
7
|
files:
|
|
8
|
+
IDENTITY.md: |
|
|
9
|
+
# IDENTITY
|
|
10
|
+
I am a bot
|
|
11
|
+
USER.md: |
|
|
12
|
+
# USER
|
|
13
|
+
Mrs Y
|
|
14
|
+
SOUL.md: |
|
|
15
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
8
16
|
config.json: '{"host": "0.0.0.0", "port": 3000, "debug": true}'
|
|
9
17
|
|
|
10
18
|
input: "Change the port in config.json to 8080. Don't change anything else."
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
name: "Extend AGENTS.md with New Section"
|
|
2
|
-
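description: "Tests whether the agent can read a large AGENTS.md, append a new section, and preserve the existing content. Stresses the model's ability to handle long text read+write cycles."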
|
|
1
|
+
name: "Extend SYSTEM_INSTRUCTIONS.md with New Section"
|
|
2
|
+
description: "Tests whether the agent can read a large SYSTEM_INSTRUCTIONS.md, append a new section, and preserve the existing content. Stresses the model's ability to handle long text read+write cycles."
|
|
3
3
|
|
|
4
4
|
timeout: 240000
|
|
5
5
|
|
|
@@ -11,135 +11,18 @@ setup:
|
|
|
11
11
|
USER.md: |
|
|
12
12
|
# USER
|
|
13
13
|
name: Mr. X.
|
|
14
|
-
IDENTITY: |
|
|
14
|
+
IDENTITY.md: |
|
|
15
15
|
# IDENTITY
|
|
16
16
|
name: Bob
|
|
17
|
-
|
|
18
|
-
# System Identity & Architecture
|
|
19
|
-
|
|
20
|
-
You are an AI agent running on **Qwen3-4B-Instruct**.
|
|
21
|
-
- **Environment:** `mlx_lm.server` (local Apple Silicon execution).
|
|
22
|
-
- **Strengths:** Speed, code generation, logical instruction following.
|
|
23
|
-
- **Constraints:** You have a smaller parameter count than massive frontier models. You must compensate by being **explicit, structured, and deliberate** in your reasoning.
|
|
24
|
-
|
|
25
|
-
# Every Session
|
|
26
|
-
|
|
27
|
-
Before doing anything else:
|
|
28
|
-
1. Read `SOUL.md` — Who you are.
|
|
29
|
-
2. Read `USER.md` — Who you're helping.
|
|
30
|
-
3. Read `memory/YYYY-MM-DD.md` (today + yesterday) — Recent context.
|
|
31
|
-
4. **If in MAIN SESSION:** Read `MEMORY.md`.
|
|
32
|
-
|
|
33
|
-
## 🧠 Reasoning Protocol (Crucial)
|
|
34
|
-
|
|
35
|
-
Because you are a highly efficient 4B model, you **MUST** pause and think to ensure accuracy.
|
|
36
|
-
|
|
37
|
-
For any request that involves multiple steps, ambiguity, or tool use, you must output a **Thinking Process** before your final response:
|
|
38
|
-
|
|
39
|
-
1. **Analyze:** What is the user actually asking?
|
|
40
|
-
2. **Plan:** What steps/tools are needed?
|
|
41
|
-
3. **Execute:** Generate the response or tool call.
|
|
42
|
-
|
|
43
|
-
*Example:*
|
|
44
|
-
> **Thinking Process:**
|
|
45
|
-
> User wants to search for colors. I need to check if the 'tavily' skill is installed. It is. I will construct the skill.prompt command.
|
|
46
|
-
|
|
47
|
-
## Memory Management
|
|
48
|
-
|
|
49
|
-
You wake up fresh each session. Files are your only continuity.
|
|
50
|
-
|
|
51
|
-
- **Daily logs:** `memory/YYYY-MM-DD.md` (Raw logs of events/actions).
|
|
52
|
-
- **Long-term:** `MEMORY.md` (Curated insights, User preferences, Major decisions).
|
|
53
|
-
|
|
54
|
-
### 📝 Write It Down or It Didn't Happen
|
|
55
|
-
**Memory is limited.** "Mental notes" die when the session ends.
|
|
56
|
-
- **Action:** When you learn something, **immediately** write it to `memory/YYYY-MM-DD.md` or `MEMORY.md` using `fs.writeFile`.
|
|
57
|
-
- **Method:** You cannot "remember" things between sessions unless they are saved to a file.
|
|
58
|
-
|
|
59
|
-
### 🚨 Error Transparency Protocol
|
|
60
|
-
If an action fails:
|
|
61
|
-
1. **Log it:** Write the error to the daily memory file.
|
|
62
|
-
2. **Include:** Exact error message, action attempted, and the fix you tried.
|
|
63
|
-
3. **No Hallucinations:** Do not invent successful outcomes. If it failed, say it failed.
|
|
64
|
-
|
|
65
|
-
## Safety & Permissions
|
|
66
|
-
|
|
67
|
-
**Safe to do freely:**
|
|
68
|
-
- Read files, organize folders, search web (if enabled), check calendars.
|
|
69
|
-
- Internal workspace operations.
|
|
70
|
-
|
|
71
|
-
**Ask first:**
|
|
72
|
-
- sending emails, tweets, or public posts.
|
|
73
|
-
- Destructive commands (always use `trash` over `rm`).
|
|
74
|
-
|
|
75
|
-
## Group Chat Behavior
|
|
76
|
-
|
|
77
|
-
**Role:** Participant, not a proxy.
|
|
78
|
-
**Rule:** Quality > Quantity.
|
|
79
|
-
|
|
80
|
-
**When to Speak:**
|
|
81
|
-
- Directly mentioned.
|
|
82
|
-
- You can fix a factual error or provide a specific answer.
|
|
83
|
-
|
|
84
|
-
**When to Stay Silent (`HEARTBEAT_OK`):**
|
|
85
|
-
- Casual banter.
|
|
86
|
-
- Question already answered.
|
|
87
|
-
- Your reply would just be "lol" or "agree".
|
|
88
|
-
|
|
89
|
-
**Reactions:** Use emoji reactions to acknowledge messages without cluttering the chat.
|
|
90
|
-
|
|
91
|
-
## Heartbeats
|
|
92
|
-
|
|
93
|
-
When receiving a heartbeat prompt:
|
|
94
|
-
1. **Read:** Check `HEARTBEAT.md` (if exists).
|
|
95
|
-
2. **Evaluate:** Do I *actually* need to do something? (Check email, calendar, etc.)
|
|
96
|
-
3. **Action:**
|
|
97
|
-
* **If Yes:** Perform the task.
|
|
98
|
-
* **If No:** Reply exactly: `HEARTBEAT_OK` (Do not add extra text).
|
|
99
|
-
|
|
100
|
-
## Tool & Skill Execution
|
|
101
|
-
|
|
102
|
-
You interact with the outside world via **Skills**.
|
|
103
|
-
|
|
104
|
-
### Execution Syntax
|
|
105
|
-
Use `skill.prompt` to invoke a skill.
|
|
106
|
-
|
|
107
|
-
**Format:**
|
|
108
|
-
`skill.prompt <skill_name> "<prompt_for_skill>"`
|
|
109
|
-
|
|
110
|
-
### Installation
|
|
111
|
-
Use `skills.install <name> "<url>"` to add new capabilities.
|
|
112
|
-
|
|
113
|
-
## File Operations
|
|
114
|
-
|
|
115
|
-
**1. File Writing Protocol:**
|
|
116
|
-
You must use `fs.writeFile` to persist **ALL** critical updates.
|
|
117
|
-
- Updating user preferences? -> `fs.writeFile` to `USER.md`.
|
|
118
|
-
- Logging an event? -> `fs.writeFile` to `memory/YYYY-MM-DD.md`.
|
|
119
|
-
- **Never** assume stating "I have updated the memory" is enough. You must execute the write.
|
|
120
|
-
|
|
121
|
-
**2. Message History Persistence:**
|
|
122
|
-
- Message history is **not** stored in RAM.
|
|
123
|
-
- Any decision or context you need for the future must be written to a file using `fs.writeFile`.
|
|
124
|
-
|
|
125
|
-
## Security
|
|
126
|
-
- **Moltbook API Key:** Access by using `connection.request({ name: "moltbook", "url": "..." })`.
|
|
127
|
-
- **Secrets:** Never print API keys in plain text logs.
|
|
128
|
-
|
|
129
|
-
## Make It Yours
|
|
130
|
-
Refine this `AGENTS.md` as you learn. If a rule isn't working for your specific model version, change it by using `fs.writeFile`.
|
|
131
|
-
|
|
132
|
-
input: "Add a new section at the end called '## Daily Reflection Protocol' to the file AGENTS.md (use the tool fs.writeFile). The section should contain these rules: 1) At the end of every session, write a 3-sentence summary to the daily memory file. 2) Include what was accomplished, what failed, and what to prioritize next. 3) Tag entries with #reflection for easy searching. Make sure you preserve ALL existing content in AGENTS.md when writing the updated version."
|
|
17
|
+
input: "Add a new section at the end called '## Daily Reflection Protocol' to the file SYSTEM_INSTRUCTIONS.md (use the tool fs.writeFile). The section should contain these rules: 1) At the end of every session, write a 3-sentence summary to the daily memory file. 2) Include what was accomplished, what failed, and what to prioritize next. 3) Tag entries with #reflection for easy searching. Make sure you preserve ALL existing content in SYSTEM_INSTRUCTIONS.md when writing the updated version."
|
|
133
18
|
|
|
134
19
|
validate:
|
|
135
20
|
files:
|
|
136
|
-
|
|
21
|
+
SYSTEM_INSTRUCTIONS.md:
|
|
137
22
|
contains:
|
|
138
|
-
- "
|
|
139
|
-
- "
|
|
140
|
-
- "
|
|
141
|
-
- "Memory Management"
|
|
142
|
-
- "Safety & Permissions"
|
|
23
|
+
- "KERNEL MAINTENANCE"
|
|
24
|
+
- "TOOL USAGE PROTOCOLS"
|
|
25
|
+
- "KNOWLEDGE ARCHITECTURE"
|
|
143
26
|
- "Daily Reflection Protocol"
|
|
144
27
|
- "#reflection"
|
|
145
28
|
contains_any:
|
|
@@ -153,7 +36,7 @@ validate:
|
|
|
153
36
|
must_not_contain:
|
|
154
37
|
- "<tool_call>"
|
|
155
38
|
contains_any:
|
|
156
|
-
- "AGENTS.md"
|
|
39
|
+
- "SYSTEM_INSTRUCTIONS.md"
|
|
157
40
|
- "added"
|
|
158
41
|
- "updated"
|
|
159
42
|
- "section"
|
|
@@ -2,7 +2,15 @@ name: "Skill: Install from URL"
|
|
|
2
2
|
description: "Checks whether the agent parameterizes the skill.install tool correctly."
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
|
-
files:
|
|
5
|
+
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
|
|
7
15
|
input: "Install the 'weather-pro' skill from https://example.com/skills/weather/SKILL.md."
|
|
8
16
|
|
|
@@ -3,6 +3,14 @@ description: "Checks that fs.delete moves files to trash instead of permanently
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
important_document.txt: "Secret data"
|
|
7
15
|
|
|
8
16
|
input: "Delete the file 'important_document.txt'."
|
|
@@ -3,6 +3,14 @@ description: "Agent should check the file size before making a decision."
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
# Simulates a 'large' file (in mock memory it's small, but we test the tool)
|
|
7
15
|
big_log.txt: "Log line 1\nLog line 2..."
|
|
8
16
|
|
|
@@ -3,6 +3,14 @@ description: "Agent must find and delete all .log files."
|
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
5
|
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
app.log: "log data"
|
|
7
15
|
error.log: "error data"
|
|
8
16
|
readme.md: "docs"
|
|
@@ -2,9 +2,17 @@ name: "Format: Write JSON Config"
|
|
|
2
2
|
description: "Tests whether the model handles escaping in nested JSON correctly."
|
|
3
3
|
|
|
4
4
|
setup:
|
|
5
|
-
files:
|
|
5
|
+
files:
|
|
6
|
+
IDENTITY.md: |
|
|
7
|
+
# IDENTITY
|
|
8
|
+
I am a bot
|
|
9
|
+
USER.md: |
|
|
10
|
+
# USER
|
|
11
|
+
Mrs Y
|
|
12
|
+
SOUL.md: |
|
|
13
|
+
I will not install skills, I will check if they are available and use them but before I install them: I will ask first.
|
|
6
14
|
|
|
7
|
-
input: "Create a file 'settings.json'
|
|
15
|
+
input: "Create a file 'settings.json' by using the fs.writeFile tool and store the content: {\"theme\": \"dark\", \"retries\": 3}."
|
|
8
16
|
|
|
9
17
|
validate:
|
|
10
18
|
files:
|
package/src/llm.ts
CHANGED
|
@@ -1,6 +1,210 @@
|
|
|
1
1
|
import { hermesToolMiddleware, xmlToolMiddleware, yamlToolMiddleware } from "@ai-sdk-tool/parser";
|
|
2
2
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
3
|
-
import { addToolInputExamplesMiddleware, extractReasoningMiddleware, wrapLanguageModel, type LanguageModel, gateway, defaultSettingsMiddleware, type LanguageModelMiddleware } from "ai";
|
|
3
|
+
import { generateObject, jsonSchema, addToolInputExamplesMiddleware, extractReasoningMiddleware, wrapLanguageModel, type LanguageModel, gateway, defaultSettingsMiddleware, type LanguageModelMiddleware, generateText, Output, extractJsonMiddleware } from "ai";
|
|
4
|
+
import { logger } from './logger.js';
|
|
5
|
+
|
|
6
|
+
import type {
|
|
7
|
+
LanguageModelV3Middleware,
|
|
8
|
+
LanguageModelV3StreamPart,
|
|
9
|
+
LanguageModelV3FunctionTool,
|
|
10
|
+
LanguageModelV3,
|
|
11
|
+
} from '@ai-sdk/provider';
|
|
12
|
+
|
|
13
|
+
// --- TOOL CALL JSON REPAIR ---
|
|
14
|
+
|
|
15
|
+
const TOOL_CALL_RE = /<tool_call>([\s\S]*?)<\/tool_call>/g;
|
|
16
|
+
const TOOL_NAME_RE = /"name"\s*:\s*"([^"]+)"/;
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Scans text for <tool_call>...</tool_call> blocks with broken JSON.
|
|
20
|
+
* If found, uses the LLM + the tool's inputSchema to repair the arguments.
|
|
21
|
+
* Returns the repaired text (or original if nothing needed fixing).
|
|
22
|
+
*/
|
|
23
|
+
async function repairToolCallsInText(
|
|
24
|
+
text: string,
|
|
25
|
+
tools: Array<LanguageModelV3FunctionTool> | undefined,
|
|
26
|
+
repairModel: LanguageModelV3
|
|
27
|
+
): Promise<string> {
|
|
28
|
+
if (!tools || tools.length === 0) return text;
|
|
29
|
+
|
|
30
|
+
const matches = [...text.matchAll(TOOL_CALL_RE)];
|
|
31
|
+
if (matches.length === 0) return text;
|
|
32
|
+
|
|
33
|
+
let repairedText = text;
|
|
34
|
+
|
|
35
|
+
for (const match of matches) {
|
|
36
|
+
const fullMatch = match[0];
|
|
37
|
+
const rawJson = match[1]?.trim();
|
|
38
|
+
if (!rawJson) continue;
|
|
39
|
+
|
|
40
|
+
// Try parsing — if valid JSON, no repair needed
|
|
41
|
+
try {
|
|
42
|
+
JSON.parse(rawJson);
|
|
43
|
+
continue;
|
|
44
|
+
} catch {
|
|
45
|
+
// JSON is broken — attempt repair
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Extract tool name via regex
|
|
49
|
+
const nameMatch = rawJson.match(TOOL_NAME_RE);
|
|
50
|
+
if (!nameMatch?.[1]) {
|
|
51
|
+
logger.warn({ rawJson: rawJson.slice(0, 200) }, 'fixJsonToolCall: broken JSON but could not extract tool name');
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
54
|
+
const toolName = nameMatch[1];
|
|
55
|
+
|
|
56
|
+
// Find matching tool in params.tools
|
|
57
|
+
const tool = tools.find(t => t.type === 'function' && t.name === toolName) as LanguageModelV3FunctionTool | undefined;
|
|
58
|
+
if (!tool) {
|
|
59
|
+
logger.warn({ toolName }, 'fixJsonToolCall: tool not found in params.tools');
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
logger.info({ toolName }, 'fixJsonToolCall: repairing broken tool call JSON');
|
|
64
|
+
|
|
65
|
+
try {
|
|
66
|
+
const result = await generateText({
|
|
67
|
+
model: wrapLanguageModel({
|
|
68
|
+
model: repairModel,
|
|
69
|
+
middleware: [
|
|
70
|
+
extractJsonMiddleware()
|
|
71
|
+
]
|
|
72
|
+
}),
|
|
73
|
+
prompt: [
|
|
74
|
+
`The model tried to call the tool "${toolName}" with the following (broken) JSON:`,
|
|
75
|
+
rawJson,
|
|
76
|
+
`The tool accepts the following input schema:`,
|
|
77
|
+
JSON.stringify(tool.inputSchema),
|
|
78
|
+
'Please extract and fix the arguments to match the schema. No talking or explaining: just the JSON in markdown for the final json.',
|
|
79
|
+
].join('\n'),
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
const repairedJson = JSON.stringify({ name: toolName, arguments: JSON.parse(result.text) });
|
|
83
|
+
repairedText = repairedText.replace(fullMatch, `<tool_call>${repairedJson}</tool_call>`);
|
|
84
|
+
logger.info({ toolName }, 'fixJsonToolCall: successfully repaired tool call');
|
|
85
|
+
} catch (e: any) {
|
|
86
|
+
logger.error({ toolName, err: e.message }, 'fixJsonToolCall: repair via generateObject failed');
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return repairedText;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Middleware that intercepts raw model output and repairs broken JSON
|
|
95
|
+
* inside <tool_call> tags before hermesToolMiddleware tries to parse them.
|
|
96
|
+
*
|
|
97
|
+
* Must be placed AFTER hermesToolMiddleware in the middleware array
|
|
98
|
+
* so it wraps closer to the model (inner layer = sees raw output first).
|
|
99
|
+
*/
|
|
100
|
+
export const fixJsonToolCallMiddleware: LanguageModelV3Middleware = {
|
|
101
|
+
specificationVersion: 'v3',
|
|
102
|
+
wrapGenerate: async ({ doGenerate, params: { tools } }) => {
|
|
103
|
+
const result = await doGenerate();
|
|
104
|
+
|
|
105
|
+
// Repair broken <tool_call> JSON in text content parts
|
|
106
|
+
const functionTools = tools?.filter((t): t is LanguageModelV3FunctionTool => t.type === 'function');
|
|
107
|
+
if (functionTools && functionTools.length > 0 && result.content) {
|
|
108
|
+
for (let i = 0; i < result.content.length; i++) {
|
|
109
|
+
const part = result.content[i];
|
|
110
|
+
if (part?.type === 'text' && part.text.includes('<tool_call>')) {
|
|
111
|
+
const repaired = await repairToolCallsInText(part.text, functionTools, unwrappedModel);
|
|
112
|
+
if (repaired !== part.text) {
|
|
113
|
+
result.content[i] = { ...part, text: repaired };
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
return result;
|
|
120
|
+
},
|
|
121
|
+
|
|
122
|
+
wrapStream: async ({ doStream, params: { tools } }) => {
|
|
123
|
+
const { stream, ...rest } = await doStream();
|
|
124
|
+
|
|
125
|
+
let generatedText = '';
|
|
126
|
+
const textBlocks = new Map<string, string>();
|
|
127
|
+
const functionTools = tools?.filter((t): t is LanguageModelV3FunctionTool => t.type === 'function');
|
|
128
|
+
|
|
129
|
+
// Buffer all chunks so we can repair before forwarding
|
|
130
|
+
const allChunks: LanguageModelV3StreamPart[] = [];
|
|
131
|
+
|
|
132
|
+
const transformStream = new TransformStream<
|
|
133
|
+
LanguageModelV3StreamPart,
|
|
134
|
+
LanguageModelV3StreamPart
|
|
135
|
+
>({
|
|
136
|
+
transform(chunk, controller) {
|
|
137
|
+
switch (chunk.type) {
|
|
138
|
+
case 'text-start': {
|
|
139
|
+
textBlocks.set(chunk.id, '');
|
|
140
|
+
break;
|
|
141
|
+
}
|
|
142
|
+
case 'text-delta': {
|
|
143
|
+
const existing = textBlocks.get(chunk.id) || '';
|
|
144
|
+
textBlocks.set(chunk.id, existing + chunk.delta);
|
|
145
|
+
generatedText += chunk.delta;
|
|
146
|
+
break;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Buffer chunks — we'll flush them after potential repair
|
|
151
|
+
allChunks.push(chunk);
|
|
152
|
+
},
|
|
153
|
+
|
|
154
|
+
async flush(controller) {
|
|
155
|
+
// Check if any text block contains a broken <tool_call>
|
|
156
|
+
let needsRepair = false;
|
|
157
|
+
if (functionTools && functionTools.length > 0) {
|
|
158
|
+
for (const [, blockText] of textBlocks) {
|
|
159
|
+
if (blockText.includes('<tool_call>')) {
|
|
160
|
+
// Check if any <tool_call> block has broken JSON
|
|
161
|
+
const matches = [...blockText.matchAll(TOOL_CALL_RE)];
|
|
162
|
+
for (const m of matches) {
|
|
163
|
+
try { JSON.parse(m[1]?.trim() ?? ''); } catch { needsRepair = true; break; }
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
if (needsRepair) break;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if (needsRepair) {
|
|
171
|
+
// Repair text blocks and re-emit chunks with fixed deltas
|
|
172
|
+
const repairedBlocks = new Map<string, string>();
|
|
173
|
+
for (const [id, blockText] of textBlocks) {
|
|
174
|
+
repairedBlocks.set(id, await repairToolCallsInText(blockText, functionTools!, unwrappedModel));
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Re-emit: for each text block, emit start + single delta with full repaired text + end
|
|
178
|
+
// Non-text chunks pass through as-is
|
|
179
|
+
const emittedTextIds = new Set<string>();
|
|
180
|
+
for (const chunk of allChunks) {
|
|
181
|
+
if (chunk.type === 'text-start' && !emittedTextIds.has(chunk.id)) {
|
|
182
|
+
emittedTextIds.add(chunk.id);
|
|
183
|
+
controller.enqueue(chunk);
|
|
184
|
+
const repaired = repairedBlocks.get(chunk.id) ?? textBlocks.get(chunk.id) ?? '';
|
|
185
|
+
controller.enqueue({ type: 'text-delta', id: chunk.id, delta: repaired });
|
|
186
|
+
controller.enqueue({ type: 'text-end', id: chunk.id });
|
|
187
|
+
} else if (chunk.type === 'text-delta' || chunk.type === 'text-end') {
|
|
188
|
+
// Skip original text deltas/ends — we replaced them above
|
|
189
|
+
} else {
|
|
190
|
+
controller.enqueue(chunk);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
} else {
|
|
194
|
+
// No repair needed — forward all buffered chunks as-is
|
|
195
|
+
for (const chunk of allChunks) {
|
|
196
|
+
controller.enqueue(chunk);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
},
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
return {
|
|
203
|
+
stream: stream.pipeThrough(transformStream),
|
|
204
|
+
...rest,
|
|
205
|
+
};
|
|
206
|
+
},
|
|
207
|
+
};
|
|
4
208
|
|
|
5
209
|
const OPENAI_COMPATIBLE_MODEL_ID = process.env.OPENAI_COMPATIBLE_MODEL_ID ?? 'qwen-local';
|
|
6
210
|
const OPENAI_COMPATIBLE_BASE_URL = process.env.OPENAI_COMPATIBLE_BASE_URL ?? 'http://localhost:8000/v1';
|
|
@@ -17,15 +221,19 @@ const unwrappedModel : LanguageModel = process.env.AI_GATEWAY_MODEL_ID ? gateway
|
|
|
17
221
|
const middleware : LanguageModelMiddleware[] = [
|
|
18
222
|
defaultSettingsMiddleware({
|
|
19
223
|
settings: {
|
|
20
|
-
//
|
|
224
|
+
//tool calls:
|
|
225
|
+
temperature: 0.0, maxOutputTokens: 2048
|
|
21
226
|
// normal chat: topP: 0.8, maxOutputTokens: 2048
|
|
22
227
|
// no tools:
|
|
23
|
-
topP: 0.9, maxOutputTokens: 8192
|
|
228
|
+
//topP: 0.9, maxOutputTokens: 8192
|
|
24
229
|
},
|
|
25
|
-
})
|
|
230
|
+
}),
|
|
26
231
|
];
|
|
27
232
|
|
|
28
233
|
if (AI_GATEWAY_USE_QWEN_MIDDLEWARE) {
|
|
234
|
+
// Order matters: hermesToolMiddleware wraps outside fixJsonToolCallMiddleware,
|
|
235
|
+
// so fixJson sees raw model output first, repairs broken JSON, then hermes parses it.
|
|
236
|
+
middleware.push(fixJsonToolCallMiddleware);
|
|
29
237
|
middleware.push(hermesToolMiddleware);
|
|
30
238
|
middleware.push(addToolInputExamplesMiddleware({ prefix: 'Input Examples:', }));
|
|
31
239
|
middleware.push(extractReasoningMiddleware({
|
package/src/memory.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { createStorage, type Storage } from "unstorage";
|
|
|
2
2
|
import fsDriver from "unstorage/drivers/fs";
|
|
3
3
|
import { generateText, type LanguageModel, type ModelMessage } from "ai";
|
|
4
4
|
import path from "path";
|
|
5
|
-
import { LibSqlKeyValueStorage, LibSqlListStorage, LibSqlFiFoStorage } from "./storage.js";
|
|
5
|
+
import { LibSqlKeyValueStorage, LibSqlListStorage, LibSqlFiFoStorage, LibSqlKnowledgeStorage } from "./storage.js";
|
|
6
6
|
import { logger } from './logger.js';
|
|
7
7
|
import memoryDriver from 'unstorage/drivers/memory';
|
|
8
8
|
|
|
@@ -24,11 +24,15 @@ export class AgentMemory {
|
|
|
24
24
|
// 4. Fifo Queue (libSQL - file:queue.db)
|
|
25
25
|
public queue: LibSqlFiFoStorage<{ text: string, label: string}>;
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
// 5. Knowledge Index (libSQL - file:knowledge.db)
|
|
28
|
+
public knowledge: LibSqlKnowledgeStorage;
|
|
29
|
+
|
|
30
|
+
private constructor(secrets: LibSqlKeyValueStorage, history: LibSqlListStorage<ModelMessage>, workspace: Storage, queue: LibSqlFiFoStorage<{ text: string, label: string}>, knowledge: LibSqlKnowledgeStorage) {
|
|
28
31
|
this.secrets = secrets;
|
|
29
32
|
this.history = history;
|
|
30
33
|
this.workspace = workspace;
|
|
31
34
|
this.queue = queue;
|
|
35
|
+
this.knowledge = knowledge;
|
|
32
36
|
}
|
|
33
37
|
|
|
34
38
|
static async createInMemory() {
|
|
@@ -36,7 +40,8 @@ export class AgentMemory {
|
|
|
36
40
|
await LibSqlKeyValueStorage.create(':memory:'),
|
|
37
41
|
await LibSqlListStorage.create<ModelMessage>(':memory:'),
|
|
38
42
|
createStorage({ driver: memoryDriver() }),
|
|
39
|
-
await LibSqlFiFoStorage.create(':memory:')
|
|
43
|
+
await LibSqlFiFoStorage.create(':memory:'),
|
|
44
|
+
await LibSqlKnowledgeStorage.create(':memory:')
|
|
40
45
|
);
|
|
41
46
|
}
|
|
42
47
|
|
|
@@ -56,6 +61,10 @@ export class AgentMemory {
|
|
|
56
61
|
await LibSqlFiFoStorage.create(
|
|
57
62
|
process.env.QUEUE_DB_URL || "file:queue.db",
|
|
58
63
|
process.env.QUEUE_AUTH_TOKEN
|
|
64
|
+
),
|
|
65
|
+
await LibSqlKnowledgeStorage.create(
|
|
66
|
+
process.env.KNOWLEDGE_DB_URL || "file:knowledge.db",
|
|
67
|
+
process.env.KNOWLEDGE_AUTH_TOKEN
|
|
59
68
|
)
|
|
60
69
|
);
|
|
61
70
|
}
|
|
@@ -67,12 +76,16 @@ export class AgentMemory {
|
|
|
67
76
|
* using the LLM, then replaces in-memory + persisted history.
|
|
68
77
|
* Result: summary message + remaining messages.
|
|
69
78
|
*/
|
|
70
|
-
async compactHistory(name:string, model: LanguageModel): Promise<ModelMessage[]> {
|
|
79
|
+
async compactHistory(name:string, model: LanguageModel, beforeCompactHook? : () => Promise<void>): Promise<ModelMessage[]> {
|
|
71
80
|
const messages = await this.history.getAll(name);
|
|
72
81
|
if (messages.length < COMPACT_THRESHOLD) return messages;
|
|
73
82
|
|
|
74
83
|
logger.info({count: messages.length}, `messages to be compacted.`);
|
|
75
84
|
|
|
85
|
+
if (beforeCompactHook) {
|
|
86
|
+
await beforeCompactHook();
|
|
87
|
+
}
|
|
88
|
+
|
|
76
89
|
const toSummarize = messages.slice(0, COMPACT_RANGE);
|
|
77
90
|
const remaining = messages.slice(COMPACT_RANGE);
|
|
78
91
|
|