@dotsetlabs/dotclaw 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config-examples/runtime.json +29 -3
- package/container/agent-runner/src/agent-config.ts +19 -3
- package/container/agent-runner/src/container-protocol.ts +11 -0
- package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
- package/container/agent-runner/src/index.ts +744 -123
- package/container/agent-runner/src/memory.ts +18 -68
- package/container/agent-runner/src/system-prompt.ts +36 -34
- package/container/agent-runner/src/tool-loop-policy.ts +724 -0
- package/container/agent-runner/src/tools.ts +211 -8
- package/dist/agent-context.d.ts +1 -0
- package/dist/agent-context.d.ts.map +1 -1
- package/dist/agent-context.js +21 -9
- package/dist/agent-context.js.map +1 -1
- package/dist/agent-execution.d.ts +2 -0
- package/dist/agent-execution.d.ts.map +1 -1
- package/dist/agent-execution.js +164 -15
- package/dist/agent-execution.js.map +1 -1
- package/dist/agent-semaphore.d.ts +24 -1
- package/dist/agent-semaphore.d.ts.map +1 -1
- package/dist/agent-semaphore.js +109 -20
- package/dist/agent-semaphore.js.map +1 -1
- package/dist/cli.js +3 -11
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/container-protocol.d.ts +22 -0
- package/dist/container-protocol.d.ts.map +1 -1
- package/dist/container-protocol.js.map +1 -1
- package/dist/container-runner.d.ts +7 -0
- package/dist/container-runner.d.ts.map +1 -1
- package/dist/container-runner.js +417 -143
- package/dist/container-runner.js.map +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +46 -12
- package/dist/db.js.map +1 -1
- package/dist/failover-policy.d.ts +41 -0
- package/dist/failover-policy.d.ts.map +1 -0
- package/dist/failover-policy.js +261 -0
- package/dist/failover-policy.js.map +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/ipc-dispatcher.d.ts.map +1 -1
- package/dist/ipc-dispatcher.js +27 -43
- package/dist/ipc-dispatcher.js.map +1 -1
- package/dist/mcp-config.d.ts +22 -0
- package/dist/mcp-config.d.ts.map +1 -0
- package/dist/mcp-config.js +94 -0
- package/dist/mcp-config.js.map +1 -0
- package/dist/memory-backend.d.ts +27 -0
- package/dist/memory-backend.d.ts.map +1 -0
- package/dist/memory-backend.js +112 -0
- package/dist/memory-backend.js.map +1 -0
- package/dist/memory-recall.d.ts.map +1 -1
- package/dist/memory-recall.js +135 -22
- package/dist/memory-recall.js.map +1 -1
- package/dist/memory-store.d.ts +1 -0
- package/dist/memory-store.d.ts.map +1 -1
- package/dist/memory-store.js +55 -7
- package/dist/memory-store.js.map +1 -1
- package/dist/message-pipeline.d.ts +24 -0
- package/dist/message-pipeline.d.ts.map +1 -1
- package/dist/message-pipeline.js +131 -27
- package/dist/message-pipeline.js.map +1 -1
- package/dist/metrics.d.ts +1 -0
- package/dist/metrics.d.ts.map +1 -1
- package/dist/metrics.js +9 -0
- package/dist/metrics.js.map +1 -1
- package/dist/recall-policy.d.ts +12 -0
- package/dist/recall-policy.d.ts.map +1 -0
- package/dist/recall-policy.js +89 -0
- package/dist/recall-policy.js.map +1 -0
- package/dist/runtime-config.d.ts +33 -0
- package/dist/runtime-config.d.ts.map +1 -1
- package/dist/runtime-config.js +111 -11
- package/dist/runtime-config.js.map +1 -1
- package/dist/streaming.d.ts.map +1 -1
- package/dist/streaming.js +125 -33
- package/dist/streaming.js.map +1 -1
- package/dist/task-scheduler.d.ts.map +1 -1
- package/dist/task-scheduler.js +27 -10
- package/dist/task-scheduler.js.map +1 -1
- package/dist/tool-policy.d.ts.map +1 -1
- package/dist/tool-policy.js +26 -4
- package/dist/tool-policy.js.map +1 -1
- package/dist/trace-writer.d.ts +12 -0
- package/dist/trace-writer.d.ts.map +1 -1
- package/dist/trace-writer.js.map +1 -1
- package/dist/turn-hygiene.d.ts +14 -0
- package/dist/turn-hygiene.d.ts.map +1 -0
- package/dist/turn-hygiene.js +214 -0
- package/dist/turn-hygiene.js.map +1 -0
- package/dist/webhook.d.ts.map +1 -1
- package/dist/webhook.js +1 -0
- package/dist/webhook.js.map +1 -1
- package/package.json +15 -1
- package/scripts/benchmark-baseline.js +365 -0
- package/scripts/benchmark-harness.js +1413 -0
- package/scripts/benchmark-scenarios.js +301 -0
- package/scripts/canary-suite.js +123 -0
- package/scripts/generate-controlled-traces.js +230 -0
- package/scripts/release-slo-check.js +214 -0
- package/scripts/run-live-canary.js +339 -0
|
@@ -371,71 +371,6 @@ export function parseSummaryResponse(text: string): { summary: string; facts: st
|
|
|
371
371
|
}
|
|
372
372
|
}
|
|
373
373
|
|
|
374
|
-
function tokenize(text: string): string[] {
|
|
375
|
-
return (text.toLowerCase().match(/[a-z0-9]+/g) || []).filter(token => token.length > 1);
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
function scoreCandidate(candidate: string, queryTokens: string[], weight: number): number {
|
|
379
|
-
const candidateTokens = tokenize(candidate);
|
|
380
|
-
if (candidateTokens.length === 0 || queryTokens.length === 0) return 0;
|
|
381
|
-
const tokenSet = new Set(candidateTokens);
|
|
382
|
-
let overlap = 0;
|
|
383
|
-
for (const token of queryTokens) {
|
|
384
|
-
if (tokenSet.has(token)) overlap += 1;
|
|
385
|
-
}
|
|
386
|
-
if (overlap === 0) return 0;
|
|
387
|
-
return (overlap / Math.sqrt(candidateTokens.length)) * weight;
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
export function retrieveRelevantMemories(params: {
|
|
391
|
-
query: string;
|
|
392
|
-
summary: string;
|
|
393
|
-
facts: string[];
|
|
394
|
-
olderMessages: Message[];
|
|
395
|
-
config: MemoryConfig;
|
|
396
|
-
}): string[] {
|
|
397
|
-
const queryTokens = tokenize(params.query);
|
|
398
|
-
if (queryTokens.length === 0) return [];
|
|
399
|
-
|
|
400
|
-
const candidates: Array<{ text: string; score: number }> = [];
|
|
401
|
-
|
|
402
|
-
if (params.summary) {
|
|
403
|
-
const summaryLines = params.summary.split('\n').map(line => line.trim()).filter(Boolean);
|
|
404
|
-
for (const line of summaryLines) {
|
|
405
|
-
const score = scoreCandidate(line, queryTokens, 1.4);
|
|
406
|
-
if (score > 0) candidates.push({ text: line, score });
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
for (const fact of params.facts) {
|
|
411
|
-
const score = scoreCandidate(fact, queryTokens, 2.0);
|
|
412
|
-
if (score > 0) candidates.push({ text: fact, score });
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
for (const msg of params.olderMessages.slice(-50)) {
|
|
416
|
-
const snippet = msg.content.length > 300 ? `${msg.content.slice(0, 300)}...` : msg.content;
|
|
417
|
-
const score = scoreCandidate(snippet, queryTokens, 1.0);
|
|
418
|
-
if (score > 0) candidates.push({ text: snippet, score });
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
candidates.sort((a, b) => b.score - a.score);
|
|
422
|
-
|
|
423
|
-
// Quality gate: filter out low-scoring candidates to prevent noise injection
|
|
424
|
-
const MIN_SESSION_RECALL_SCORE = 0.5;
|
|
425
|
-
const filtered = candidates.filter(c => c.score >= MIN_SESSION_RECALL_SCORE);
|
|
426
|
-
|
|
427
|
-
const results: string[] = [];
|
|
428
|
-
let tokens = 0;
|
|
429
|
-
for (const candidate of filtered) {
|
|
430
|
-
if (results.length >= params.config.memoryMaxResults) break;
|
|
431
|
-
const nextTokens = estimateTokens(candidate.text);
|
|
432
|
-
if (tokens + nextTokens > params.config.memoryMaxTokens) break;
|
|
433
|
-
results.push(candidate.text);
|
|
434
|
-
tokens += nextTokens;
|
|
435
|
-
}
|
|
436
|
-
return results;
|
|
437
|
-
}
|
|
438
|
-
|
|
439
374
|
export interface ContextPruningConfig {
|
|
440
375
|
softTrimMaxChars: number;
|
|
441
376
|
softTrimHeadChars: number;
|
|
@@ -475,10 +410,25 @@ export function pruneContextMessages(
|
|
|
475
410
|
}
|
|
476
411
|
|
|
477
412
|
/**
|
|
478
|
-
* Limit conversation history
|
|
413
|
+
* Limit conversation history by counting user turns (not total messages).
|
|
414
|
+
* maxTurns=40 means keep the last 40 user messages plus all their associated
|
|
415
|
+
* assistant replies — roughly 80 messages total.
|
|
479
416
|
* Preserves chronological order.
|
|
480
417
|
*/
|
|
481
418
|
export function limitHistoryTurns(messages: Message[], maxTurns: number): Message[] {
|
|
482
|
-
if (maxTurns <= 0
|
|
483
|
-
|
|
419
|
+
if (maxTurns <= 0) return messages;
|
|
420
|
+
// Count user turns from the end
|
|
421
|
+
let userTurnsSeen = 0;
|
|
422
|
+
let cutoff = 0;
|
|
423
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
424
|
+
if (messages[i].role === 'user') {
|
|
425
|
+
userTurnsSeen++;
|
|
426
|
+
if (userTurnsSeen > maxTurns) {
|
|
427
|
+
cutoff = i + 1;
|
|
428
|
+
break;
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
if (cutoff === 0) return messages;
|
|
433
|
+
return messages.slice(cutoff);
|
|
484
434
|
}
|
|
@@ -110,7 +110,9 @@ function buildScheduledSection(params: SystemPromptParams): string {
|
|
|
110
110
|
|
|
111
111
|
function buildResponseGuidanceSection(): string {
|
|
112
112
|
return [
|
|
113
|
-
'-
|
|
113
|
+
'- Answer directly when the request can be completed from conversation context without external state.',
|
|
114
|
+
'- When the request requires file/system/network actions or fresh state, execute tools first before finalizing.',
|
|
115
|
+
'- Never claim an action happened unless corresponding tool calls succeeded in this turn.',
|
|
114
116
|
'- If the user asks about your previous actions (e.g., "did you use X tool?"), reflect on the conversation history — do not re-execute the task.',
|
|
115
117
|
'- If the user asks a simple factual question, answer from your knowledge — do not call tools unless you need to verify or act.',
|
|
116
118
|
'- When you have genuinely nothing to say, respond with ONLY: NO_REPLY (your entire message must be just this token, nothing else).'
|
|
@@ -128,6 +130,7 @@ function buildToolCallStyleSection(): string {
|
|
|
128
130
|
function buildToolGuidanceSection(params: SystemPromptParams): string {
|
|
129
131
|
const lines = [
|
|
130
132
|
'Key tool rules:',
|
|
133
|
+
'- Never claim file/system/web actions succeeded unless tool calls in this turn confirm them.',
|
|
131
134
|
'- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
|
|
132
135
|
'- To send media from the web: download_url → send_photo/send_file/send_audio.',
|
|
133
136
|
'- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
|
|
@@ -160,37 +163,30 @@ function buildToolGuidanceSection(params: SystemPromptParams): string {
|
|
|
160
163
|
|
|
161
164
|
function buildMemorySection(params: SystemPromptParams): string {
|
|
162
165
|
const parts: string[] = [];
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
if (
|
|
167
|
-
parts.push('
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
}
|
|
180
|
-
if (params.longTermRecall.length > 0) {
|
|
181
|
-
parts.push('What you remember about the user (long-term):');
|
|
182
|
-
parts.push(params.longTermRecall.map(item => `- ${item}`).join('\n'));
|
|
183
|
-
}
|
|
184
|
-
if (params.memoryStats) {
|
|
185
|
-
parts.push(`Memory stats: Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`);
|
|
186
|
-
}
|
|
187
|
-
} else {
|
|
188
|
-
parts.push('No long-term memory available yet.');
|
|
166
|
+
|
|
167
|
+
// Session-level context: summary and facts from the current conversation.
|
|
168
|
+
// These are essential for understanding the current thread.
|
|
169
|
+
if (params.memorySummary) {
|
|
170
|
+
parts.push('Conversation summary (this session):');
|
|
171
|
+
parts.push(params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS));
|
|
172
|
+
}
|
|
173
|
+
if (params.memoryFacts.length > 0) {
|
|
174
|
+
parts.push('Key facts (this session):');
|
|
175
|
+
parts.push(params.memoryFacts.map(f => `- ${f}`).join('\n'));
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// User profile stays pre-injected — identity and preferences should always be available.
|
|
179
|
+
if (params.userProfile) {
|
|
180
|
+
parts.push('User profile:');
|
|
181
|
+
parts.push(params.userProfile);
|
|
189
182
|
}
|
|
190
183
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
184
|
+
// Long-term memory is now tool-based: agent searches on demand instead of pre-injection.
|
|
185
|
+
// This prevents context bloat from irrelevant memories and lets the agent decide what's needed.
|
|
186
|
+
parts.push('Long-term memory: Use the mcp__dotclaw__memory_search tool to recall information from past conversations, stored preferences, notes, and knowledge. Search BEFORE answering questions about prior decisions, dates, people, projects, or anything you don\'t see in the conversation above.');
|
|
187
|
+
|
|
188
|
+
if (params.memoryStats && params.memoryStats.total > 0) {
|
|
189
|
+
parts.push(`Memory store: ${params.memoryStats.total} entries available (search with mcp__dotclaw__memory_search).`);
|
|
194
190
|
}
|
|
195
191
|
|
|
196
192
|
return parts.join('\n');
|
|
@@ -292,8 +288,14 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
|
|
|
292
288
|
const toolReliability = trimLevel >= 2 ? '' : (
|
|
293
289
|
params.toolReliability && params.toolReliability.length > 0
|
|
294
290
|
? params.toolReliability
|
|
295
|
-
.
|
|
296
|
-
.
|
|
291
|
+
.filter(t => t.count >= 5 && (t.success_rate < 0.98 || (Number.isFinite(t.avg_duration_ms) && (t.avg_duration_ms || 0) > 2500)))
|
|
292
|
+
.sort((a, b) => {
|
|
293
|
+
if (a.success_rate !== b.success_rate) return a.success_rate - b.success_rate;
|
|
294
|
+
const aDur = Number.isFinite(a.avg_duration_ms) ? (a.avg_duration_ms || 0) : 0;
|
|
295
|
+
const bDur = Number.isFinite(b.avg_duration_ms) ? (b.avg_duration_ms || 0) : 0;
|
|
296
|
+
return bDur - aDur;
|
|
297
|
+
})
|
|
298
|
+
.slice(0, 8)
|
|
297
299
|
.map(t => {
|
|
298
300
|
const pct = `${Math.round(t.success_rate * 100)}%`;
|
|
299
301
|
const avg = Number.isFinite(t.avg_duration_ms) ? `${Math.round(t.avg_duration_ms!)}ms` : 'n/a';
|
|
@@ -306,9 +308,9 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
|
|
|
306
308
|
// Trim level 1+: drop prompt packs
|
|
307
309
|
const packBlocks = trimLevel >= 1 ? [] : buildPromptPackSections(params);
|
|
308
310
|
|
|
309
|
-
// Trim level 3+: reduce memory section (drop
|
|
311
|
+
// Trim level 3+: reduce memory section (drop summary to save space)
|
|
310
312
|
const memoryParams = trimLevel >= 3
|
|
311
|
-
? { ...params,
|
|
313
|
+
? { ...params, memorySummary: params.memorySummary ? params.memorySummary.slice(0, 500) : '', memoryFacts: params.memoryFacts.slice(0, 5) }
|
|
312
314
|
: params;
|
|
313
315
|
|
|
314
316
|
const sections = [
|