npm - @dotsetlabs/dotclaw - Versions diffs - 2.3.0 → 2.5.0 - Mend

@dotsetlabs/dotclaw 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

package/config-examples/runtime.json +29 -3
package/container/agent-runner/src/agent-config.ts +19 -3
package/container/agent-runner/src/container-protocol.ts +11 -0
package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
package/container/agent-runner/src/index.ts +744 -123
package/container/agent-runner/src/memory.ts +18 -68
package/container/agent-runner/src/system-prompt.ts +36 -34
package/container/agent-runner/src/tool-loop-policy.ts +724 -0
package/container/agent-runner/src/tools.ts +211 -8
package/dist/agent-context.d.ts +1 -0
package/dist/agent-context.d.ts.map +1 -1
package/dist/agent-context.js +21 -9
package/dist/agent-context.js.map +1 -1
package/dist/agent-execution.d.ts +2 -0
package/dist/agent-execution.d.ts.map +1 -1
package/dist/agent-execution.js +164 -15
package/dist/agent-execution.js.map +1 -1
package/dist/agent-semaphore.d.ts +24 -1
package/dist/agent-semaphore.d.ts.map +1 -1
package/dist/agent-semaphore.js +109 -20
package/dist/agent-semaphore.js.map +1 -1
package/dist/cli.js +3 -11
package/dist/cli.js.map +1 -1
package/dist/config.d.ts +2 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +2 -0
package/dist/config.js.map +1 -1
package/dist/container-protocol.d.ts +22 -0
package/dist/container-protocol.d.ts.map +1 -1
package/dist/container-protocol.js.map +1 -1
package/dist/container-runner.d.ts +7 -0
package/dist/container-runner.d.ts.map +1 -1
package/dist/container-runner.js +417 -143
package/dist/container-runner.js.map +1 -1
package/dist/db.d.ts.map +1 -1
package/dist/db.js +46 -12
package/dist/db.js.map +1 -1
package/dist/failover-policy.d.ts +41 -0
package/dist/failover-policy.d.ts.map +1 -0
package/dist/failover-policy.js +261 -0
package/dist/failover-policy.js.map +1 -0
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/ipc-dispatcher.d.ts.map +1 -1
package/dist/ipc-dispatcher.js +27 -43
package/dist/ipc-dispatcher.js.map +1 -1
package/dist/mcp-config.d.ts +22 -0
package/dist/mcp-config.d.ts.map +1 -0
package/dist/mcp-config.js +94 -0
package/dist/mcp-config.js.map +1 -0
package/dist/memory-backend.d.ts +27 -0
package/dist/memory-backend.d.ts.map +1 -0
package/dist/memory-backend.js +112 -0
package/dist/memory-backend.js.map +1 -0
package/dist/memory-recall.d.ts.map +1 -1
package/dist/memory-recall.js +135 -22
package/dist/memory-recall.js.map +1 -1
package/dist/memory-store.d.ts +1 -0
package/dist/memory-store.d.ts.map +1 -1
package/dist/memory-store.js +55 -7
package/dist/memory-store.js.map +1 -1
package/dist/message-pipeline.d.ts +24 -0
package/dist/message-pipeline.d.ts.map +1 -1
package/dist/message-pipeline.js +131 -27
package/dist/message-pipeline.js.map +1 -1
package/dist/metrics.d.ts +1 -0
package/dist/metrics.d.ts.map +1 -1
package/dist/metrics.js +9 -0
package/dist/metrics.js.map +1 -1
package/dist/recall-policy.d.ts +12 -0
package/dist/recall-policy.d.ts.map +1 -0
package/dist/recall-policy.js +89 -0
package/dist/recall-policy.js.map +1 -0
package/dist/runtime-config.d.ts +33 -0
package/dist/runtime-config.d.ts.map +1 -1
package/dist/runtime-config.js +111 -11
package/dist/runtime-config.js.map +1 -1
package/dist/streaming.d.ts.map +1 -1
package/dist/streaming.js +125 -33
package/dist/streaming.js.map +1 -1
package/dist/task-scheduler.d.ts.map +1 -1
package/dist/task-scheduler.js +27 -10
package/dist/task-scheduler.js.map +1 -1
package/dist/tool-policy.d.ts.map +1 -1
package/dist/tool-policy.js +26 -4
package/dist/tool-policy.js.map +1 -1
package/dist/trace-writer.d.ts +12 -0
package/dist/trace-writer.d.ts.map +1 -1
package/dist/trace-writer.js.map +1 -1
package/dist/turn-hygiene.d.ts +14 -0
package/dist/turn-hygiene.d.ts.map +1 -0
package/dist/turn-hygiene.js +214 -0
package/dist/turn-hygiene.js.map +1 -0
package/dist/webhook.d.ts.map +1 -1
package/dist/webhook.js +1 -0
package/dist/webhook.js.map +1 -1
package/package.json +15 -1
package/scripts/benchmark-baseline.js +365 -0
package/scripts/benchmark-harness.js +1413 -0
package/scripts/benchmark-scenarios.js +301 -0
package/scripts/canary-suite.js +123 -0
package/scripts/generate-controlled-traces.js +230 -0
package/scripts/release-slo-check.js +214 -0
package/scripts/run-live-canary.js +339 -0

package/container/agent-runner/src/memory.ts CHANGED Viewed

@@ -371,71 +371,6 @@ export function parseSummaryResponse(text: string): { summary: string; facts: st
   }
 }
-function tokenize(text: string): string[] {
-  return (text.toLowerCase().match(/[a-z0-9]+/g) || []).filter(token => token.length > 1);
-}
-function scoreCandidate(candidate: string, queryTokens: string[], weight: number): number {
-  const candidateTokens = tokenize(candidate);
-  if (candidateTokens.length === 0 || queryTokens.length === 0) return 0;
-  const tokenSet = new Set(candidateTokens);
-  let overlap = 0;
-  for (const token of queryTokens) {
-    if (tokenSet.has(token)) overlap += 1;
-  }
-  if (overlap === 0) return 0;
-  return (overlap / Math.sqrt(candidateTokens.length)) * weight;
-}
-export function retrieveRelevantMemories(params: {
-  query: string;
-  summary: string;
-  facts: string[];
-  olderMessages: Message[];
-  config: MemoryConfig;
-}): string[] {
-  const queryTokens = tokenize(params.query);
-  if (queryTokens.length === 0) return [];
-  const candidates: Array<{ text: string; score: number }> = [];
-  if (params.summary) {
-    const summaryLines = params.summary.split('\n').map(line => line.trim()).filter(Boolean);
-    for (const line of summaryLines) {
-      const score = scoreCandidate(line, queryTokens, 1.4);
-      if (score > 0) candidates.push({ text: line, score });
-    }
-  }
-  for (const fact of params.facts) {
-    const score = scoreCandidate(fact, queryTokens, 2.0);
-    if (score > 0) candidates.push({ text: fact, score });
-  }
-  for (const msg of params.olderMessages.slice(-50)) {
-    const snippet = msg.content.length > 300 ? `${msg.content.slice(0, 300)}...` : msg.content;
-    const score = scoreCandidate(snippet, queryTokens, 1.0);
-    if (score > 0) candidates.push({ text: snippet, score });
-  }
-  candidates.sort((a, b) => b.score - a.score);
-  // Quality gate: filter out low-scoring candidates to prevent noise injection
-  const MIN_SESSION_RECALL_SCORE = 0.5;
-  const filtered = candidates.filter(c => c.score >= MIN_SESSION_RECALL_SCORE);
-  const results: string[] = [];
-  let tokens = 0;
-  for (const candidate of filtered) {
-    if (results.length >= params.config.memoryMaxResults) break;
-    const nextTokens = estimateTokens(candidate.text);
-    if (tokens + nextTokens > params.config.memoryMaxTokens) break;
-    results.push(candidate.text);
-    tokens += nextTokens;
-  }
-  return results;
-}
 export interface ContextPruningConfig {
   softTrimMaxChars: number;
   softTrimHeadChars: number;
@@ -475,10 +410,25 @@ export function pruneContextMessages(
 }
 /**
- * Limit conversation history to the last N messages.
+ * Limit conversation history by counting user turns (not total messages).
+ * maxTurns=40 means keep the last 40 user messages plus all their associated
+ * assistant replies — roughly 80 messages total.
  * Preserves chronological order.
  */
 export function limitHistoryTurns(messages: Message[], maxTurns: number): Message[] {
-  if (maxTurns <= 0 || messages.length <= maxTurns) return messages;
-  return messages.slice(-maxTurns);
+  if (maxTurns <= 0) return messages;
+  // Count user turns from the end
+  let userTurnsSeen = 0;
+  let cutoff = 0;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    if (messages[i].role === 'user') {
+      userTurnsSeen++;
+      if (userTurnsSeen > maxTurns) {
+        cutoff = i + 1;
+        break;
+      }
+    }
+  }
+  if (cutoff === 0) return messages;
+  return messages.slice(cutoff);
 }

package/container/agent-runner/src/system-prompt.ts CHANGED Viewed

@@ -110,7 +110,9 @@ function buildScheduledSection(params: SystemPromptParams): string {
 function buildResponseGuidanceSection(): string {
   return [
-    '- Always answer the user\'s question directly before reaching for tools.',
+    '- Answer directly when the request can be completed from conversation context without external state.',
+    '- When the request requires file/system/network actions or fresh state, execute tools first before finalizing.',
+    '- Never claim an action happened unless corresponding tool calls succeeded in this turn.',
     '- If the user asks about your previous actions (e.g., "did you use X tool?"), reflect on the conversation history — do not re-execute the task.',
     '- If the user asks a simple factual question, answer from your knowledge — do not call tools unless you need to verify or act.',
     '- When you have genuinely nothing to say, respond with ONLY: NO_REPLY (your entire message must be just this token, nothing else).'
@@ -128,6 +130,7 @@ function buildToolCallStyleSection(): string {
 function buildToolGuidanceSection(params: SystemPromptParams): string {
   const lines = [
     'Key tool rules:',
+    '- Never claim file/system/web actions succeeded unless tool calls in this turn confirm them.',
     '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
     '- To send media from the web: download_url → send_photo/send_file/send_audio.',
     '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
@@ -160,37 +163,30 @@ function buildToolGuidanceSection(params: SystemPromptParams): string {
 function buildMemorySection(params: SystemPromptParams): string {
   const parts: string[] = [];
-  const hasAny = params.memorySummary || params.memoryFacts.length > 0 ||
-    params.longTermRecall.length > 0 || params.userProfile;
-  if (hasAny) {
-    parts.push('The following memories may or may not be relevant to the current conversation. Use them only if they directly answer the user\'s question.');
-    if (params.memorySummary) {
-      parts.push('Long-term memory summary:');
-      parts.push(params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS));
-    }
-    if (params.memoryFacts.length > 0) {
-      parts.push('Long-term facts:');
-      parts.push(params.memoryFacts.map(f => `- ${f}`).join('\n'));
-    }
-    if (params.userProfile) {
-      parts.push('User profile:');
-      parts.push(params.userProfile);
-    }
-    if (params.longTermRecall.length > 0) {
-      parts.push('What you remember about the user (long-term):');
-      parts.push(params.longTermRecall.map(item => `- ${item}`).join('\n'));
-    }
-    if (params.memoryStats) {
-      parts.push(`Memory stats: Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`);
-    }
-  } else {
-    parts.push('No long-term memory available yet.');
+  // Session-level context: summary and facts from the current conversation.
+  // These are essential for understanding the current thread.
+  if (params.memorySummary) {
+    parts.push('Conversation summary (this session):');
+    parts.push(params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS));
+  }
+  if (params.memoryFacts.length > 0) {
+    parts.push('Key facts (this session):');
+    parts.push(params.memoryFacts.map(f => `- ${f}`).join('\n'));
+  }
+  // User profile stays pre-injected — identity and preferences should always be available.
+  if (params.userProfile) {
+    parts.push('User profile:');
+    parts.push(params.userProfile);
   }
-  if (params.sessionRecall.length > 0) {
-    parts.push('Recent conversation context:');
-    parts.push(params.sessionRecall.map(item => `- ${item}`).join('\n'));
+  // Long-term memory is now tool-based: agent searches on demand instead of pre-injection.
+  // This prevents context bloat from irrelevant memories and lets the agent decide what's needed.
+  parts.push('Long-term memory: Use the mcp__dotclaw__memory_search tool to recall information from past conversations, stored preferences, notes, and knowledge. Search BEFORE answering questions about prior decisions, dates, people, projects, or anything you don\'t see in the conversation above.');
+  if (params.memoryStats && params.memoryStats.total > 0) {
+    parts.push(`Memory store: ${params.memoryStats.total} entries available (search with mcp__dotclaw__memory_search).`);
   }
   return parts.join('\n');
@@ -292,8 +288,14 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
   const toolReliability = trimLevel >= 2 ? '' : (
     params.toolReliability && params.toolReliability.length > 0
       ? params.toolReliability
-        .sort((a, b) => a.success_rate - b.success_rate)
-        .slice(0, 20)
+        .filter(t => t.count >= 5 && (t.success_rate < 0.98 || (Number.isFinite(t.avg_duration_ms) && (t.avg_duration_ms || 0) > 2500)))
+        .sort((a, b) => {
+          if (a.success_rate !== b.success_rate) return a.success_rate - b.success_rate;
+          const aDur = Number.isFinite(a.avg_duration_ms) ? (a.avg_duration_ms || 0) : 0;
+          const bDur = Number.isFinite(b.avg_duration_ms) ? (b.avg_duration_ms || 0) : 0;
+          return bDur - aDur;
+        })
+        .slice(0, 8)
         .map(t => {
           const pct = `${Math.round(t.success_rate * 100)}%`;
           const avg = Number.isFinite(t.avg_duration_ms) ? `${Math.round(t.avg_duration_ms!)}ms` : 'n/a';
@@ -306,9 +308,9 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
   // Trim level 1+: drop prompt packs
   const packBlocks = trimLevel >= 1 ? [] : buildPromptPackSections(params);
-  // Trim level 3+: reduce memory section (drop session recall, limit long-term recall)
+  // Trim level 3+: reduce memory section (drop summary to save space)
   const memoryParams = trimLevel >= 3
-    ? { ...params, sessionRecall: [], longTermRecall: params.longTermRecall.slice(0, 2) }
+    ? { ...params, memorySummary: params.memorySummary ? params.memorySummary.slice(0, 500) : '', memoryFacts: params.memoryFacts.slice(0, 5) }
     : params;
   const sections = [