npm - create-walle - Versions diffs - 0.9.13 → 0.9.15 - Mend

create-walle 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

package/README.md +8 -3
package/bin/create-walle.js +232 -32
package/bin/mcp-inject.js +18 -53
package/package.json +3 -1
package/template/claude-task-manager/api-prompts.js +11 -2
package/template/claude-task-manager/approval-agent.js +7 -0
package/template/claude-task-manager/db.js +94 -75
package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
package/template/claude-task-manager/fuzzy-utils.js +10 -2
package/template/claude-task-manager/git-utils.js +140 -10
package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
package/template/claude-task-manager/lib/agent-presets.js +38 -5
package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
package/template/claude-task-manager/lib/session-history.js +309 -16
package/template/claude-task-manager/lib/session-standup.js +409 -0
package/template/claude-task-manager/lib/session-stream.js +253 -20
package/template/claude-task-manager/lib/standup-attention.js +200 -0
package/template/claude-task-manager/lib/status-hooks.js +8 -2
package/template/claude-task-manager/lib/update-telemetry.js +114 -0
package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
package/template/claude-task-manager/lib/walle-default-model.js +55 -0
package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
package/template/claude-task-manager/lib/walle-transcript.js +1 -3
package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
package/template/claude-task-manager/package.json +1 -0
package/template/claude-task-manager/providers/codex-mcp.js +104 -0
package/template/claude-task-manager/providers/index.js +2 -0
package/template/claude-task-manager/public/css/setup.css +2 -1
package/template/claude-task-manager/public/css/walle.css +71 -0
package/template/claude-task-manager/public/index.html +2388 -429
package/template/claude-task-manager/public/js/message-renderer.js +314 -35
package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
package/template/claude-task-manager/public/js/setup.js +62 -19
package/template/claude-task-manager/public/js/stream-view.js +396 -55
package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
package/template/claude-task-manager/public/js/walle-session.js +234 -26
package/template/claude-task-manager/public/js/walle.js +143 -2
package/template/claude-task-manager/server.js +1402 -433
package/template/claude-task-manager/session-integrity.js +77 -28
package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
package/template/package.json +1 -1
package/template/wall-e/agent-runners/claude-code.js +2 -0
package/template/wall-e/agent.js +63 -8
package/template/wall-e/api-walle.js +330 -52
package/template/wall-e/brain.js +291 -42
package/template/wall-e/chat.js +172 -15
package/template/wall-e/coding/compaction-service.js +19 -5
package/template/wall-e/coding/stream-processor.js +22 -2
package/template/wall-e/coding/workspace-replay.js +1 -4
package/template/wall-e/coding-orchestrator.js +250 -80
package/template/wall-e/compat.js +0 -28
package/template/wall-e/context/context-builder.js +3 -1
package/template/wall-e/embeddings.js +2 -7
package/template/wall-e/eval/agent-runner.js +30 -9
package/template/wall-e/eval/benchmark-generator.js +21 -1
package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
package/template/wall-e/eval/cc-replay.js +1 -0
package/template/wall-e/eval/codex-cli-baseline.js +633 -0
package/template/wall-e/eval/debug-agent003.js +1 -0
package/template/wall-e/eval/eval-orchestrator.js +3 -3
package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
package/template/wall-e/eval/run-model-comparison.js +1 -0
package/template/wall-e/eval/swebench-adapter.js +1 -0
package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
package/template/wall-e/extraction/knowledge-extractor.js +1 -2
package/template/wall-e/lib/mcp-integration.js +336 -0
package/template/wall-e/llm/ollama.js +47 -8
package/template/wall-e/llm/ollama.plugin.json +1 -1
package/template/wall-e/llm/tool-adapter.js +1 -0
package/template/wall-e/loops/ingest.js +42 -8
package/template/wall-e/loops/initiative.js +87 -2
package/template/wall-e/mcp-server.js +872 -19
package/template/wall-e/memory/ctm-context-client.js +230 -0
package/template/wall-e/memory/ctm-session-context.js +1376 -0
package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
package/template/wall-e/server.js +30 -1
package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
package/template/wall-e/skills/skill-planner.js +86 -4
package/template/wall-e/slack/socket-mode-listener.js +276 -0
package/template/wall-e/telemetry.js +70 -2
package/template/wall-e/tools/builtin-middleware.js +55 -2
package/template/wall-e/tools/shell-policy.js +1 -1
package/template/wall-e/tools/slack-owner.js +104 -0
package/template/website/index.html +4 -4
package/template/builder-journal.md +0 -17

package/template/wall-e/eval/benchmark-generator.js CHANGED

@@ -18,11 +18,31 @@ function classifyCodingType(session) {
   return 'coding:generation';
 }
// Openers that indicate assistant narration ("I'll ...", "Let me ...",
// "Sure, ...") rather than a user-authored task. The trailing \b is shared by
// every alternative, so each one must end in a word character; "sure" is
// therefore listed bare (a "sure[, ]" alternative followed by \b can never
// match "Sure, ..." because there is no word boundary between "," and " ").
const ASSISTANT_NARRATION_RE = /^(?:i['’]ll|i will|i can|i['’]m going to|let me|sure|happy to help)\b/i;

// Bare acknowledgements / continuations that only make sense with prior
// conversation state.
const CONTINUATION_RE = /^(?:go ahead|continue|proceed|do it|yes|yep|ok|okay|thanks|thank you)\b/i;

// Vocabulary that marks the prompt as an actual coding task.
const CODING_TASK_RE = /\b(fix|implement|add|change|update|refactor|test|debug|make|write|delete|remove|harden|wire|bug|failing|error|regression|feature|endpoint|api|ui|server|component|code review|review.*code)\b/i;

/**
 * Decide whether a session-mined prompt can be replayed as a standalone
 * benchmark task.
 *
 * A prompt qualifies only when it is at least 25 characters long (after
 * trimming), does not open with assistant-style narration, does not open with
 * a bare acknowledgement/continuation, and mentions at least one coding-task
 * keyword.
 *
 * @param {*} prompt - Candidate prompt; nullish/falsy values are treated as ''.
 * @returns {boolean} true when the prompt looks like a self-contained task.
 */
function isReplayableBenchmarkPrompt(prompt) {
  const text = String(prompt || '').trim();
  if (text.length < 25) return false;

  // Session-mined prompts must be the user's task, not the assistant's first
  // progress narration. Assistant prose turns the benchmark into "continue the
  // previous assistant's work", which is not replayable from a fresh sandbox.
  if (ASSISTANT_NARRATION_RE.test(text)) return false;

  // text is already trimmed, so no leading-whitespace allowance is needed.
  if (CONTINUATION_RE.test(text)) return false;

  return CODING_TASK_RE.test(text);
}
 /**
  * Convert a coding agent session object to a benchmark entry.
  * Returns null if the session would be a duplicate (id already in existingIds).
  */
 function sessionToBenchmark(session, existingIds = new Set()) {
+  if (!isReplayableBenchmarkPrompt(session.prompt)) return null;
   const id = `agent-session-${crypto.createHash('sha256').update(session.prompt || '').digest('hex').slice(0, 8)}`;
   if (existingIds.has(id)) return null; // dedup
@@ -90,4 +110,4 @@ async function generateBenchmarks(brain, { minSignificance = 0.5, limit = 50 } =
   return benchmarks;
 }
-module.exports = { classifyCodingType, sessionToBenchmark, generateBenchmarks };
+module.exports = { classifyCodingType, isReplayableBenchmarkPrompt, sessionToBenchmark, generateBenchmarks };

package/template/wall-e/eval/benchmarks/chat-eval.json CHANGED

@@ -89,13 +89,13 @@
   },
   {
     "id": "ce-B2",
-    "prompt": "Who is Sarah Chen and what's her role?",
+    "prompt": "Who is Alex Example and what's their role?",
     "taskType": "chat",
     "difficulty": "medium",
     "category": "knowledge",
     "expectedIntent": "knowledge",
     "expectedTools": ["search_memories"],
-    "forbiddenTools": ["calendar_events", "run_shell"],
+    "forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
     "maxToolCalls": 5,
     "latencyBudgetMs": 15000,
     "mockToolResults": {
@@ -105,18 +105,18 @@
         "total_slack": 400,
         "search_method": "fts5_bm25",
         "memories": [
-          {"source": "slack", "channel": "general", "content": "Sarah Chen mentioned she's leading the platform team restructuring. She reports directly to the VP of Engineering.", "timestamp": "2026-04-07T11:00:00Z"},
-          {"source": "slack", "channel": "engineering", "content": "1:1 with Sarah Chen — discussed Q2 roadmap priorities and hiring plan for 3 new engineers.", "timestamp": "2026-04-05T09:30:00Z"}
+          {"source": "slack", "channel": "general", "content": "Alex Example mentioned they're leading the platform team restructuring. They report directly to the VP of Engineering.", "timestamp": "2026-04-07T11:00:00Z"},
+          {"source": "slack", "channel": "engineering", "content": "1:1 with Alex Example — discussed Q2 roadmap priorities and hiring plan for 3 new engineers.", "timestamp": "2026-04-05T09:30:00Z"}
         ]
       },
       "lookup_person": {
-        "name": "Sarah Chen",
+        "name": "Alex Example",
         "relationship": "manager",
         "trust_level": 0.9,
         "notes": "VP Engineering, leads platform team"
       }
     },
-    "expectedInReply": ["sarah"],
+    "expectedInReply": ["alex"],
     "forbiddenInReply": [],
     "minReplyLength": 30,
     "maxReplyLength": 1500,
@@ -125,6 +125,66 @@
   },
   {
     "id": "ce-B3",
+    "prompt": "What did we decide last time about the MCP auto-config approach?",
+    "taskType": "chat",
+    "difficulty": "medium",
+    "category": "knowledge",
+    "expectedIntent": "knowledge",
+    "expectedTools": ["search_memories"],
+    "forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
+    "maxToolCalls": 5,
+    "latencyBudgetMs": 15000,
+    "mockToolResults": {
+      "search_memories": {
+        "count": 2,
+        "total_memories": 500,
+        "total_slack": 200,
+        "search_method": "fts5_bm25",
+        "memories": [
+          {"source": "ctm", "content": "Decision: MCP auto-config should be handled in the Wall-E install/startup path, not with a local-only Codex skill.", "timestamp": "2026-05-01T18:30:00Z"},
+          {"source": "ctm", "content": "Follow-up: add portable agent instructions for Claude and Codex so memory routing works for all npx users.", "timestamp": "2026-05-01T18:35:00Z"}
+        ]
+      }
+    },
+    "expectedInReply": ["auto-config", "portable"],
+    "forbiddenInReply": [],
+    "minReplyLength": 30,
+    "maxReplyLength": 1500,
+    "expectedTraits": ["references context"],
+    "tags": ["knowledge", "recall", "private-context"]
+  },
+  {
+    "id": "ce-B3-session-recall",
+    "prompt": "What was the parser regression session about and what should we do next?",
+    "taskType": "chat",
+    "difficulty": "medium",
+    "category": "knowledge",
+    "expectedIntent": "knowledge",
+    "expectedTools": ["search_memories"],
+    "forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
+    "maxToolCalls": 5,
+    "latencyBudgetMs": 15000,
+    "mockToolResults": {
+      "search_memories": {
+        "count": 2,
+        "total_memories": 500,
+        "total_slack": 200,
+        "search_method": "fts5_bm25",
+        "memories": [
+          {"source": "ctm", "content": "Session summary: Parser regression was caused by stale cached CTM session rows after restart.", "timestamp": "2026-05-01T20:00:00Z"},
+          {"source": "walle-diary", "content": "Next steps: verify restart recovery with CTM render tests and keep the session title from cached metadata.", "timestamp": "2026-05-01T20:05:00Z"}
+        ]
+      }
+    },
+    "expectedInReply": ["parser", "restart"],
+    "forbiddenInReply": [],
+    "minReplyLength": 30,
+    "maxReplyLength": 1500,
+    "expectedTraits": ["references context"],
+    "tags": ["knowledge", "session-recall", "private-context"]
+  },
+  {
+    "id": "ce-B4",
     "prompt": "What topics come up most in my slack conversations?",
     "taskType": "chat",
     "difficulty": "medium",