create-walle 0.9.13 → 0.9.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -3
- package/bin/create-walle.js +232 -32
- package/bin/mcp-inject.js +18 -53
- package/package.json +3 -1
- package/template/claude-task-manager/api-prompts.js +11 -2
- package/template/claude-task-manager/approval-agent.js +7 -0
- package/template/claude-task-manager/db.js +94 -75
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
- package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
- package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
- package/template/claude-task-manager/fuzzy-utils.js +10 -2
- package/template/claude-task-manager/git-utils.js +140 -10
- package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
- package/template/claude-task-manager/lib/agent-presets.js +38 -5
- package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
- package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
- package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
- package/template/claude-task-manager/lib/session-history.js +309 -16
- package/template/claude-task-manager/lib/session-standup.js +409 -0
- package/template/claude-task-manager/lib/session-stream.js +253 -20
- package/template/claude-task-manager/lib/standup-attention.js +200 -0
- package/template/claude-task-manager/lib/status-hooks.js +8 -2
- package/template/claude-task-manager/lib/update-telemetry.js +114 -0
- package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
- package/template/claude-task-manager/lib/walle-default-model.js +55 -0
- package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
- package/template/claude-task-manager/lib/walle-transcript.js +1 -3
- package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
- package/template/claude-task-manager/package.json +1 -0
- package/template/claude-task-manager/providers/codex-mcp.js +104 -0
- package/template/claude-task-manager/providers/index.js +2 -0
- package/template/claude-task-manager/public/css/setup.css +2 -1
- package/template/claude-task-manager/public/css/walle.css +71 -0
- package/template/claude-task-manager/public/index.html +2388 -429
- package/template/claude-task-manager/public/js/message-renderer.js +314 -35
- package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
- package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
- package/template/claude-task-manager/public/js/setup.js +62 -19
- package/template/claude-task-manager/public/js/stream-view.js +396 -55
- package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
- package/template/claude-task-manager/public/js/walle-session.js +234 -26
- package/template/claude-task-manager/public/js/walle.js +143 -2
- package/template/claude-task-manager/server.js +1402 -433
- package/template/claude-task-manager/session-integrity.js +77 -28
- package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
- package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
- package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
- package/template/package.json +1 -1
- package/template/wall-e/agent-runners/claude-code.js +2 -0
- package/template/wall-e/agent.js +63 -8
- package/template/wall-e/api-walle.js +330 -52
- package/template/wall-e/brain.js +291 -42
- package/template/wall-e/chat.js +172 -15
- package/template/wall-e/coding/compaction-service.js +19 -5
- package/template/wall-e/coding/stream-processor.js +22 -2
- package/template/wall-e/coding/workspace-replay.js +1 -4
- package/template/wall-e/coding-orchestrator.js +250 -80
- package/template/wall-e/compat.js +0 -28
- package/template/wall-e/context/context-builder.js +3 -1
- package/template/wall-e/embeddings.js +2 -7
- package/template/wall-e/eval/agent-runner.js +30 -9
- package/template/wall-e/eval/benchmark-generator.js +21 -1
- package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
- package/template/wall-e/eval/cc-replay.js +1 -0
- package/template/wall-e/eval/codex-cli-baseline.js +633 -0
- package/template/wall-e/eval/debug-agent003.js +1 -0
- package/template/wall-e/eval/eval-orchestrator.js +3 -3
- package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
- package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
- package/template/wall-e/eval/run-model-comparison.js +1 -0
- package/template/wall-e/eval/swebench-adapter.js +1 -0
- package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
- package/template/wall-e/extraction/knowledge-extractor.js +1 -2
- package/template/wall-e/lib/mcp-integration.js +336 -0
- package/template/wall-e/llm/ollama.js +47 -8
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/tool-adapter.js +1 -0
- package/template/wall-e/loops/ingest.js +42 -8
- package/template/wall-e/loops/initiative.js +87 -2
- package/template/wall-e/mcp-server.js +872 -19
- package/template/wall-e/memory/ctm-context-client.js +230 -0
- package/template/wall-e/memory/ctm-session-context.js +1376 -0
- package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
- package/template/wall-e/server.js +30 -1
- package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
- package/template/wall-e/skills/skill-planner.js +86 -4
- package/template/wall-e/slack/socket-mode-listener.js +276 -0
- package/template/wall-e/telemetry.js +70 -2
- package/template/wall-e/tools/builtin-middleware.js +55 -2
- package/template/wall-e/tools/shell-policy.js +1 -1
- package/template/wall-e/tools/slack-owner.js +104 -0
- package/template/website/index.html +4 -4
- package/template/builder-journal.md +0 -17
|
@@ -18,11 +18,31 @@ function classifyCodingType(session) {
|
|
|
18
18
|
return 'coding:generation';
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
/**
 * Decide whether a session-mined prompt is usable as a standalone benchmark task.
 *
 * A prompt qualifies only when, after trimming, it:
 *   1. is at least 25 characters long,
 *   2. does not open with assistant-style narration ("I'll ...", "Let me ...", "Sure, ..."),
 *   3. does not open with a bare continuation/acknowledgement ("go ahead", "ok", "thanks", ...),
 *   4. mentions at least one coding-task keyword (fix, implement, bug, endpoint, ...).
 *
 * @param {*} prompt - Candidate prompt; falsy values are treated as an empty string.
 * @returns {boolean} true when the prompt passes all four checks above.
 */
function isReplayableBenchmarkPrompt(prompt) {
  const text = String(prompt || '').trim();
  if (text.length < 25) return false;

  // Session-mined prompts must be the user's task, not the assistant's first
  // progress narration. Assistant prose turns the benchmark into "continue the
  // previous assistant's work", which is not replayable from a fresh sandbox.
  //
  // "sure" uses a lookahead for the following comma/space instead of consuming
  // it: the shared trailing \b must sit right after the word, otherwise
  // "Sure, I'll ..." (comma followed by a space, i.e. no word boundary) is
  // never rejected.
  const assistantNarration = /^(?:i['’]ll|i will|i can|i['’]m going to|let me|sure(?=[, ])|happy to help)\b/i;
  if (assistantNarration.test(text)) {
    return false;
  }

  // Bare continuations and acknowledgements depend on earlier turns of the
  // conversation. `text` is already trimmed, so anchoring at ^ is sufficient.
  const continuation = /^(?:go ahead|continue|proceed|do it|yes|yep|ok|okay|thanks|thank you)\b/i;
  if (continuation.test(text)) {
    return false;
  }

  // Finally require at least one keyword that marks the text as a coding task.
  const codingTask = /\b(fix|implement|add|change|update|refactor|test|debug|make|write|delete|remove|harden|wire|bug|failing|error|regression|feature|endpoint|api|ui|server|component|code review|review.*code)\b/i;
  return codingTask.test(text);
}
|
|
38
|
+
|
|
21
39
|
/**
|
|
22
40
|
* Convert a coding agent session object to a benchmark entry.
|
|
23
41
|
* Returns null if the session would be a duplicate (id already in existingIds).
|
|
24
42
|
*/
|
|
25
43
|
function sessionToBenchmark(session, existingIds = new Set()) {
|
|
44
|
+
if (!isReplayableBenchmarkPrompt(session.prompt)) return null;
|
|
45
|
+
|
|
26
46
|
const id = `agent-session-${crypto.createHash('sha256').update(session.prompt || '').digest('hex').slice(0, 8)}`;
|
|
27
47
|
if (existingIds.has(id)) return null; // dedup
|
|
28
48
|
|
|
@@ -90,4 +110,4 @@ async function generateBenchmarks(brain, { minSignificance = 0.5, limit = 50 } =
|
|
|
90
110
|
return benchmarks;
|
|
91
111
|
}
|
|
92
112
|
|
|
93
|
-
module.exports = { classifyCodingType, sessionToBenchmark, generateBenchmarks };
|
|
113
|
+
module.exports = { classifyCodingType, isReplayableBenchmarkPrompt, sessionToBenchmark, generateBenchmarks };
|
|
@@ -89,13 +89,13 @@
|
|
|
89
89
|
},
|
|
90
90
|
{
|
|
91
91
|
"id": "ce-B2",
|
|
92
|
-
"prompt": "Who is
|
|
92
|
+
"prompt": "Who is Alex Example and what's their role?",
|
|
93
93
|
"taskType": "chat",
|
|
94
94
|
"difficulty": "medium",
|
|
95
95
|
"category": "knowledge",
|
|
96
96
|
"expectedIntent": "knowledge",
|
|
97
97
|
"expectedTools": ["search_memories"],
|
|
98
|
-
"forbiddenTools": ["calendar_events", "run_shell"],
|
|
98
|
+
"forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
|
|
99
99
|
"maxToolCalls": 5,
|
|
100
100
|
"latencyBudgetMs": 15000,
|
|
101
101
|
"mockToolResults": {
|
|
@@ -105,18 +105,18 @@
|
|
|
105
105
|
"total_slack": 400,
|
|
106
106
|
"search_method": "fts5_bm25",
|
|
107
107
|
"memories": [
|
|
108
|
-
{"source": "slack", "channel": "general", "content": "
|
|
109
|
-
{"source": "slack", "channel": "engineering", "content": "1:1 with
|
|
108
|
+
{"source": "slack", "channel": "general", "content": "Alex Example mentioned they're leading the platform team restructuring. They report directly to the VP of Engineering.", "timestamp": "2026-04-07T11:00:00Z"},
|
|
109
|
+
{"source": "slack", "channel": "engineering", "content": "1:1 with Alex Example — discussed Q2 roadmap priorities and hiring plan for 3 new engineers.", "timestamp": "2026-04-05T09:30:00Z"}
|
|
110
110
|
]
|
|
111
111
|
},
|
|
112
112
|
"lookup_person": {
|
|
113
|
-
"name": "
|
|
113
|
+
"name": "Alex Example",
|
|
114
114
|
"relationship": "manager",
|
|
115
115
|
"trust_level": 0.9,
|
|
116
116
|
"notes": "VP Engineering, leads platform team"
|
|
117
117
|
}
|
|
118
118
|
},
|
|
119
|
-
"expectedInReply": ["
|
|
119
|
+
"expectedInReply": ["alex"],
|
|
120
120
|
"forbiddenInReply": [],
|
|
121
121
|
"minReplyLength": 30,
|
|
122
122
|
"maxReplyLength": 1500,
|
|
@@ -125,6 +125,66 @@
|
|
|
125
125
|
},
|
|
126
126
|
{
|
|
127
127
|
"id": "ce-B3",
|
|
128
|
+
"prompt": "What did we decide last time about the MCP auto-config approach?",
|
|
129
|
+
"taskType": "chat",
|
|
130
|
+
"difficulty": "medium",
|
|
131
|
+
"category": "knowledge",
|
|
132
|
+
"expectedIntent": "knowledge",
|
|
133
|
+
"expectedTools": ["search_memories"],
|
|
134
|
+
"forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
|
|
135
|
+
"maxToolCalls": 5,
|
|
136
|
+
"latencyBudgetMs": 15000,
|
|
137
|
+
"mockToolResults": {
|
|
138
|
+
"search_memories": {
|
|
139
|
+
"count": 2,
|
|
140
|
+
"total_memories": 500,
|
|
141
|
+
"total_slack": 200,
|
|
142
|
+
"search_method": "fts5_bm25",
|
|
143
|
+
"memories": [
|
|
144
|
+
{"source": "ctm", "content": "Decision: MCP auto-config should be handled in the Wall-E install/startup path, not with a local-only Codex skill.", "timestamp": "2026-05-01T18:30:00Z"},
|
|
145
|
+
{"source": "ctm", "content": "Follow-up: add portable agent instructions for Claude and Codex so memory routing works for all npx users.", "timestamp": "2026-05-01T18:35:00Z"}
|
|
146
|
+
]
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
"expectedInReply": ["auto-config", "portable"],
|
|
150
|
+
"forbiddenInReply": [],
|
|
151
|
+
"minReplyLength": 30,
|
|
152
|
+
"maxReplyLength": 1500,
|
|
153
|
+
"expectedTraits": ["references context"],
|
|
154
|
+
"tags": ["knowledge", "recall", "private-context"]
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"id": "ce-B3-session-recall",
|
|
158
|
+
"prompt": "What was the parser regression session about and what should we do next?",
|
|
159
|
+
"taskType": "chat",
|
|
160
|
+
"difficulty": "medium",
|
|
161
|
+
"category": "knowledge",
|
|
162
|
+
"expectedIntent": "knowledge",
|
|
163
|
+
"expectedTools": ["search_memories"],
|
|
164
|
+
"forbiddenTools": ["calendar_events", "run_shell", "web_fetch", "slack_search"],
|
|
165
|
+
"maxToolCalls": 5,
|
|
166
|
+
"latencyBudgetMs": 15000,
|
|
167
|
+
"mockToolResults": {
|
|
168
|
+
"search_memories": {
|
|
169
|
+
"count": 2,
|
|
170
|
+
"total_memories": 500,
|
|
171
|
+
"total_slack": 200,
|
|
172
|
+
"search_method": "fts5_bm25",
|
|
173
|
+
"memories": [
|
|
174
|
+
{"source": "ctm", "content": "Session summary: Parser regression was caused by stale cached CTM session rows after restart.", "timestamp": "2026-05-01T20:00:00Z"},
|
|
175
|
+
{"source": "walle-diary", "content": "Next steps: verify restart recovery with CTM render tests and keep the session title from cached metadata.", "timestamp": "2026-05-01T20:05:00Z"}
|
|
176
|
+
]
|
|
177
|
+
}
|
|
178
|
+
},
|
|
179
|
+
"expectedInReply": ["parser", "restart"],
|
|
180
|
+
"forbiddenInReply": [],
|
|
181
|
+
"minReplyLength": 30,
|
|
182
|
+
"maxReplyLength": 1500,
|
|
183
|
+
"expectedTraits": ["references context"],
|
|
184
|
+
"tags": ["knowledge", "session-recall", "private-context"]
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"id": "ce-B4",
|
|
128
188
|
"prompt": "What topics come up most in my slack conversations?",
|
|
129
189
|
"taskType": "chat",
|
|
130
190
|
"difficulty": "medium",
|