@metabob/minibob 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +255 -0
- package/CHANGELOG.md +112 -0
- package/README.md +380 -0
- package/bin/minibob.js +36 -0
- package/dist/acp-gossip.d.ts +72 -0
- package/dist/acp-gossip.d.ts.map +1 -0
- package/dist/acp-gossip.js +156 -0
- package/dist/acp-gossip.js.map +1 -0
- package/dist/acp.d.ts +62 -0
- package/dist/acp.d.ts.map +1 -0
- package/dist/acp.js +292 -0
- package/dist/acp.js.map +1 -0
- package/dist/activity.d.ts +157 -0
- package/dist/activity.d.ts.map +1 -0
- package/dist/activity.js +518 -0
- package/dist/activity.js.map +1 -0
- package/dist/agent-runtime.d.ts +104 -0
- package/dist/agent-runtime.d.ts.map +1 -0
- package/dist/boredom.d.ts +125 -0
- package/dist/boredom.d.ts.map +1 -0
- package/dist/boredom.js +244 -0
- package/dist/boredom.js.map +1 -0
- package/dist/cli/acp-server.d.ts +23 -0
- package/dist/cli/acp-server.d.ts.map +1 -0
- package/dist/cli/burrow.d.ts +26 -0
- package/dist/cli/burrow.d.ts.map +1 -0
- package/dist/cli/doctor.d.ts +22 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/goal.d.ts +22 -0
- package/dist/cli/goal.d.ts.map +1 -0
- package/dist/cli/index.d.ts +47 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/instance-registry.d.ts +78 -0
- package/dist/cli/instance-registry.d.ts.map +1 -0
- package/dist/cli/observe.d.ts +35 -0
- package/dist/cli/observe.d.ts.map +1 -0
- package/dist/cli/vessel.d.ts +14 -0
- package/dist/cli/vessel.d.ts.map +1 -0
- package/dist/composition-observer.d.ts +96 -0
- package/dist/composition-observer.d.ts.map +1 -0
- package/dist/config.d.ts +36 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +128 -0
- package/dist/config.js.map +1 -0
- package/dist/docker/Dockerfile +35 -0
- package/dist/environment.d.ts +72 -0
- package/dist/environment.d.ts.map +1 -0
- package/dist/environment.js +142 -0
- package/dist/environment.js.map +1 -0
- package/dist/goal-processor.d.ts +165 -0
- package/dist/goal-processor.d.ts.map +1 -0
- package/dist/helm/minibob-cluster/Chart.yaml +13 -0
- package/dist/helm/minibob-cluster/templates/_helpers.tpl +60 -0
- package/dist/helm/minibob-cluster/templates/configmap.yaml +11 -0
- package/dist/helm/minibob-cluster/templates/deployment.yaml +108 -0
- package/dist/helm/minibob-cluster/templates/secret.yaml +10 -0
- package/dist/helm/minibob-cluster/templates/service.yaml +37 -0
- package/dist/helm/minibob-cluster/values-local.yaml +41 -0
- package/dist/helm/minibob-cluster/values-production.yaml +57 -0
- package/dist/helm/minibob-cluster/values-testing-cluster.yaml +43 -0
- package/dist/helm/minibob-cluster/values.yaml +127 -0
- package/dist/improviser.d.ts +74 -0
- package/dist/improviser.d.ts.map +1 -0
- package/dist/impulse-filter.d.ts +74 -0
- package/dist/impulse-filter.d.ts.map +1 -0
- package/dist/impulse.d.ts +92 -0
- package/dist/impulse.d.ts.map +1 -0
- package/dist/impulse.js +234 -0
- package/dist/impulse.js.map +1 -0
- package/dist/lib.d.ts +29 -0
- package/dist/lib.d.ts.map +1 -0
- package/dist/lib.js +18561 -0
- package/dist/lib.js.map +98 -0
- package/dist/lifecycle-hooks.d.ts +99 -0
- package/dist/lifecycle-hooks.d.ts.map +1 -0
- package/dist/lifecycle-hooks.js +135 -0
- package/dist/lifecycle-hooks.js.map +1 -0
- package/dist/llm.d.ts +31 -0
- package/dist/llm.d.ts.map +1 -0
- package/dist/llm.js +349 -0
- package/dist/llm.js.map +1 -0
- package/dist/mcp-activity-bridge.d.ts +66 -0
- package/dist/mcp-activity-bridge.d.ts.map +1 -0
- package/dist/mcp-activity-bridge.js +126 -0
- package/dist/mcp-activity-bridge.js.map +1 -0
- package/dist/mcp.d.ts +216 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +292 -0
- package/dist/mcp.js.map +1 -0
- package/dist/memory-agent.d.ts +92 -0
- package/dist/memory-agent.d.ts.map +1 -0
- package/dist/memory-agent.js +277 -0
- package/dist/memory-agent.js.map +1 -0
- package/dist/runtime-mapping.d.ts +97 -0
- package/dist/runtime-mapping.d.ts.map +1 -0
- package/dist/search-first-executor.d.ts +113 -0
- package/dist/search-first-executor.d.ts.map +1 -0
- package/dist/session.d.ts +48 -0
- package/dist/session.d.ts.map +1 -0
- package/dist/template-extractor.d.ts +9 -0
- package/dist/template-extractor.d.ts.map +1 -0
- package/dist/template-generator.d.ts +12 -0
- package/dist/template-generator.d.ts.map +1 -0
- package/dist/tools.d.ts +58 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +771 -0
- package/dist/tools.js.map +1 -0
- package/dist/types.d.ts +503 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/understanding/analyzer.d.ts +55 -0
- package/dist/understanding/analyzer.d.ts.map +1 -0
- package/dist/understanding/explorer.d.ts +73 -0
- package/dist/understanding/explorer.d.ts.map +1 -0
- package/dist/understanding/index.d.ts +7 -0
- package/dist/understanding/index.d.ts.map +1 -0
- package/dist/understanding/types.d.ts +136 -0
- package/dist/understanding/types.d.ts.map +1 -0
- package/dist/validation.d.ts +29 -0
- package/dist/validation.d.ts.map +1 -0
- package/dist/validation.js +106 -0
- package/dist/validation.js.map +1 -0
- package/dist/vessel-bootstrap.d.ts +190 -0
- package/dist/vessel-bootstrap.d.ts.map +1 -0
- package/dist/vessel-registry.d.ts +229 -0
- package/dist/vessel-registry.d.ts.map +1 -0
- package/index.ts +1329 -0
- package/package.json +54 -0
- package/src/acp-gossip.ts +193 -0
- package/src/acp.ts +362 -0
- package/src/activity.ts +1464 -0
- package/src/agent-runtime.ts +365 -0
- package/src/boredom.ts +423 -0
- package/src/cli/acp-server.ts +377 -0
- package/src/cli/burrow.ts +896 -0
- package/src/cli/doctor.ts +526 -0
- package/src/cli/goal.ts +224 -0
- package/src/cli/index.ts +147 -0
- package/src/cli/instance-registry.ts +271 -0
- package/src/cli/observe.ts +682 -0
- package/src/cli/vessel.ts +287 -0
- package/src/components/SystemOverview.tsx +331 -0
- package/src/composition-observer.ts +449 -0
- package/src/config.ts +172 -0
- package/src/environment.ts +167 -0
- package/src/goal-processor.ts +654 -0
- package/src/improviser.ts +591 -0
- package/src/impulse-filter.ts +273 -0
- package/src/impulse.ts +311 -0
- package/src/lib.ts +147 -0
- package/src/lifecycle-hooks.ts +181 -0
- package/src/llm.ts +434 -0
- package/src/mcp-activity-bridge.ts +158 -0
- package/src/mcp.ts +747 -0
- package/src/memory-agent.ts +316 -0
- package/src/runtime-mapping.ts +527 -0
- package/src/search-first-executor.ts +666 -0
- package/src/session.ts +141 -0
- package/src/template-extractor.ts +256 -0
- package/src/template-generator.ts +130 -0
- package/src/tools.ts +924 -0
- package/src/types.ts +497 -0
- package/src/understanding/analyzer.ts +354 -0
- package/src/understanding/explorer.ts +488 -0
- package/src/understanding/index.ts +27 -0
- package/src/understanding/types.ts +153 -0
- package/src/validation.ts +125 -0
- package/src/vessel-bootstrap.ts +440 -0
- package/src/vessel-registry.ts +621 -0
- package/templates/core/edit-file.json +85 -0
- package/templates/understanding/diagnose-problem.json +32 -0
- package/templates/understanding/explore-codebase-v2.json +57 -0
- package/templates/understanding/explore-codebase.json +37 -0
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search-First Goal Executor
|
|
3
|
+
*
|
|
4
|
+
* Implements dynamic step-by-step goal execution:
|
|
5
|
+
* 1. Decompose goal into steps using LLM
|
|
6
|
+
* 2. For each step:
|
|
7
|
+
* - Search for existing activities that match
|
|
8
|
+
* - If match: delegate to existing activity (minimal context)
|
|
9
|
+
* - If no match: execute step directly
|
|
10
|
+
* - Summarize result
|
|
11
|
+
* - Pass summary to next step
|
|
12
|
+
*
|
|
13
|
+
* This prevents token accumulation by:
|
|
14
|
+
* - Using summaries instead of full traces between steps
|
|
15
|
+
* - Delegating to existing activities with minimal context
|
|
16
|
+
* - Breaking large goals into focused steps
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { LLMClient } from "./llm"
|
|
20
|
+
import type { ActivityTemplate, ToolResult, ToolHandler } from "./types"
|
|
21
|
+
import { createLLMClient } from "./llm"
|
|
22
|
+
import { createToolHandlers, getAllToolDefinitions } from "./tools"
|
|
23
|
+
import { getMCPClient, isMCPEnabled } from "./mcp"
|
|
24
|
+
import { loadTemplateFromMCPOrLocal, ActivityExecutor } from "./activity"
|
|
25
|
+
|
|
26
|
+
/**
 * Obtain a `Bun.file()` handle for a path, converting bad input into a descriptive error.
 *
 * This only creates the handle; it does not check that the file exists or read it.
 *
 * @param path - Location of the file; must be a non-empty string.
 * @returns Whatever `Bun.file(path)` returns for that location.
 * @throws Error when `path` is empty or not a string, or when `Bun.file()` itself throws.
 */
function safeReadFile(path: string): ReturnType<typeof Bun.file> {
  // Callers may pass values that bypass the type checker, so check at runtime too.
  const isUsablePath = typeof path === 'string' && path.length > 0
  if (!isUsablePath) {
    throw new Error(`Invalid file path: expected string, got ${typeof path}`)
  }

  try {
    return Bun.file(path)
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause)
    throw new Error(`Failed to create file handle for '${path}': ${detail}`)
  }
}
|
|
39
|
+
|
|
40
|
+
// =============================================================================
|
|
41
|
+
// TYPES
|
|
42
|
+
// =============================================================================
|
|
43
|
+
|
|
44
|
+
/**
 * Checks used to decide whether a step really succeeded.
 * Every field is optional; a check that is absent is simply not run.
 */
export interface StepValidation {
  /** Shell commands to run; a command passes when it exits with code 0. */
  commands?: string[]
  /** Paths that have to exist once the step has run. */
  requiredFiles?: string[]
  /** Regular-expression sources, each of which must match the contents of its file. */
  requiredPatterns?: { file: string; pattern: string }[]
  /** Prose description of what success looks like (for LLM self-validation). */
  successCriteria?: string
}
|
|
54
|
+
|
|
55
|
+
/** One unit of work produced by decomposing a goal. */
export interface GoalStep {
  /** Identifier that other steps list in `dependencies` and that results carry as `stepId`. */
  id: string
  /** Short human-readable description; used in logs and in the step prompt. */
  description: string
  /** What the step is meant to accomplish; used when searching for a matching activity. */
  intent: string
  /** Kind of work; "other" means no category filter is applied when searching. */
  category:
    | "feature"
    | "bugfix"
    | "refactor"
    | "tool"
    | "infrastructure"
    | "other"
  /** Ids of steps that must have completed before this one is attempted. */
  dependencies: string[]
  /** Optional checks run after the step to confirm that it succeeded. */
  validation?: StepValidation
}
|
|
64
|
+
|
|
65
|
+
/** Outcome recorded for a single step of a goal. */
export interface StepResult {
  /** Id of the step this result belongs to. */
  stepId: string
  /** Whether the step finished, failed, or was never attempted. */
  status: "completed" | "failed" | "skipped"
  /** How the step was handled: through a reused activity, directly by the LLM, or not at all. */
  method: "existing_activity" | "direct_execution" | "skipped"
  /** Template id of the reused activity, when one was used. */
  activityId?: string
  /** Short text describing the outcome; passed on as context to later steps. */
  summary: string
  /** Failure detail, present when something went wrong. */
  error?: string
}
|
|
73
|
+
|
|
74
|
+
/** Settings accepted by `SearchFirstExecutor`. */
export interface SearchFirstConfig {
  /** LLM vendor used to create the client. */
  provider: "anthropic" | "openai"
  /** Credential handed to the LLM client. */
  apiKey: string
  /** Model name sent with every LLM request. */
  model: string
  /** Directory in which validation commands run and against which relative paths are resolved. */
  workingDirectory: string
  /** Upper bound on the number of steps kept from a decomposition (the executor defaults this to 5). */
  maxSteps?: number
  /** Token budget for a directly executed step (the executor defaults this to 4096). */
  maxTokensPerStep?: number
}
|
|
82
|
+
|
|
83
|
+
// =============================================================================
|
|
84
|
+
// SEARCH-FIRST EXECUTOR
|
|
85
|
+
// =============================================================================
|
|
86
|
+
|
|
87
|
+
export class SearchFirstExecutor {
|
|
88
|
+
private config: SearchFirstConfig
|
|
89
|
+
private llm: LLMClient
|
|
90
|
+
|
|
91
|
+
/**
 * Store the configuration with defaults filled in and create the LLM client.
 *
 * @param config - Executor settings. `maxSteps` falls back to 5 and
 *   `maxTokensPerStep` to 4096 when they are null or undefined.
 */
constructor(config: SearchFirstConfig) {
  const { provider, apiKey, maxSteps, maxTokensPerStep } = config

  this.config = {
    ...config,
    maxSteps: maxSteps ?? 5,
    maxTokensPerStep: maxTokensPerStep ?? 4096,
  }
  this.llm = createLLMClient(provider, apiKey)
}
|
|
99
|
+
|
|
100
|
+
/**
 * Ask the LLM to break a goal into a short, ordered list of steps.
 *
 * The reply is expected to contain a JSON array. Each element is checked and
 * normalised before it is returned, because the array comes from a model and
 * may omit or mistype fields that the rest of the executor reads without
 * guarding (for example `dependencies`).
 *
 * @param goal - The goal to decompose, inserted verbatim into the prompt.
 * @param context - Optional key/value hints rendered as a bullet list in the
 *   prompt; non-string values are serialised with `JSON.stringify`.
 * @returns At most `config.maxSteps` steps. If the reply has no JSON array,
 *   fails to parse, or yields no usable step, a single step of category
 *   "other" covering the whole goal is returned instead.
 * @throws Whatever `llm.complete` rejects with; only the parsing is guarded.
 */
async decomposeGoal(goal: string, context?: Record<string, unknown>): Promise<GoalStep[]> {
  const contextStr = context ? Object.entries(context)
    .map(([k, v]) => `- ${k}: ${typeof v === 'string' ? v : JSON.stringify(v)}`)
    .join('\n') : 'None provided'

  const prompt = `Decompose this goal into 2-4 discrete, actionable steps WITH validation criteria.

Goal: ${goal}

Context:
${contextStr}

Rules:
- Each step should be independently accomplishable
- Steps should be sequential (later steps may depend on earlier ones)
- Keep steps focused and specific
- Use categories: feature, bugfix, refactor, tool, infrastructure
- CRITICAL: Include validation criteria for each step - how do we VERIFY it succeeded?

Respond with JSON array:
[
{
"id": "step-1",
"description": "Brief description of step",
"intent": "What this step accomplishes",
"category": "refactor",
"dependencies": [],
"validation": {
"commands": ["bun run typecheck"],
"requiredFiles": ["src/new-file.ts"],
"successCriteria": "Code compiles without type errors"
}
}
]

Validation guidelines by category:
- refactor: typecheck passes, tests pass, no regressions
- bugfix: specific test passes, error no longer occurs
- feature: new functionality works, tests pass
- tool: tool executes successfully with expected output
- infrastructure: service starts, health check passes

Only output the JSON array, nothing else.`

  const result = await this.llm.complete({
    model: this.config.model,
    messages: [
      { role: "user", content: prompt }
    ],
    maxTokens: 1024,
  })

  try {
    // The model may wrap the array in prose or code fences, so take the
    // outermost [...] span rather than parsing the whole reply.
    const jsonMatch = result.content.match(/\[[\s\S]*\]/)
    if (!jsonMatch) {
      throw new Error("No JSON array found in response")
    }

    const parsed: unknown = JSON.parse(jsonMatch[0])
    if (!Array.isArray(parsed)) {
      throw new Error("Response JSON is not an array")
    }

    const steps = parsed
      .map((raw, index) => this.normalizeStep(raw, index))
      .filter((step): step is GoalStep => step !== null)

    // An empty plan would otherwise be reported as a completed goal.
    if (steps.length === 0) {
      throw new Error("No usable steps in response")
    }

    return steps.slice(0, this.config.maxSteps)
  } catch (e) {
    console.error("[SearchFirst] Failed to parse steps:", e)
    // Fallback: treat entire goal as single step
    return [{
      id: "step-1",
      description: goal,
      intent: goal,
      category: "other",
      dependencies: [],
    }]
  }
}

/**
 * Turn one element of the model's JSON array into a well-formed `GoalStep`.
 *
 * @param raw - The parsed element, of unknown shape.
 * @param index - Zero-based position in the array, used to synthesise an id.
 * @returns The normalised step, or `null` when the element is not an object
 *   or carries neither a description nor an intent.
 */
private normalizeStep(raw: unknown, index: number): GoalStep | null {
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
    return null
  }
  const fields = raw as Record<string, unknown>

  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : ""
  // Models sometimes emit numeric ids; accept them so dependency links still resolve.
  const idText = (value: unknown): string => {
    if (typeof value === "string") return value
    return typeof value === "number" ? String(value) : ""
  }
  const stringsOnly = (value: unknown[]): string[] =>
    value.filter((item): item is string => typeof item === "string")

  const description = text(fields.description)
  const intent = text(fields.intent)
  if (description === "" && intent === "") {
    return null
  }

  const knownCategories: readonly GoalStep["category"][] = [
    "feature", "bugfix", "refactor", "tool", "infrastructure", "other",
  ]
  const category = knownCategories.find(known => known === fields.category) ?? "other"

  const step: GoalStep = {
    id: idText(fields.id) || `step-${index + 1}`,
    // Each of the two texts stands in for the other when one is missing.
    description: description || intent,
    intent: intent || description,
    category,
    dependencies: Array.isArray(fields.dependencies)
      ? fields.dependencies.map(idText).filter(dep => dep !== "")
      : [],
  }

  const checks = fields.validation
  if (typeof checks === "object" && checks !== null && !Array.isArray(checks)) {
    const source = checks as Record<string, unknown>
    const validation: StepValidation = {}

    if (Array.isArray(source.commands)) {
      validation.commands = stringsOnly(source.commands)
    }
    if (Array.isArray(source.requiredFiles)) {
      validation.requiredFiles = stringsOnly(source.requiredFiles)
    }
    if (Array.isArray(source.requiredPatterns)) {
      validation.requiredPatterns = source.requiredPatterns.filter(
        (entry): entry is { file: string; pattern: string } =>
          typeof entry === "object" && entry !== null &&
          typeof entry.file === "string" && typeof entry.pattern === "string"
      )
    }
    if (typeof source.successCriteria === "string") {
      validation.successCriteria = source.successCriteria
    }

    step.validation = validation
  }

  return step
}
|
|
175
|
+
|
|
176
|
+
/**
 * Look for an existing activity template that could carry out a step.
 *
 * @param step - The step to match; its `intent` is the query and its
 *   `category` is used as a filter unless it is "other".
 * @returns `{ found: true, templateId, score }` when the top recommendation
 *   is not scored below 0.3; otherwise `{ found: false }`. This covers MCP
 *   being disabled or unavailable, no recommendations, and lookup errors,
 *   which are logged rather than thrown.
 */
async searchForActivity(step: GoalStep): Promise<{ found: boolean; templateId?: string; score?: number }> {
  // Only ask for the client when MCP is switched on.
  const client = isMCPEnabled() ? getMCPClient() : null
  if (!client) {
    return { found: false }
  }

  try {
    const categoryFilter = step.category === "other" ? undefined : step.category

    // Ask the backend for up to three candidates.
    const candidates = await client.recommendActivities(step.intent, categoryFilter, [], 3)

    if (candidates.length === 0) {
      console.log(`[SearchFirst] No existing activities found for: ${step.description}`)
      return { found: false }
    }

    // Only the best-ranked candidate is considered.
    const best = candidates[0]
    if (!best) {
      return { found: false }
    }

    const score = best.selection_metadata?.score ?? 0

    // Reuse needs a minimum Thompson Sampling score of 0.3.
    const belowThreshold = score < 0.3
    if (belowThreshold) {
      console.log(`[SearchFirst] Top match score too low (${score}): ${best.template_id}`)
      return { found: false }
    }

    console.log(`[SearchFirst] Found matching activity: ${best.template_id} (score: ${score})`)
    return { found: true, templateId: best.template_id, score }
  } catch (lookupError) {
    console.error("[SearchFirst] Search failed:", lookupError)
    return { found: false }
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Run a step by delegating it to an existing activity template.
 *
 * The activity only receives the step intent and the summaries of earlier
 * completed steps. After the activity reports completion, the step's own
 * validation is run, and a validation failure is reported back to the backend.
 *
 * @param step - The step to carry out.
 * @param templateId - Id of the activity template to load and execute.
 * @param previousResults - Results of earlier steps; only completed ones are
 *   passed on as context.
 * @returns A result with method "existing_activity". Failures, including
 *   thrown errors, are returned with status "failed" rather than thrown.
 */
async executeViaActivity(
  step: GoalStep,
  templateId: string,
  previousResults: StepResult[]
): Promise<StepResult> {
  console.log(`[SearchFirst] Executing step "${step.id}" via activity: ${templateId}`)

  try {
    const template = await loadTemplateFromMCPOrLocal(templateId)
    if (!template) {
      return {
        stepId: step.id,
        status: "failed",
        method: "existing_activity",
        error: `Template not found: ${templateId}`,
        summary: `Failed to load activity template ${templateId}`,
      }
    }

    // Nesting depth of 1 stops the activity from spawning further activities.
    const { provider, apiKey, model, workingDirectory } = this.config
    const executor = new ActivityExecutor({
      provider,
      apiKey,
      model,
      workingDirectory,
      maxNestingDepth: 1,
    })

    // Context is limited to one bullet per previously completed step.
    const bullets: string[] = []
    for (const prior of previousResults) {
      if (prior.status === "completed") {
        bullets.push(`- ${prior.stepId}: ${prior.summary}`)
      }
    }
    const contextSummary = bullets.join('\n')

    const result = await executor.execute({
      template,
      variables: {
        goal: step.intent,
        previousSteps: contextSummary || "This is the first step",
      },
      reason: step.description,
    })

    // The activity itself reported a failure.
    if (result.status !== "completed") {
      return {
        stepId: step.id,
        status: "failed",
        method: "existing_activity",
        activityId: templateId,
        summary: `Failed: ${result.error?.substring(0, 100) ?? "unknown error"}`,
        error: result.error,
      }
    }

    // The activity claims success; confirm it against the step's own checks.
    const validationResult = await this.validateStep(step)
    if (validationResult.passed) {
      return {
        stepId: step.id,
        status: "completed",
        method: "existing_activity",
        activityId: templateId,
        summary: `Completed via ${templateId}`,
      }
    }

    console.log(`[SearchFirst] Activity completed but step validation FAILED`)

    // Tell the backend that the activity's reported success did not hold up.
    await this.reportValidationFailure(templateId, step, validationResult.reason)

    return {
      stepId: step.id,
      status: "failed",
      method: "existing_activity",
      activityId: templateId,
      summary: `Activity completed but validation failed: ${validationResult.reason}`,
      error: validationResult.reason,
    }
  } catch (thrown) {
    const error = thrown instanceof Error ? thrown.message : String(thrown)
    return {
      stepId: step.id,
      status: "failed",
      method: "existing_activity",
      activityId: templateId,
      summary: `Execution failed: ${error.substring(0, 100)}`,
      error,
    }
  }
}
|
|
326
|
+
|
|
327
|
+
/**
 * Run a step directly with the LLM and tools (no existing activity matched).
 *
 * The step's validation runs before the result is summarised, so the extra
 * summarisation request is only made for steps that actually passed.
 *
 * @param step - The step to carry out.
 * @param previousResults - Results of earlier steps; only completed ones are
 *   included in the prompt as context.
 * @returns A result with method "direct_execution". Failures, including
 *   thrown errors, are returned with status "failed" rather than thrown.
 */
async executeDirect(
  step: GoalStep,
  previousResults: StepResult[]
): Promise<StepResult> {
  console.log(`[SearchFirst] Executing step "${step.id}" directly`)

  try {
    // Build context from previous step summaries
    const contextSummary = previousResults
      .filter(r => r.status === "completed")
      .map(r => `- ${r.stepId}: ${r.summary}`)
      .join('\n')

    const prompt = `Complete this step:

Step: ${step.description}
Intent: ${step.intent}

Previous steps completed:
${contextSummary || "None - this is the first step"}

Instructions:
- Focus only on this specific step
- Use available tools to accomplish the goal
- Be concise and efficient`

    // Create minimal tool handlers
    const toolHandlers = createToolHandlers({
      workingDirectory: this.config.workingDirectory,
      // Don't allow nested activity execution from direct steps
      onActivityExecute: undefined,
      onSearchActivities: undefined,
      onCreateActivity: undefined,
    })

    const result = await this.llm.completeWithTools(
      {
        model: this.config.model,
        messages: [
          {
            role: "system",
            content: "You are a focused task executor. Complete the given step using available tools. Be efficient and concise.",
          },
          {
            role: "user",
            content: prompt,
          },
        ],
        tools: getAllToolDefinitions(),
        maxTokens: this.config.maxTokensPerStep,
      },
      toolHandlers
    )

    // Validate first: a failed step never uses its summary, so summarising
    // before this point would spend an LLM request for nothing.
    const validationResult = await this.validateStep(step)

    if (!validationResult.passed) {
      console.log(`[SearchFirst] Step ${step.id} execution completed but validation FAILED`)
      return {
        stepId: step.id,
        status: "failed",
        method: "direct_execution",
        summary: `Execution completed but validation failed: ${validationResult.reason}`,
        error: validationResult.reason,
      }
    }

    // Compress the result so later steps receive a short summary, not the full output.
    const summary = await this.summarizeResult(step, result.content)

    return {
      stepId: step.id,
      status: "completed",
      method: "direct_execution",
      summary,
    }
  } catch (e) {
    const error = e instanceof Error ? e.message : String(e)
    return {
      stepId: step.id,
      status: "failed",
      method: "direct_execution",
      summary: `Direct execution failed: ${error.substring(0, 100)}`,
      error,
    }
  }
}
|
|
418
|
+
|
|
419
|
+
/**
 * Compress a step's output into a short summary for use as context by later steps.
 *
 * @param step - The step the output belongs to; its description is included
 *   in the summarisation prompt.
 * @param fullResult - The complete output text of the step.
 * @returns `fullResult` unchanged when it is shorter than 200 characters;
 *   otherwise the LLM's trimmed summary of its first 2000 characters. If the
 *   LLM call fails or returns blank text, the first 200 characters followed
 *   by "..." are returned instead. Never throws for LLM failures.
 */
async summarizeResult(step: GoalStep, fullResult: string): Promise<string> {
  // If result is already short, use as-is
  if (fullResult.length < 200) {
    return fullResult
  }

  const truncated = fullResult.substring(0, 200) + "..."

  try {
    const result = await this.llm.complete({
      model: this.config.model,
      messages: [
        {
          role: "user",
          content: `Summarize this step result in 1-2 sentences (max 100 words):

Step: ${step.description}

Result:
${fullResult.substring(0, 2000)}

Summary:`,
        },
      ],
      maxTokens: 150,
    })

    const summary = result.content.trim()
    if (summary.length === 0) {
      // A blank summary would leave later steps with no context for this one.
      console.warn(`[SearchFirst] Empty summary returned for step ${step.id}; using truncated result`)
      return truncated
    }
    return summary
  } catch (e) {
    // Summarisation is best-effort: log the cause and fall back to truncation.
    console.warn(`[SearchFirst] Summarization failed for step ${step.id}; using truncated result:`, e)
    return truncated
  }
}
|
|
453
|
+
|
|
454
|
+
/**
 * Run a step's validation checks and report the first one that fails.
 *
 * Checks run in this order: required files, required patterns, then shell
 * commands. Relative paths are resolved against `config.workingDirectory`,
 * which is also the working directory for commands.
 *
 * @param step - The step whose `validation` block should be checked.
 * @returns `{ passed: true }` when every check passes or the step has no
 *   validation; `{ passed: false, reason }` describing the first failure.
 */
async validateStep(step: GoalStep): Promise<{ passed: boolean; reason?: string }> {
  if (!step.validation) {
    console.log(`[SearchFirst] No validation criteria for step ${step.id}`)
    return { passed: true }
  }

  const validation = step.validation
  console.log(`[SearchFirst] Validating step ${step.id}...`)

  // Paths starting with "/" are used as-is; everything else is relative to the working directory.
  const resolvePath = (filePath: string): string =>
    filePath.startsWith("/")
      ? filePath
      : `${this.config.workingDirectory}/${filePath}`

  // 1. Check required files exist
  if (validation.requiredFiles && validation.requiredFiles.length > 0) {
    for (const filePath of validation.requiredFiles) {
      const file = safeReadFile(resolvePath(filePath))
      if (!(await file.exists())) {
        console.log(`[SearchFirst] Validation FAILED: Missing file ${filePath}`)
        return { passed: false, reason: `Required file missing: ${filePath}` }
      }
    }
    console.log(`[SearchFirst] ✓ Required files exist`)
  }

  // 2. Check required patterns in files
  if (validation.requiredPatterns && validation.requiredPatterns.length > 0) {
    for (const { file: filePath, pattern } of validation.requiredPatterns) {
      const file = safeReadFile(resolvePath(filePath))
      if (!(await file.exists())) {
        console.log(`[SearchFirst] Validation FAILED: File missing for pattern check ${filePath}`)
        return { passed: false, reason: `File missing for pattern check: ${filePath}` }
      }
      const content = await file.text()

      let found: boolean
      try {
        found = new RegExp(pattern).test(content)
      } catch {
        // A pattern that is not a valid regular expression (for example a
        // plain snippet such as "foo(") is matched literally, so the result
        // is a validation verdict instead of a thrown SyntaxError.
        console.log(`[SearchFirst] Pattern "${pattern}" is not a valid regular expression; matching it literally`)
        found = content.includes(pattern)
      }

      if (!found) {
        console.log(`[SearchFirst] Validation FAILED: Pattern not found in ${filePath}`)
        return { passed: false, reason: `Pattern "${pattern}" not found in ${filePath}` }
      }
    }
    console.log(`[SearchFirst] ✓ Required patterns found`)
  }

  // 3. Run validation commands
  // NOTE(review): these strings are executed through `sh -c`. In this file
  // they originate from model output, so they should be treated as untrusted.
  if (validation.commands && validation.commands.length > 0) {
    for (const command of validation.commands) {
      console.log(`[SearchFirst] Running validation command: ${command}`)
      try {
        const proc = Bun.spawn(["sh", "-c", command], {
          cwd: this.config.workingDirectory,
          // stdout is never inspected; discarding it means a chatty command
          // cannot stall on a pipe that nobody reads.
          stdout: "ignore",
          stderr: "pipe",
        })

        // Drain stderr while waiting for exit so a full pipe cannot block the child.
        const [exitCode, stderr] = await Promise.all([
          proc.exited,
          new Response(proc.stderr).text(),
        ])

        if (exitCode !== 0) {
          console.log(`[SearchFirst] Validation FAILED: Command "${command}" exited with ${exitCode}`)
          return {
            passed: false,
            reason: `Validation command failed: ${command}\nExit code: ${exitCode}\n${stderr.substring(0, 200)}`,
          }
        }
        console.log(`[SearchFirst] ✓ Command passed: ${command}`)
      } catch (e) {
        const error = e instanceof Error ? e.message : String(e)
        console.log(`[SearchFirst] Validation FAILED: Command error: ${error}`)
        return { passed: false, reason: `Validation command error: ${error}` }
      }
    }
  }

  console.log(`[SearchFirst] ✓ All validations passed for step ${step.id}`)
  return { passed: true }
}
|
|
537
|
+
|
|
538
|
+
/**
 * Tell the backend that an activity which reported success failed validation.
 *
 * Posts a failed-execution record for the template to
 * `<endpoint>/v2/activities/executions`. Per the original author's note, the
 * backend is expected to lower the activity's Thompson Sampling score in
 * response (by incrementing beta) — this cannot be confirmed from this file.
 *
 * Best-effort: does nothing when there is no MCP client or no usable
 * endpoint, and logs rather than throws on any error.
 *
 * @param templateId - Template id of the activity, sent as `variant_id`.
 * @param step - The step that failed validation; its id is sent as `failed_task_id`.
 * @param reason - Validation failure text; "unknown" is sent when omitted.
 */
private async reportValidationFailure(
  templateId: string,
  step: GoalStep,
  reason?: string
): Promise<void> {
  const mcp = getMCPClient()
  if (!mcp) return

  try {
    // The endpoint is not read through a typed accessor, so look it up
    // dynamically and verify it instead of casting the client to `any`.
    const endpoint: unknown = Reflect.get(mcp, "endpoint")
    if (typeof endpoint !== "string" || endpoint.length === 0) {
      console.warn("[SearchFirst] MCP client has no endpoint; skipping validation failure report")
      return
    }

    console.log(`[SearchFirst] Reporting validation failure for ${templateId}`)

    // Record the failure through the executions endpoint.
    const response = await fetch(`${endpoint}/v2/activities/executions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        variant_id: templateId,
        success: false, // marks this execution as a failure
        // No timing or usage figures are available here, so zeros are sent.
        duration_ms: 0,
        cost: 0,
        tokens: { input: 0, output: 0, cache: 0 },
        error_message: `Validation failed: ${reason ?? "unknown"}`,
        error_type: "validation_failure",
        failed_task_id: step.id,
      }),
    })

    if (response.ok) {
      console.log(`[SearchFirst] ✓ Validation failure reported for ${templateId}`)
    } else {
      const text = await response.text()
      console.warn(`[SearchFirst] Failed to report validation failure: ${response.status} - ${text}`)
    }
  } catch (e) {
    console.error("[SearchFirst] Error reporting validation failure:", e)
  }
}
|
|
584
|
+
|
|
585
|
+
/**
 * Execute a goal with the search-first approach.
 *
 * The goal is decomposed into steps, which run in order. A step whose
 * dependencies have not all completed is recorded as skipped. Otherwise it
 * runs through a matching existing activity when one is found, or directly.
 * Execution stops at the first failed step, so later steps get no result entry.
 *
 * @param goal - The goal to accomplish.
 * @param context - Optional hints forwarded to the decomposition prompt.
 * @returns The goal, the planned steps, the results recorded so far, whether
 *   every planned step completed (`false` for an empty plan), and a
 *   one-line summary.
 */
async execute(
  goal: string,
  context?: Record<string, unknown>
): Promise<{
  goal: string
  steps: GoalStep[]
  results: StepResult[]
  completed: boolean
  summary: string
}> {
  console.log(`[SearchFirst] Starting goal: ${goal}`)

  // Step 1: Decompose goal into steps
  const steps = await this.decomposeGoal(goal, context)
  console.log(`[SearchFirst] Decomposed into ${steps.length} steps`)

  const results: StepResult[] = []

  // Step 2: Execute each step
  for (const step of steps) {
    console.log(`\n[SearchFirst] === Step ${step.id}: ${step.description} ===`)

    // Steps come from model output, so `dependencies` may be missing even
    // though the type says otherwise; treat that as "no dependencies".
    const dependencies = Array.isArray(step.dependencies) ? step.dependencies : []
    const completedIds = new Set(
      results.filter(r => r.status === "completed").map(r => r.stepId)
    )
    const unmetDeps = dependencies.filter(depId => !completedIds.has(depId))

    if (unmetDeps.length > 0) {
      console.log(`[SearchFirst] Skipping - unmet dependencies: ${unmetDeps.join(", ")}`)
      results.push({
        stepId: step.id,
        status: "skipped",
        method: "skipped",
        summary: `Skipped due to unmet dependencies: ${unmetDeps.join(", ")}`,
      })
      continue
    }

    // Search for existing activity
    const searchResult = await this.searchForActivity(step)

    let stepResult: StepResult

    if (searchResult.found && searchResult.templateId) {
      // Execute via existing activity
      stepResult = await this.executeViaActivity(step, searchResult.templateId, results)
    } else {
      // Execute directly
      stepResult = await this.executeDirect(step, results)
    }

    results.push(stepResult)
    console.log(`[SearchFirst] Step ${step.id} ${stepResult.status}: ${stepResult.summary}`)

    // Stop on failure (could be configurable)
    if (stepResult.status === "failed") {
      console.log(`[SearchFirst] Stopping due to step failure`)
      break
    }
  }

  // Generate final summary
  const completedSteps = results.filter(r => r.status === "completed")
  // An empty plan did no work, so it must not count as a completed goal.
  const completed = steps.length > 0 && completedSteps.length === steps.length

  let summary: string
  if (completed) {
    summary = `Goal completed in ${steps.length} steps: ${completedSteps.map(r => r.summary).join("; ")}`
  } else if (steps.length === 0) {
    summary = "Goal not attempted: decomposition produced no steps"
  } else {
    summary = `Goal partially completed (${completedSteps.length}/${steps.length} steps)`
  }

  console.log(`\n[SearchFirst] === Final: ${summary} ===`)

  return {
    goal,
    steps,
    results,
    completed,
    summary,
  }
}
|
|
666
|
+
}
|