@metabob/minibob 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/ARCHITECTURE.md +255 -0
  2. package/CHANGELOG.md +112 -0
  3. package/README.md +380 -0
  4. package/bin/minibob.js +36 -0
  5. package/dist/acp-gossip.d.ts +72 -0
  6. package/dist/acp-gossip.d.ts.map +1 -0
  7. package/dist/acp-gossip.js +156 -0
  8. package/dist/acp-gossip.js.map +1 -0
  9. package/dist/acp.d.ts +62 -0
  10. package/dist/acp.d.ts.map +1 -0
  11. package/dist/acp.js +292 -0
  12. package/dist/acp.js.map +1 -0
  13. package/dist/activity.d.ts +157 -0
  14. package/dist/activity.d.ts.map +1 -0
  15. package/dist/activity.js +518 -0
  16. package/dist/activity.js.map +1 -0
  17. package/dist/agent-runtime.d.ts +104 -0
  18. package/dist/agent-runtime.d.ts.map +1 -0
  19. package/dist/boredom.d.ts +125 -0
  20. package/dist/boredom.d.ts.map +1 -0
  21. package/dist/boredom.js +244 -0
  22. package/dist/boredom.js.map +1 -0
  23. package/dist/cli/acp-server.d.ts +23 -0
  24. package/dist/cli/acp-server.d.ts.map +1 -0
  25. package/dist/cli/burrow.d.ts +26 -0
  26. package/dist/cli/burrow.d.ts.map +1 -0
  27. package/dist/cli/doctor.d.ts +22 -0
  28. package/dist/cli/doctor.d.ts.map +1 -0
  29. package/dist/cli/goal.d.ts +22 -0
  30. package/dist/cli/goal.d.ts.map +1 -0
  31. package/dist/cli/index.d.ts +47 -0
  32. package/dist/cli/index.d.ts.map +1 -0
  33. package/dist/cli/instance-registry.d.ts +78 -0
  34. package/dist/cli/instance-registry.d.ts.map +1 -0
  35. package/dist/cli/observe.d.ts +35 -0
  36. package/dist/cli/observe.d.ts.map +1 -0
  37. package/dist/cli/vessel.d.ts +14 -0
  38. package/dist/cli/vessel.d.ts.map +1 -0
  39. package/dist/composition-observer.d.ts +96 -0
  40. package/dist/composition-observer.d.ts.map +1 -0
  41. package/dist/config.d.ts +36 -0
  42. package/dist/config.d.ts.map +1 -0
  43. package/dist/config.js +128 -0
  44. package/dist/config.js.map +1 -0
  45. package/dist/docker/Dockerfile +35 -0
  46. package/dist/environment.d.ts +72 -0
  47. package/dist/environment.d.ts.map +1 -0
  48. package/dist/environment.js +142 -0
  49. package/dist/environment.js.map +1 -0
  50. package/dist/goal-processor.d.ts +165 -0
  51. package/dist/goal-processor.d.ts.map +1 -0
  52. package/dist/helm/minibob-cluster/Chart.yaml +13 -0
  53. package/dist/helm/minibob-cluster/templates/_helpers.tpl +60 -0
  54. package/dist/helm/minibob-cluster/templates/configmap.yaml +11 -0
  55. package/dist/helm/minibob-cluster/templates/deployment.yaml +108 -0
  56. package/dist/helm/minibob-cluster/templates/secret.yaml +10 -0
  57. package/dist/helm/minibob-cluster/templates/service.yaml +37 -0
  58. package/dist/helm/minibob-cluster/values-local.yaml +41 -0
  59. package/dist/helm/minibob-cluster/values-production.yaml +57 -0
  60. package/dist/helm/minibob-cluster/values-testing-cluster.yaml +43 -0
  61. package/dist/helm/minibob-cluster/values.yaml +127 -0
  62. package/dist/improviser.d.ts +74 -0
  63. package/dist/improviser.d.ts.map +1 -0
  64. package/dist/impulse-filter.d.ts +74 -0
  65. package/dist/impulse-filter.d.ts.map +1 -0
  66. package/dist/impulse.d.ts +92 -0
  67. package/dist/impulse.d.ts.map +1 -0
  68. package/dist/impulse.js +234 -0
  69. package/dist/impulse.js.map +1 -0
  70. package/dist/lib.d.ts +29 -0
  71. package/dist/lib.d.ts.map +1 -0
  72. package/dist/lib.js +18561 -0
  73. package/dist/lib.js.map +98 -0
  74. package/dist/lifecycle-hooks.d.ts +99 -0
  75. package/dist/lifecycle-hooks.d.ts.map +1 -0
  76. package/dist/lifecycle-hooks.js +135 -0
  77. package/dist/lifecycle-hooks.js.map +1 -0
  78. package/dist/llm.d.ts +31 -0
  79. package/dist/llm.d.ts.map +1 -0
  80. package/dist/llm.js +349 -0
  81. package/dist/llm.js.map +1 -0
  82. package/dist/mcp-activity-bridge.d.ts +66 -0
  83. package/dist/mcp-activity-bridge.d.ts.map +1 -0
  84. package/dist/mcp-activity-bridge.js +126 -0
  85. package/dist/mcp-activity-bridge.js.map +1 -0
  86. package/dist/mcp.d.ts +216 -0
  87. package/dist/mcp.d.ts.map +1 -0
  88. package/dist/mcp.js +292 -0
  89. package/dist/mcp.js.map +1 -0
  90. package/dist/memory-agent.d.ts +92 -0
  91. package/dist/memory-agent.d.ts.map +1 -0
  92. package/dist/memory-agent.js +277 -0
  93. package/dist/memory-agent.js.map +1 -0
  94. package/dist/runtime-mapping.d.ts +97 -0
  95. package/dist/runtime-mapping.d.ts.map +1 -0
  96. package/dist/search-first-executor.d.ts +113 -0
  97. package/dist/search-first-executor.d.ts.map +1 -0
  98. package/dist/session.d.ts +48 -0
  99. package/dist/session.d.ts.map +1 -0
  100. package/dist/template-extractor.d.ts +9 -0
  101. package/dist/template-extractor.d.ts.map +1 -0
  102. package/dist/template-generator.d.ts +12 -0
  103. package/dist/template-generator.d.ts.map +1 -0
  104. package/dist/tools.d.ts +58 -0
  105. package/dist/tools.d.ts.map +1 -0
  106. package/dist/tools.js +771 -0
  107. package/dist/tools.js.map +1 -0
  108. package/dist/types.d.ts +503 -0
  109. package/dist/types.d.ts.map +1 -0
  110. package/dist/types.js +8 -0
  111. package/dist/types.js.map +1 -0
  112. package/dist/understanding/analyzer.d.ts +55 -0
  113. package/dist/understanding/analyzer.d.ts.map +1 -0
  114. package/dist/understanding/explorer.d.ts +73 -0
  115. package/dist/understanding/explorer.d.ts.map +1 -0
  116. package/dist/understanding/index.d.ts +7 -0
  117. package/dist/understanding/index.d.ts.map +1 -0
  118. package/dist/understanding/types.d.ts +136 -0
  119. package/dist/understanding/types.d.ts.map +1 -0
  120. package/dist/validation.d.ts +29 -0
  121. package/dist/validation.d.ts.map +1 -0
  122. package/dist/validation.js +106 -0
  123. package/dist/validation.js.map +1 -0
  124. package/dist/vessel-bootstrap.d.ts +190 -0
  125. package/dist/vessel-bootstrap.d.ts.map +1 -0
  126. package/dist/vessel-registry.d.ts +229 -0
  127. package/dist/vessel-registry.d.ts.map +1 -0
  128. package/index.ts +1329 -0
  129. package/package.json +54 -0
  130. package/src/acp-gossip.ts +193 -0
  131. package/src/acp.ts +362 -0
  132. package/src/activity.ts +1464 -0
  133. package/src/agent-runtime.ts +365 -0
  134. package/src/boredom.ts +423 -0
  135. package/src/cli/acp-server.ts +377 -0
  136. package/src/cli/burrow.ts +896 -0
  137. package/src/cli/doctor.ts +526 -0
  138. package/src/cli/goal.ts +224 -0
  139. package/src/cli/index.ts +147 -0
  140. package/src/cli/instance-registry.ts +271 -0
  141. package/src/cli/observe.ts +682 -0
  142. package/src/cli/vessel.ts +287 -0
  143. package/src/components/SystemOverview.tsx +331 -0
  144. package/src/composition-observer.ts +449 -0
  145. package/src/config.ts +172 -0
  146. package/src/environment.ts +167 -0
  147. package/src/goal-processor.ts +654 -0
  148. package/src/improviser.ts +591 -0
  149. package/src/impulse-filter.ts +273 -0
  150. package/src/impulse.ts +311 -0
  151. package/src/lib.ts +147 -0
  152. package/src/lifecycle-hooks.ts +181 -0
  153. package/src/llm.ts +434 -0
  154. package/src/mcp-activity-bridge.ts +158 -0
  155. package/src/mcp.ts +747 -0
  156. package/src/memory-agent.ts +316 -0
  157. package/src/runtime-mapping.ts +527 -0
  158. package/src/search-first-executor.ts +666 -0
  159. package/src/session.ts +141 -0
  160. package/src/template-extractor.ts +256 -0
  161. package/src/template-generator.ts +130 -0
  162. package/src/tools.ts +924 -0
  163. package/src/types.ts +497 -0
  164. package/src/understanding/analyzer.ts +354 -0
  165. package/src/understanding/explorer.ts +488 -0
  166. package/src/understanding/index.ts +27 -0
  167. package/src/understanding/types.ts +153 -0
  168. package/src/validation.ts +125 -0
  169. package/src/vessel-bootstrap.ts +440 -0
  170. package/src/vessel-registry.ts +621 -0
  171. package/templates/core/edit-file.json +85 -0
  172. package/templates/understanding/diagnose-problem.json +32 -0
  173. package/templates/understanding/explore-codebase-v2.json +57 -0
  174. package/templates/understanding/explore-codebase.json +37 -0
@@ -0,0 +1,666 @@
1
+ /**
2
+ * Search-First Goal Executor
3
+ *
4
+ * Implements dynamic step-by-step goal execution:
5
+ * 1. Decompose goal into steps using LLM
6
+ * 2. For each step:
7
+ * - Search for existing activities that match
8
+ * - If match: delegate to existing activity (minimal context)
9
+ * - If no match: execute step directly
10
+ * - Summarize result
11
+ * - Pass summary to next step
12
+ *
13
+ * This prevents token accumulation by:
14
+ * - Using summaries instead of full traces between steps
15
+ * - Delegating to existing activities with minimal context
16
+ * - Breaking large goals into focused steps
17
+ */
18
+
19
+ import type { LLMClient } from "./llm"
20
+ import type { ActivityTemplate, ToolResult, ToolHandler } from "./types"
21
+ import { createLLMClient } from "./llm"
22
+ import { createToolHandlers, getAllToolDefinitions } from "./tools"
23
+ import { getMCPClient, isMCPEnabled } from "./mcp"
24
+ import { loadTemplateFromMCPOrLocal, ActivityExecutor } from "./activity"
25
+
26
/**
 * Creates a Bun file handle for `path` after validating the argument.
 *
 * Only the handle is created here; nothing is read from disk by this function.
 *
 * @param path - Path handed to `Bun.file()`; must be a non-empty string.
 * @returns The handle returned by `Bun.file(path)`.
 * @throws Error when `path` is not a string, is empty, or when `Bun.file()` itself throws.
 */
function safeReadFile(path: string): ReturnType<typeof Bun.file> {
  // Callers may pass values that bypassed the type checker, so check at runtime too.
  if (typeof path !== 'string') {
    throw new Error(`Invalid file path: expected string, got ${typeof path}`)
  }
  if (path === '') {
    // An empty string used to be reported as "expected string, got string".
    throw new Error('Invalid file path: expected non-empty string, got empty string')
  }
  try {
    return Bun.file(path)
  } catch (error) {
    throw new Error(`Failed to create file handle for '${path}': ${error instanceof Error ? error.message : String(error)}`)
  }
}
39
+
40
+ // =============================================================================
41
+ // TYPES
42
+ // =============================================================================
43
+
44
/**
 * Checks used to verify a step after it has executed.
 * Every field is optional; a check that is absent is simply not performed.
 */
export interface StepValidation {
  /** Commands to run for validation; a command passes when it exits with code 0. */
  commands?: Array<string>
  /** Paths of files that must exist once the step has run. */
  requiredFiles?: Array<string>
  /** Patterns that must be found in the contents of the named files. */
  requiredPatterns?: { file: string; pattern: string }[]
  /** Plain-language description of what success looks like (for LLM self-validation). */
  successCriteria?: string
}
54
+
55
/** One unit of work produced by decomposing a goal. */
export interface GoalStep {
  /** Identifier that other steps reference in `dependencies`. */
  id: string
  /** Short description of the step. */
  description: string
  /** What the step is meant to accomplish. */
  intent: string
  /** Kind of work the step performs. */
  category:
    | "feature"
    | "bugfix"
    | "refactor"
    | "tool"
    | "infrastructure"
    | "other"
  /** Ids of steps this step depends on. */
  dependencies: Array<string>
  /** Validation criteria for this step, if any. */
  validation?: StepValidation
}
64
+
65
/** Outcome recorded for a single step. */
export interface StepResult {
  /** Id of the step this result belongs to. */
  stepId: string
  /** Final state of the step. */
  status:
    | "completed"
    | "failed"
    | "skipped"
  /** How the step was handled. */
  method:
    | "existing_activity"
    | "direct_execution"
    | "skipped"
  /** Template id of the activity used, when the step was delegated to one. */
  activityId?: string
  /** Short text describing the outcome; passed as context to later steps. */
  summary: string
  /** Error detail when the step did not succeed. */
  error?: string
}
73
+
74
/** Settings accepted by `SearchFirstExecutor`. */
export interface SearchFirstConfig {
  /** LLM provider to use. */
  provider:
    | "anthropic"
    | "openai"
  /** API key for the chosen provider. */
  apiKey: string
  /** Model name sent with every LLM request. */
  model: string
  /** Directory used as the base for relative validation paths and as the cwd of validation commands. */
  workingDirectory: string
  /** Upper bound on the number of steps kept after decomposition (the executor defaults this to 5). */
  maxSteps?: number
  /** Token limit for a directly executed step (the executor defaults this to 4096). */
  maxTokensPerStep?: number
}
82
+
83
+ // =============================================================================
84
+ // SEARCH-FIRST EXECUTOR
85
+ // =============================================================================
86
+
87
+ export class SearchFirstExecutor {
88
/** Effective configuration, with defaults applied for the optional limits. */
private config: SearchFirstConfig
/** LLM client used for decomposition, direct execution and summarization. */
private llm: LLMClient

/**
 * Stores the configuration with defaults filled in and creates the LLM client.
 *
 * @param config - Executor settings; `maxSteps` falls back to 5 and
 *   `maxTokensPerStep` to 4096 when they are not provided.
 */
constructor(config: SearchFirstConfig) {
  const stepLimit = config.maxSteps ?? 5
  const tokenLimit = config.maxTokensPerStep ?? 4096
  this.config = { ...config, maxSteps: stepLimit, maxTokensPerStep: tokenLimit }

  const { provider, apiKey } = config
  this.llm = createLLMClient(provider, apiKey)
}
99
+
100
/**
 * Decompose a goal into discrete steps using the LLM.
 *
 * The model is asked for a JSON array of steps. The first `[...]` span in the
 * reply is parsed, each entry is normalized into a well-formed `GoalStep`, and
 * the list is truncated to `config.maxSteps`. If the reply cannot be parsed or
 * contains no usable step, the whole goal is returned as a single "other" step.
 *
 * @param goal - The goal to decompose.
 * @param context - Optional key/value context rendered into the prompt.
 * @returns At least one step.
 * @throws Whatever the LLM client throws; only parsing problems are recovered here.
 */
async decomposeGoal(goal: string, context?: Record<string, unknown>): Promise<GoalStep[]> {
  const contextEntries = context ? Object.entries(context) : []
  // An empty context object would otherwise render as a blank "Context:" section.
  const contextStr = contextEntries.length > 0
    ? contextEntries
        .map(([k, v]) => `- ${k}: ${typeof v === 'string' ? v : JSON.stringify(v)}`)
        .join('\n')
    : 'None provided'

  const prompt = `Decompose this goal into 2-4 discrete, actionable steps WITH validation criteria.

Goal: ${goal}

Context:
${contextStr}

Rules:
- Each step should be independently accomplishable
- Steps should be sequential (later steps may depend on earlier ones)
- Keep steps focused and specific
- Use categories: feature, bugfix, refactor, tool, infrastructure
- CRITICAL: Include validation criteria for each step - how do we VERIFY it succeeded?

Respond with JSON array:
[
{
"id": "step-1",
"description": "Brief description of step",
"intent": "What this step accomplishes",
"category": "refactor",
"dependencies": [],
"validation": {
"commands": ["bun run typecheck"],
"requiredFiles": ["src/new-file.ts"],
"successCriteria": "Code compiles without type errors"
}
}
]

Validation guidelines by category:
- refactor: typecheck passes, tests pass, no regressions
- bugfix: specific test passes, error no longer occurs
- feature: new functionality works, tests pass
- tool: tool executes successfully with expected output
- infrastructure: service starts, health check passes

Only output the JSON array, nothing else.`

  const result = await this.llm.complete({
    model: this.config.model,
    messages: [
      { role: "user", content: prompt }
    ],
    maxTokens: 1024,
  })

  try {
    // Extract JSON from response (greedy: first "[" through last "]")
    const jsonMatch = result.content.match(/\[[\s\S]*\]/)
    if (!jsonMatch) {
      throw new Error("No JSON array found in response")
    }
    const parsed: unknown = JSON.parse(jsonMatch[0])
    const steps = this.normalizeSteps(parsed)
    if (steps.length === 0) {
      // Zero steps would make the goal look trivially complete downstream.
      throw new Error("Response contained no usable steps")
    }
    return steps.slice(0, this.config.maxSteps)
  } catch (e) {
    console.error("[SearchFirst] Failed to parse steps:", e)
    // Fallback: treat entire goal as single step
    return [{
      id: "step-1",
      description: goal,
      intent: goal,
      category: "other",
      dependencies: [],
    }]
  }
}

/**
 * Turns untrusted parsed JSON into well-formed steps.
 *
 * Entries that are not objects, or that have neither a description nor an
 * intent, are dropped. Missing ids become `step-<position>`, unknown categories
 * become "other", and `dependencies` is always an array of strings.
 */
private normalizeSteps(raw: unknown): GoalStep[] {
  if (!Array.isArray(raw)) {
    return []
  }

  const knownCategories: ReadonlyArray<GoalStep["category"]> = [
    "feature", "bugfix", "refactor", "tool", "infrastructure", "other",
  ]
  const asStrings = (value: unknown): string[] | undefined =>
    Array.isArray(value)
      ? value.filter((item): item is string => typeof item === "string")
      : undefined

  const steps: GoalStep[] = []
  raw.forEach((entry: unknown, index: number) => {
    if (typeof entry !== "object" || entry === null) {
      return
    }
    const candidate = entry as Record<string, unknown>
    const description = typeof candidate.description === "string" ? candidate.description : ""
    const intent = typeof candidate.intent === "string" ? candidate.intent : ""
    if (description === "" && intent === "") {
      return
    }

    const step: GoalStep = {
      id: typeof candidate.id === "string" && candidate.id !== "" ? candidate.id : `step-${index + 1}`,
      description: description || intent,
      intent: intent || description,
      category: knownCategories.find(c => c === candidate.category) ?? "other",
      dependencies: asStrings(candidate.dependencies) ?? [],
    }

    if (typeof candidate.validation === "object" && candidate.validation !== null) {
      const rawValidation = candidate.validation as Record<string, unknown>
      const validation: StepValidation = {}
      const commands = asStrings(rawValidation.commands)
      if (commands) validation.commands = commands
      const requiredFiles = asStrings(rawValidation.requiredFiles)
      if (requiredFiles) validation.requiredFiles = requiredFiles
      if (Array.isArray(rawValidation.requiredPatterns)) {
        validation.requiredPatterns = rawValidation.requiredPatterns.filter(
          (p: unknown): p is { file: string; pattern: string } =>
            typeof p === "object" && p !== null &&
            typeof (p as Record<string, unknown>).file === "string" &&
            typeof (p as Record<string, unknown>).pattern === "string"
        )
      }
      if (typeof rawValidation.successCriteria === "string") {
        validation.successCriteria = rawValidation.successCriteria
      }
      step.validation = validation
    }

    steps.push(step)
  })
  return steps
}
175
+
176
/**
 * Looks for an existing activity that could carry out `step`.
 *
 * Requests up to three recommendations from the MCP client and considers only
 * the first one. It is accepted unless its score is below 0.3; a missing score
 * counts as 0. When MCP is disabled, no client is available, nothing is
 * recommended, or the lookup throws, the result is `{ found: false }`.
 *
 * @param step - Step whose intent and category drive the lookup ("other" sends no category).
 * @returns `found` plus, on a match, the template id and its score.
 */
async searchForActivity(step: GoalStep): Promise<{ found: boolean; templateId?: string; score?: number }> {
  const miss = (): { found: boolean } => ({ found: false })

  const client = isMCPEnabled() ? getMCPClient() : null
  if (!client) {
    return miss()
  }

  const categoryFilter = step.category !== "other" ? step.category : undefined

  try {
    // Get recommendations from backend
    const candidates = await client.recommendActivities(step.intent, categoryFilter, [], 3)

    if (candidates.length === 0) {
      console.log(`[SearchFirst] No existing activities found for: ${step.description}`)
      return miss()
    }

    const best = candidates[0]
    if (!best) {
      return miss()
    }

    const score = best.selection_metadata?.score ?? 0
    const templateId = best.template_id

    // Reject matches scoring below the 0.3 reuse threshold
    if (score < 0.3) {
      console.log(`[SearchFirst] Top match score too low (${score}): ${templateId}`)
      return miss()
    }

    console.log(`[SearchFirst] Found matching activity: ${templateId} (score: ${score})`)
    return { found: true, templateId, score }
  } catch (e) {
    console.error("[SearchFirst] Search failed:", e)
    return miss()
  }
}
228
+
229
/**
 * Runs a step by delegating it to an existing activity template.
 *
 * Loads the template, executes it with a context made only of the summaries of
 * previously completed steps, then applies this executor's own validation. If
 * the activity reports completion but validation fails, the failure is reported
 * to the backend and the step is marked failed. Errors thrown along the way are
 * converted into a failed result rather than propagated.
 *
 * @param step - Step to execute.
 * @param templateId - Id of the activity template to run.
 * @param previousResults - Results of earlier steps; only completed ones are used as context.
 * @returns The step result; `method` is always "existing_activity".
 */
async executeViaActivity(
  step: GoalStep,
  templateId: string,
  previousResults: StepResult[]
): Promise<StepResult> {
  console.log(`[SearchFirst] Executing step "${step.id}" via activity: ${templateId}`)

  // Shared shape for every failure that happens once the template id is known.
  const failed = (summary: string, error?: string): StepResult => ({
    stepId: step.id,
    status: "failed",
    method: "existing_activity",
    activityId: templateId,
    summary,
    error,
  })

  try {
    const template = await loadTemplateFromMCPOrLocal(templateId)
    if (!template) {
      return {
        stepId: step.id,
        status: "failed",
        method: "existing_activity",
        error: `Template not found: ${templateId}`,
        summary: `Failed to load activity template ${templateId}`,
      }
    }

    // Create minimal executor with isolated context
    const { provider, apiKey, model, workingDirectory } = this.config
    const executor = new ActivityExecutor({
      provider,
      apiKey,
      model,
      workingDirectory,
      maxNestingDepth: 1, // Prevent further nesting
    })

    // Build minimal context from previous step summaries
    const summaryLines: string[] = []
    for (const prior of previousResults) {
      if (prior.status === "completed") {
        summaryLines.push(`- ${prior.stepId}: ${prior.summary}`)
      }
    }
    const priorContext = summaryLines.join('\n')

    const outcome = await executor.execute({
      template,
      variables: {
        goal: step.intent,
        previousSteps: priorContext || "This is the first step",
      },
      reason: step.description,
    })

    // Activity reported failure
    if (outcome.status !== "completed") {
      return failed(
        `Failed: ${outcome.error?.substring(0, 100) ?? "unknown error"}`,
        outcome.error
      )
    }

    // Activity completed - now run our own validation
    const check = await this.validateStep(step)
    if (check.passed) {
      return {
        stepId: step.id,
        status: "completed",
        method: "existing_activity",
        activityId: templateId,
        summary: `Completed via ${templateId}`,
      }
    }

    console.log(`[SearchFirst] Activity completed but step validation FAILED`)

    // Report validation failure to backend to update metrics.
    // This corrects the false "success" that the activity reported.
    await this.reportValidationFailure(templateId, step, check.reason)

    return failed(`Activity completed but validation failed: ${check.reason}`, check.reason)
  } catch (e) {
    const error = e instanceof Error ? e.message : String(e)
    return failed(`Execution failed: ${error.substring(0, 100)}`, error)
  }
}
326
+
327
/**
 * Execute a step directly (no existing activity found).
 *
 * Sends the step, together with the summaries of previously completed steps,
 * to the LLM with tool access, then validates the step. The LLM output is
 * summarized only after validation passes, so no summarization request is
 * spent on a result that is about to be discarded. Errors thrown along the way
 * are converted into a failed result rather than propagated.
 *
 * @param step - Step to execute.
 * @param previousResults - Results of earlier steps; only completed ones are used as context.
 * @returns The step result; `method` is always "direct_execution".
 */
async executeDirect(
  step: GoalStep,
  previousResults: StepResult[]
): Promise<StepResult> {
  console.log(`[SearchFirst] Executing step "${step.id}" directly`)

  try {
    // Build context from previous step summaries
    const contextSummary = previousResults
      .filter(r => r.status === "completed")
      .map(r => `- ${r.stepId}: ${r.summary}`)
      .join('\n')

    const prompt = `Complete this step:

Step: ${step.description}
Intent: ${step.intent}

Previous steps completed:
${contextSummary || "None - this is the first step"}

Instructions:
- Focus only on this specific step
- Use available tools to accomplish the goal
- Be concise and efficient`

    // Create minimal tool handlers
    const toolHandlers = createToolHandlers({
      workingDirectory: this.config.workingDirectory,
      // Don't allow nested activity execution from direct steps
      onActivityExecute: undefined,
      onSearchActivities: undefined,
      onCreateActivity: undefined,
    })

    const result = await this.llm.completeWithTools(
      {
        model: this.config.model,
        messages: [
          {
            role: "system",
            content: "You are a focused task executor. Complete the given step using available tools. Be efficient and concise.",
          },
          {
            role: "user",
            content: prompt,
          },
        ],
        tools: getAllToolDefinitions(),
        maxTokens: this.config.maxTokensPerStep,
      },
      toolHandlers
    )

    // Validate first: a failed step reports the validation reason, not the summary.
    const validationResult = await this.validateStep(step)

    if (!validationResult.passed) {
      console.log(`[SearchFirst] Step ${step.id} execution completed but validation FAILED`)
      return {
        stepId: step.id,
        status: "failed",
        method: "direct_execution",
        summary: `Execution completed but validation failed: ${validationResult.reason}`,
        error: validationResult.reason,
      }
    }

    // Summarize the result so later steps receive compact context
    const summary = await this.summarizeResult(step, result.content)

    return {
      stepId: step.id,
      status: "completed",
      method: "direct_execution",
      summary,
    }
  } catch (e) {
    const error = e instanceof Error ? e.message : String(e)
    return {
      stepId: step.id,
      status: "failed",
      method: "direct_execution",
      summary: `Direct execution failed: ${error.substring(0, 100)}`,
      error,
    }
  }
}
418
+
419
/**
 * Compresses a step's output so it can be handed to the next step.
 *
 * Results shorter than 200 characters are returned untouched. Longer ones are
 * summarized by the LLM from their first 2000 characters; if that fails, the
 * first 200 characters followed by "..." are returned instead.
 *
 * @param step - Step the result belongs to; its description is included in the request.
 * @param fullResult - Complete output of the step.
 * @returns The short form of the result.
 */
async summarizeResult(step: GoalStep, fullResult: string): Promise<string> {
  const alreadyShort = fullResult.length < 200
  if (alreadyShort) {
    return fullResult
  }

  try {
    const excerpt = fullResult.substring(0, 2000)
    const request = `Summarize this step result in 1-2 sentences (max 100 words):

Step: ${step.description}

Result:
${excerpt}

Summary:`

    const { content } = await this.llm.complete({
      model: this.config.model,
      messages: [{ role: "user", content: request }],
      maxTokens: 150,
    })

    return content.trim()
  } catch {
    // Fallback: truncate
    return `${fullResult.substring(0, 200)}...`
  }
}
453
+
454
/**
 * Validate a step's execution result.
 *
 * Checks run in this order and stop at the first failure:
 *   1. every `requiredFiles` entry exists,
 *   2. every `requiredPatterns` entry matches the contents of its file,
 *   3. every `commands` entry exits with code 0 when run via `sh -c` in the
 *      working directory.
 * Relative paths are resolved against `config.workingDirectory`.
 *
 * @param step - Step whose `validation` criteria are checked.
 * @returns `{ passed: true }` if validation passes or none is specified;
 *   `{ passed: false, reason }` if a check fails, including when a pattern is
 *   not a valid regular expression or a command cannot be started.
 */
async validateStep(step: GoalStep): Promise<{ passed: boolean; reason?: string }> {
  if (!step.validation) {
    console.log(`[SearchFirst] No validation criteria for step ${step.id}`)
    return { passed: true }
  }

  const validation = step.validation
  console.log(`[SearchFirst] Validating step ${step.id}...`)

  // 1. Check required files exist
  if (validation.requiredFiles && validation.requiredFiles.length > 0) {
    for (const filePath of validation.requiredFiles) {
      const file = safeReadFile(this.resolveValidationPath(filePath))
      if (!(await file.exists())) {
        console.log(`[SearchFirst] Validation FAILED: Missing file ${filePath}`)
        return { passed: false, reason: `Required file missing: ${filePath}` }
      }
    }
    console.log(`[SearchFirst] ✓ Required files exist`)
  }

  // 2. Check required patterns in files
  if (validation.requiredPatterns && validation.requiredPatterns.length > 0) {
    for (const { file: filePath, pattern } of validation.requiredPatterns) {
      // Compile first: an invalid pattern is a validation failure, not a crash.
      let regex: RegExp
      try {
        regex = new RegExp(pattern)
      } catch (e) {
        const error = e instanceof Error ? e.message : String(e)
        console.log(`[SearchFirst] Validation FAILED: Invalid pattern for ${filePath}: ${error}`)
        return { passed: false, reason: `Invalid validation pattern "${pattern}": ${error}` }
      }

      const file = safeReadFile(this.resolveValidationPath(filePath))
      if (!(await file.exists())) {
        console.log(`[SearchFirst] Validation FAILED: File missing for pattern check ${filePath}`)
        return { passed: false, reason: `File missing for pattern check: ${filePath}` }
      }
      const content = await file.text()
      if (!regex.test(content)) {
        console.log(`[SearchFirst] Validation FAILED: Pattern not found in ${filePath}`)
        return { passed: false, reason: `Pattern "${pattern}" not found in ${filePath}` }
      }
    }
    console.log(`[SearchFirst] ✓ Required patterns found`)
  }

  // 3. Run validation commands
  // NOTE(review): these strings come from step data and are executed through
  // `sh -c`; confirm that the source of steps is trusted.
  if (validation.commands && validation.commands.length > 0) {
    for (const command of validation.commands) {
      console.log(`[SearchFirst] Running validation command: ${command}`)
      try {
        const proc = Bun.spawn(["sh", "-c", command], {
          cwd: this.config.workingDirectory,
          // stdout is never inspected; do not pipe it, so it cannot fill up unread.
          stdout: "ignore",
          stderr: "pipe",
        })
        // Drain stderr while waiting for exit instead of only afterwards.
        const [exitCode, stderr] = await Promise.all([
          proc.exited,
          new Response(proc.stderr).text(),
        ])

        if (exitCode !== 0) {
          console.log(`[SearchFirst] Validation FAILED: Command "${command}" exited with ${exitCode}`)
          return {
            passed: false,
            reason: `Validation command failed: ${command}\nExit code: ${exitCode}\n${stderr.substring(0, 200)}`,
          }
        }
        console.log(`[SearchFirst] ✓ Command passed: ${command}`)
      } catch (e) {
        const error = e instanceof Error ? e.message : String(e)
        console.log(`[SearchFirst] Validation FAILED: Command error: ${error}`)
        return { passed: false, reason: `Validation command error: ${error}` }
      }
    }
  }

  console.log(`[SearchFirst] ✓ All validations passed for step ${step.id}`)
  return { passed: true }
}

/** Resolves a validation path: paths starting with "/" are used as-is, others are joined to the working directory. */
private resolveValidationPath(filePath: string): string {
  return filePath.startsWith("/")
    ? filePath
    : `${this.config.workingDirectory}/${filePath}`
}
537
+
538
/**
 * Report validation failure to backend to correct metrics.
 *
 * When an activity reports "completed" but our validation fails, a failed
 * execution record is POSTed to `<endpoint>/v2/activities/executions` so the
 * backend can lower the activity's score. The intended effect, per the
 * original author, is to increment beta in Thompson Sampling.
 *
 * This is best-effort: nothing is thrown. A missing client returns silently;
 * a missing endpoint, a non-OK response or a network error is only logged.
 *
 * @param templateId - Activity template whose execution is being corrected.
 * @param step - Step that failed validation; its id is sent as `failed_task_id`.
 * @param reason - Validation failure reason; "unknown" is sent when omitted.
 */
private async reportValidationFailure(
  templateId: string,
  step: GoalStep,
  reason?: string
): Promise<void> {
  const mcp = getMCPClient()
  if (!mcp) return

  // `endpoint` is not part of the client's declared type, so read it
  // defensively instead of building a request to "undefined/v2/...".
  const endpoint: unknown = Reflect.get(mcp, "endpoint")
  if (typeof endpoint !== "string" || endpoint === "") {
    console.warn(`[SearchFirst] Cannot report validation failure for ${templateId}: MCP endpoint unavailable`)
    return
  }
  // Avoid a double slash when the endpoint is configured with a trailing "/".
  const baseUrl = endpoint.replace(/\/+$/, "")

  try {
    console.log(`[SearchFirst] Reporting validation failure for ${templateId}`)

    // Use the executions endpoint to record the failure
    const response = await fetch(`${baseUrl}/v2/activities/executions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        variant_id: templateId,
        success: false, // This increments beta in Thompson Sampling
        duration_ms: 0,
        cost: 0,
        tokens: { input: 0, output: 0, cache: 0 },
        error_message: `Validation failed: ${reason ?? "unknown"}`,
        error_type: "validation_failure",
        failed_task_id: step.id,
      }),
    })

    if (response.ok) {
      console.log(`[SearchFirst] ✓ Validation failure reported for ${templateId}`)
    } else {
      const text = await response.text()
      console.warn(`[SearchFirst] Failed to report validation failure: ${response.status} - ${text}`)
    }
  } catch (e) {
    console.error("[SearchFirst] Error reporting validation failure:", e)
  }
}
584
+
585
/**
 * Execute goal using search-first approach.
 *
 * The goal is decomposed into steps, which run in order. A step whose
 * dependencies have not all completed is recorded as skipped. Otherwise an
 * existing activity is searched for and used if found; if none is found the
 * step is executed directly. Execution stops at the first failed step.
 *
 * @param goal - The goal to accomplish.
 * @param context - Optional key/value context passed to decomposition.
 * @returns The goal, its steps, the per-step results, whether every step
 *   completed (never true when there are no steps), and a text summary.
 */
async execute(
  goal: string,
  context?: Record<string, unknown>
): Promise<{
  goal: string
  steps: GoalStep[]
  results: StepResult[]
  completed: boolean
  summary: string
}> {
  console.log(`[SearchFirst] Starting goal: ${goal}`)

  // Step 1: Decompose goal into steps
  const steps = await this.decomposeGoal(goal, context)
  console.log(`[SearchFirst] Decomposed into ${steps.length} steps`)

  const results: StepResult[] = []
  const completedIds = new Set<string>()

  // Step 2: Execute each step
  for (const step of steps) {
    console.log(`\n[SearchFirst] === Step ${step.id}: ${step.description} ===`)

    // Check dependencies. Steps originate from LLM-produced JSON, so
    // `dependencies` may be absent at runtime despite the declared type.
    const dependencies = Array.isArray(step.dependencies) ? step.dependencies : []
    const unmetDeps = dependencies.filter(depId => !completedIds.has(depId))
    if (unmetDeps.length > 0) {
      console.log(`[SearchFirst] Skipping - unmet dependencies: ${unmetDeps.join(", ")}`)
      results.push({
        stepId: step.id,
        status: "skipped",
        method: "skipped",
        summary: `Skipped due to unmet dependencies: ${unmetDeps.join(", ")}`,
      })
      continue
    }

    // Search for existing activity
    const searchResult = await this.searchForActivity(step)

    let stepResult: StepResult

    if (searchResult.found && searchResult.templateId) {
      // Execute via existing activity
      stepResult = await this.executeViaActivity(step, searchResult.templateId, results)
    } else {
      // Execute directly
      stepResult = await this.executeDirect(step, results)
    }

    results.push(stepResult)
    if (stepResult.status === "completed") {
      completedIds.add(stepResult.stepId)
    }
    console.log(`[SearchFirst] Step ${step.id} ${stepResult.status}: ${stepResult.summary}`)

    // Stop on failure (could be configurable)
    if (stepResult.status === "failed") {
      console.log(`[SearchFirst] Stopping due to step failure`)
      break
    }
  }

  // Generate final summary
  const completedSteps = results.filter(r => r.status === "completed")
  // An empty plan must not count as a completed goal.
  const completed = steps.length > 0 && completedSteps.length === steps.length

  const summary = completed
    ? `Goal completed in ${steps.length} steps: ${completedSteps.map(r => r.summary).join("; ")}`
    : `Goal partially completed (${completedSteps.length}/${steps.length} steps)`

  console.log(`\n[SearchFirst] === Final: ${summary} ===`)

  return {
    goal,
    steps,
    results,
    completed,
    summary,
  }
}
666
+ }