@lakitu/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/README.md +166 -0
  2. package/convex/_generated/api.d.ts +45 -0
  3. package/convex/_generated/api.js +23 -0
  4. package/convex/_generated/dataModel.d.ts +58 -0
  5. package/convex/_generated/server.d.ts +143 -0
  6. package/convex/_generated/server.js +93 -0
  7. package/convex/cloud/CLAUDE.md +238 -0
  8. package/convex/cloud/_generated/api.ts +84 -0
  9. package/convex/cloud/_generated/component.ts +861 -0
  10. package/convex/cloud/_generated/dataModel.ts +60 -0
  11. package/convex/cloud/_generated/server.ts +156 -0
  12. package/convex/cloud/convex.config.ts +16 -0
  13. package/convex/cloud/index.ts +29 -0
  14. package/convex/cloud/intentSchema/generate.ts +447 -0
  15. package/convex/cloud/intentSchema/index.ts +16 -0
  16. package/convex/cloud/intentSchema/types.ts +418 -0
  17. package/convex/cloud/ksaPolicy.ts +554 -0
  18. package/convex/cloud/mail.ts +92 -0
  19. package/convex/cloud/schema.ts +322 -0
  20. package/convex/cloud/utils/kanbanContext.ts +229 -0
  21. package/convex/cloud/workflows/agentBoard.ts +451 -0
  22. package/convex/cloud/workflows/agentPrompt.ts +272 -0
  23. package/convex/cloud/workflows/agentThread.ts +374 -0
  24. package/convex/cloud/workflows/compileSandbox.ts +146 -0
  25. package/convex/cloud/workflows/crudBoard.ts +217 -0
  26. package/convex/cloud/workflows/crudKSAs.ts +262 -0
  27. package/convex/cloud/workflows/crudLorobeads.ts +371 -0
  28. package/convex/cloud/workflows/crudSkills.ts +205 -0
  29. package/convex/cloud/workflows/crudThreads.ts +708 -0
  30. package/convex/cloud/workflows/lifecycleSandbox.ts +1396 -0
  31. package/convex/cloud/workflows/sandboxConvex.ts +1046 -0
  32. package/convex/sandbox/README.md +90 -0
  33. package/convex/sandbox/_generated/api.d.ts +2934 -0
  34. package/convex/sandbox/_generated/api.js +23 -0
  35. package/convex/sandbox/_generated/dataModel.d.ts +60 -0
  36. package/convex/sandbox/_generated/server.d.ts +143 -0
  37. package/convex/sandbox/_generated/server.js +93 -0
  38. package/convex/sandbox/actions/bash.ts +130 -0
  39. package/convex/sandbox/actions/browser.ts +282 -0
  40. package/convex/sandbox/actions/file.ts +336 -0
  41. package/convex/sandbox/actions/lsp.ts +325 -0
  42. package/convex/sandbox/actions/pdf.ts +119 -0
  43. package/convex/sandbox/agent/codeExecLoop.ts +535 -0
  44. package/convex/sandbox/agent/decisions.ts +284 -0
  45. package/convex/sandbox/agent/index.ts +515 -0
  46. package/convex/sandbox/agent/subagents.ts +651 -0
  47. package/convex/sandbox/brandResearch/index.ts +417 -0
  48. package/convex/sandbox/context/index.ts +7 -0
  49. package/convex/sandbox/context/session.ts +402 -0
  50. package/convex/sandbox/convex.config.ts +17 -0
  51. package/convex/sandbox/index.ts +51 -0
  52. package/convex/sandbox/nodeActions/codeExec.ts +130 -0
  53. package/convex/sandbox/planning/beads.ts +187 -0
  54. package/convex/sandbox/planning/index.ts +8 -0
  55. package/convex/sandbox/planning/sync.ts +194 -0
  56. package/convex/sandbox/prompts/codeExec.ts +852 -0
  57. package/convex/sandbox/prompts/modes.ts +231 -0
  58. package/convex/sandbox/prompts/system.ts +142 -0
  59. package/convex/sandbox/schema.ts +510 -0
  60. package/convex/sandbox/state/artifacts.ts +99 -0
  61. package/convex/sandbox/state/checkpoints.ts +341 -0
  62. package/convex/sandbox/state/files.ts +383 -0
  63. package/convex/sandbox/state/index.ts +10 -0
  64. package/convex/sandbox/state/verification.actions.ts +268 -0
  65. package/convex/sandbox/state/verification.ts +101 -0
  66. package/convex/sandbox/tsconfig.json +25 -0
  67. package/convex/sandbox/utils/codeExecHelpers.ts +52 -0
  68. package/dist/cli/commands/build.d.ts +19 -0
  69. package/dist/cli/commands/build.d.ts.map +1 -0
  70. package/dist/cli/commands/build.js +223 -0
  71. package/dist/cli/commands/init.d.ts +16 -0
  72. package/dist/cli/commands/init.d.ts.map +1 -0
  73. package/dist/cli/commands/init.js +148 -0
  74. package/dist/cli/commands/publish.d.ts +12 -0
  75. package/dist/cli/commands/publish.d.ts.map +1 -0
  76. package/dist/cli/commands/publish.js +33 -0
  77. package/dist/cli/index.d.ts +14 -0
  78. package/dist/cli/index.d.ts.map +1 -0
  79. package/dist/cli/index.js +40 -0
  80. package/dist/sdk/builders.d.ts +104 -0
  81. package/dist/sdk/builders.d.ts.map +1 -0
  82. package/dist/sdk/builders.js +214 -0
  83. package/dist/sdk/index.d.ts +29 -0
  84. package/dist/sdk/index.d.ts.map +1 -0
  85. package/dist/sdk/index.js +38 -0
  86. package/dist/sdk/types.d.ts +107 -0
  87. package/dist/sdk/types.d.ts.map +1 -0
  88. package/dist/sdk/types.js +6 -0
  89. package/ksa/README.md +263 -0
  90. package/ksa/_generated/REFERENCE.md +2954 -0
  91. package/ksa/_generated/registry.ts +257 -0
  92. package/ksa/_shared/configReader.ts +302 -0
  93. package/ksa/_shared/configSchemas.ts +649 -0
  94. package/ksa/_shared/gateway.ts +175 -0
  95. package/ksa/_shared/ksaBehaviors.ts +411 -0
  96. package/ksa/_shared/ksaProxy.ts +248 -0
  97. package/ksa/_shared/localDb.ts +302 -0
  98. package/ksa/index.ts +134 -0
  99. package/package.json +93 -0
  100. package/runtime/browser/agent-browser.ts +330 -0
  101. package/runtime/entrypoint.ts +194 -0
  102. package/runtime/lsp/manager.ts +366 -0
  103. package/runtime/pdf/pdf-generator.ts +50 -0
  104. package/runtime/pdf/renderer.ts +357 -0
  105. package/runtime/pdf/schema.ts +97 -0
  106. package/runtime/services/file-watcher.ts +191 -0
  107. package/template/build.ts +307 -0
  108. package/template/e2b/Dockerfile +69 -0
  109. package/template/e2b/e2b.toml +13 -0
  110. package/template/e2b/prebuild.sh +68 -0
  111. package/template/e2b/start.sh +14 -0
@@ -0,0 +1,535 @@
1
+ /**
2
+ * Code Execution Agent Loop
3
+ *
4
+ * This is the NEW agent loop that uses code execution instead of JSON tool calls.
5
+ *
6
+ * Architecture:
7
+ * 1. Send prompt to LLM (NO tool schemas)
8
+ * 2. LLM responds with TypeScript code
9
+ * 3. Extract code blocks from response
10
+ * 4. Execute code in E2B sandbox
11
+ * 5. Feed output back to LLM
12
+ * 6. Repeat until task complete
13
+ *
14
+ * The agent imports from /home/user/ksa/ (KSAs - Knowledge, Skills, Abilities).
15
+ */
16
+
17
+ import { internal } from "../_generated/api";
18
+ import { wrapCodeForExecution, extractCodeBlocks } from "../utils/codeExecHelpers";
19
+ import type { ChainOfThoughtStep, StepStatus } from "../../../shared/chain-of-thought";
20
+ import { createStepId } from "../../../shared/chain-of-thought";
21
+
22
+ // Fallback model identifier: callCloudLLM sends this only when no `options.model` is supplied.
23
+ // Callers should normally pass the model from unified settings (convex/features/settings/models.ts).
24
+ const DEFAULT_MODEL = "anthropic/claude-sonnet-4";
25
+
26
+ // ============================================================================
27
+ // Types
28
+ // ============================================================================
29
+
30
+ interface LLMMessage { // One chat message in the conversation sent to the LLM gateway
31
+ role: "system" | "user" | "assistant"; // who authored the message
32
+ content: string; // plain-text body of the message
33
+ }
34
+
35
+ interface GatewayConfig { // Connection settings for the cloud gateway
36
+ convexUrl: string; // base URL; requests in this file are POSTed to `${convexUrl}/agent/call`
37
+ jwt: string; // sent as the Bearer token in the Authorization header; also passed to executed code as SANDBOX_JWT
38
+ }
39
+
40
+ interface CodeExecResult { // Value returned by runCodeExecLoop
41
+ text: string; // final text of the run (the agent's response or an "ERROR: ..." message)
42
+ codeExecutions: Array<{ // one entry per code execution attempted during the run, in order
43
+ code: string; // the code submitted for execution (after wrapCodeForExecution)
44
+ output: string; // output reported by the execution, or the error message if the call threw
45
+ success: boolean; // success flag reported by the execution; false if the call threw
46
+ }>;
47
+ }
48
+
49
+ // ============================================================================
50
+ // Chain of Thought Tracking + Real-time Cloud Forwarding
51
+ // ============================================================================
52
+
53
+ const chainOfThoughtSteps: Map<string, ChainOfThoughtStep[]> = new Map(); // recorded steps keyed by thread ID, appended to by emitStep
54
+
55
+ // Module-level forwarding target read by forwardLogToCloud (assigned by runCodeExecLoop); null disables forwarding.
56
+ let cloudForwardingConfig: {
57
+ gatewayConfig: GatewayConfig;
58
+ sessionId: string;
59
+ } | null = null;
60
+
61
+ interface StructuredLog { // Payload of one log entry forwarded to the cloud by forwardLogToCloud
61
+ type: string; // step category: thinking, tool, search, file, text
62
+ label: string; // short human-readable text for the entry
63
+ status?: string; // active, complete, error
64
+ icon?: string; // icon name chosen by emitStep from the step type (e.g. "lightbulb", "tools")
65
+ details?: string; // optional longer description (emitStep fills it from the step's `description`)
66
+ }
68
+
/**
 * Send one structured log entry to the cloud so the UI can show it in real time.
 *
 * Does nothing when no forwarding target is configured. Every failure
 * (network error, serialization error) is deliberately swallowed: log
 * forwarding is best-effort and must never interrupt the agent loop.
 * Callers invoke this without awaiting it.
 *
 * @param log - The log entry to append to the configured session.
 * @returns A promise that always resolves and never rejects.
 */
async function forwardLogToCloud(log: StructuredLog): Promise<void> {
  const target = cloudForwardingConfig;
  if (!target) return;

  try {
    const endpoint = `${target.gatewayConfig.convexUrl}/agent/call`;
    const payload = JSON.stringify({
      path: "agent.workflows.sandboxConvex.appendLogs",
      type: "mutation",
      args: {
        sessionId: target.sessionId,
        logs: [log],
      },
    });

    await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${target.gatewayConfig.jwt}`,
      },
      body: payload,
    });
  } catch {
    // Intentionally ignored: delivery is best-effort and must not block execution.
  }
}
97
+
/**
 * Record a chain-of-thought step for a thread and mirror it to the cloud.
 *
 * The step is completed with a fresh id and the current timestamp, appended to
 * the thread's in-memory step list (created on first use), and then forwarded
 * as a structured log without waiting for delivery.
 *
 * @param threadId - Thread whose step list receives the new step.
 * @param step - Step fields other than `id` and `timestamp`.
 * @returns The id assigned to the new step.
 */
function emitStep(
  threadId: string,
  step: Omit<ChainOfThoughtStep, "id" | "timestamp">
): string {
  let threadSteps = chainOfThoughtSteps.get(threadId);
  if (!threadSteps) {
    threadSteps = [];
    chainOfThoughtSteps.set(threadId, threadSteps);
  }

  const fullStep = {
    id: createStepId(),
    timestamp: Date.now(),
    ...step,
  } as ChainOfThoughtStep;
  threadSteps.push(fullStep);

  // Pick the UI icon for the step category; anything unrecognised falls back to "text".
  let icon: string;
  switch (fullStep.type) {
    case "thinking":
      icon = "lightbulb";
      break;
    case "tool":
      icon = "tools";
      break;
    case "search":
      icon = "magnify";
      break;
    case "file":
      icon = "file";
      break;
    default:
      icon = "text";
  }

  // Not every step variant declares these fields, hence the untyped view.
  const loose = fullStep as any;
  const label = loose.label || loose.toolName || fullStep.type;

  // Fire-and-forget: forwardLogToCloud never rejects, so the promise is dropped on purpose.
  void forwardLogToCloud({
    type: fullStep.type,
    label,
    status: fullStep.status,
    icon,
    details: loose.description,
  });

  return fullStep.id;
}
127
+
/**
 * Change the status of a previously recorded step.
 *
 * Silently does nothing when the thread or the step id is unknown. Only the
 * in-memory record is updated; nothing is forwarded to the cloud here.
 *
 * @param threadId - Thread that owns the step.
 * @param stepId - Id returned by emitStep.
 * @param status - New status to store on the step.
 */
function updateStepStatus(threadId: string, stepId: string, status: StepStatus) {
  const target = chainOfThoughtSteps
    .get(threadId)
    ?.find((candidate) => candidate.id === stepId);
  if (!target) return;
  target.status = status;
}
135
+
/**
 * Return the chain-of-thought steps recorded for a thread.
 *
 * @param threadId - Thread to look up.
 * @returns The thread's stored step array itself (not a copy), or a new empty
 *   array when nothing has been recorded for that thread.
 */
export function getSteps(threadId: string): ChainOfThoughtStep[] {
  const recorded = chainOfThoughtSteps.get(threadId);
  return recorded ? recorded : [];
}
139
+
140
+ // ============================================================================
141
+ // Cloud LLM Gateway (JSON Schema Structured Output)
142
+ // ============================================================================
143
+
144
+ interface AgentAction { // Structured reply expected from the model (shape mirrors AGENT_ACTION_SCHEMA)
145
+ thinking: string; // the model's reasoning for this step
146
+ code?: string; // TypeScript to execute; empty or missing means there is nothing to run
147
+ response?: string; // final answer; empty or missing means the task is not finished yet
148
+ }
149
+
150
+ interface LLMResponse { // Result of callCloudLLM
151
+ text: string; // raw message content of the first choice ("" when there is none)
152
+ action?: AgentAction; // structured action parsed from `text`; undefined when none could be parsed
153
+ finishReason?: string; // finish_reason of the first choice, if present
154
+ }
155
+
156
+ // JSON Schema sent by callCloudLLM as `responseFormat.json_schema` so the model replies with a single JSON object.
157
+ // Structured output is used instead of tool_choice because some providers ignore tool_choice.
158
+ const AGENT_ACTION_SCHEMA = {
159
+ name: "AgentAction",
160
+ strict: true,
161
+ schema: {
162
+ type: "object",
163
+ properties: {
164
+ thinking: {
165
+ type: "string",
166
+ description: "Your reasoning about what to do next. Always explain your thought process.",
167
+ },
168
+ code: {
169
+ type: "string",
170
+ description: "TypeScript code to execute. Import from ./ksa/* for capabilities (web search, file ops, PDF generation, etc.). Leave empty string if no code needed.",
171
+ },
172
+ response: {
173
+ type: "string",
174
+ description: "Final response to the user. Only provide a non-empty value when the task is FULLY COMPLETE and no more code needs to run. Leave empty string otherwise.",
175
+ },
176
+ },
177
+ required: ["thinking", "code", "response"], // all three are mandatory; "not applicable" is expressed as an empty string
178
+ additionalProperties: false,
179
+ },
180
+ };
181
+
/**
 * Interpret raw model output as an AgentAction.
 *
 * The whole string is parsed as JSON first; if that fails, the contents of the
 * first Markdown code fence are tried. A candidate is accepted only when it
 * parses to a plain JSON object: JSON.parse also accepts strings, numbers,
 * booleans, null and arrays, none of which can be an action.
 *
 * @param content - Message content returned by the model.
 * @returns The parsed action, or undefined when no JSON object could be extracted.
 */
function parseAgentAction(content: string): AgentAction | undefined {
  if (!content) return undefined;

  // Parse `raw` and reject anything that is not a plain object.
  const parseObject = (raw: string): AgentAction => {
    const parsed: unknown = JSON.parse(raw);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new Error("expected a JSON object");
    }
    return parsed as AgentAction;
  };

  try {
    const action = parseObject(content);
    console.log(`[callCloudLLM] Parsed action - thinking: ${action.thinking?.slice(0, 100)}, hasCode: ${!!action.code}, hasResponse: ${!!action.response}`);
    return action;
  } catch (e) {
    console.error(`[callCloudLLM] Failed to parse JSON: ${e}`);
  }

  // Some models wrap the JSON in a Markdown fence despite the schema; try the first fence.
  const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (!jsonMatch) return undefined;

  try {
    const action = parseObject(jsonMatch[1].trim());
    console.log(`[callCloudLLM] Extracted JSON from code block`);
    return action;
  } catch {
    console.error(`[callCloudLLM] Could not parse JSON from code block either`);
    return undefined;
  }
}

/**
 * Call the cloud LLM gateway with JSON schema structured output.
 * Uses response_format instead of tool calling for reliability.
 *
 * @param messages - Conversation to send, in order.
 * @param gatewayConfig - Gateway base URL and JWT; both must be non-empty.
 * @param options - Optional overrides: `model` (defaults to DEFAULT_MODEL),
 *   `maxTokens` (defaults to 4096) and `temperature` (omitted from the JSON
 *   body when undefined).
 * @returns The raw content of the first choice, the structured action parsed
 *   from it (undefined when parsing failed) and the choice's finish reason.
 * @throws Error when the gateway is not configured, the HTTP status is not OK,
 *   the gateway reports `ok: false`, or the gateway response carries no `data`.
 */
async function callCloudLLM(
  messages: LLMMessage[],
  gatewayConfig: GatewayConfig,
  options: {
    model?: string;
    maxTokens?: number;
    temperature?: number;
  } = {}
): Promise<LLMResponse> {
  const { convexUrl, jwt } = gatewayConfig;

  if (!convexUrl || !jwt) {
    throw new Error("Gateway not configured");
  }

  const response = await fetch(`${convexUrl}/agent/call`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${jwt}`,
    },
    body: JSON.stringify({
      path: "internal.services.OpenRouter.internal.chatCompletion",
      args: {
        model: options.model || DEFAULT_MODEL,
        messages,
        responseFormat: {
          type: "json_schema",
          json_schema: AGENT_ACTION_SCHEMA,
        },
        maxTokens: options.maxTokens || 4096,
        temperature: options.temperature,
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`LLM call failed: ${response.status}`);
  }

  const result = await response.json();
  if (!result.ok) {
    throw new Error(`LLM error: ${result.error || JSON.stringify(result)}`);
  }
  // A reply flagged ok but carrying no payload would otherwise surface as an
  // opaque "cannot read properties of undefined" TypeError on the next line.
  if (result.data == null) {
    throw new Error(`LLM error: gateway response has no data: ${JSON.stringify(result)}`);
  }

  const choice = result.data.choices?.[0];
  const content = choice?.message?.content || "";

  // Debug logging
  console.log(`[callCloudLLM] finish_reason: ${choice?.finish_reason}`);
  console.log(`[callCloudLLM] content preview: ${content.slice(0, 300)}`);

  return {
    text: content,
    action: parseAgentAction(content),
    finishReason: choice?.finish_reason,
  };
}
265
+
266
+ // ============================================================================
267
+ // Code Execution Agent Loop
268
+ // ============================================================================
269
+
/**
 * Decide how one line of sandbox console output is shown in the UI.
 *
 * @param line - A single non-blank output line.
 * @returns The step type and label to emit, or null for debug/internal lines
 *   that should be hidden.
 */
function classifyOutputLine(
  line: string
): { stepType: "tool" | "text" | "search" | "file"; label: string } | null {
  if (line.includes("[beads]")) {
    return { stepType: "tool", label: line.replace("[beads]", "📋").trim() };
  }
  if (line.includes("[deliverables]") || line.includes("[pdf]")) {
    return {
      stepType: "file",
      label: line.replace("[deliverables]", "💾").replace("[pdf]", "📄").trim(),
    };
  }
  if (line.includes("[web]") || line.includes("Searching") || line.includes("search")) {
    return { stepType: "search", label: line.replace("[web]", "🔍").trim() };
  }

  // Result-style lines are always shown; other bracketed or DEBUG lines are internal noise.
  const isResultLine =
    line.includes("Found") || line.includes("Created") || line.includes("Saved");
  if (!isResultLine && (line.startsWith("[") || line.includes("DEBUG"))) {
    return null;
  }
  return { stepType: "text", label: line.slice(0, 150) };
}

/**
 * Run the code execution agent loop.
 *
 * Each iteration asks the LLM for a structured action
 * (`{ thinking, code, response }`, see AGENT_ACTION_SCHEMA) and then:
 * - if `code` is non-empty, wraps it, executes it through
 *   `internal.nodeActions.codeExec.execute` (60 s timeout) and feeds the
 *   outcome back to the model as the next user message;
 * - if only `response` is non-empty, finishes with that text - but only once
 *   at least one execution has happened. Before that the reply is rejected and
 *   the model is asked again, up to 3 times, after which the run ends with an
 *   "ERROR: ..." text;
 * - if no structured action could be parsed, falls back to Markdown code
 *   blocks found in the raw text, or treats the raw text as the final answer.
 *
 * Progress is recorded as chain-of-thought steps (see getSteps) and, when
 * `options.sessionId` is given, forwarded to the cloud in real time.
 *
 * @param ctx - Action context; only `ctx.runAction` is used here.
 * @param systemPrompt - System message that opens the conversation.
 * @param userPrompt - First user message.
 * @param gatewayConfig - Gateway URL and JWT, used for LLM calls and passed to
 *   the executed code as CONVEX_URL / GATEWAY_URL / SANDBOX_JWT.
 * @param options - `maxSteps` (default 10), `threadId` for step tracking
 *   (default `codeexec_<timestamp>`), `cardId` and `cloudThreadId` (exported to
 *   the executed code as CARD_ID / THREAD_ID when set), `model`, and
 *   `sessionId` (enables log forwarding).
 * @returns The final text and every code execution attempted, in order. When
 *   the step limit is reached without a final response, the text is an
 *   "ERROR: ..." message rather than an empty string.
 * @throws Whatever callCloudLLM throws; execution errors are captured in the
 *   result instead of being thrown.
 */
export async function runCodeExecLoop(
  ctx: any,
  systemPrompt: string,
  userPrompt: string,
  gatewayConfig: GatewayConfig,
  options: {
    maxSteps?: number;
    threadId?: string;
    cardId?: string;
    cloudThreadId?: string; // Cloud thread ID for artifact uploads
    model?: string;
    sessionId?: string; // For real-time log forwarding to cloud
  } = {}
): Promise<CodeExecResult> {
  // MARKER: Version 2026-01-15-v4 - real-time log forwarding to cloud
  console.log("🔥🔥🔥 [codeExecLoop] VERSION: 2026-01-15-v4 WITH REAL-TIME LOGS 🔥🔥🔥");

  const maxSteps = options.maxSteps || 10;
  const threadId = options.threadId || `codeexec_${Date.now()}`;
  const cardId = options.cardId;
  const cloudThreadId = options.cloudThreadId; // Cloud thread ID for artifact uploads
  const model = options.model;

  // The forwarding target is module-level state. Always reassign it so a run
  // without a sessionId cannot keep sending logs to the previous run's session
  // with that run's JWT.
  cloudForwardingConfig = options.sessionId
    ? { gatewayConfig, sessionId: options.sessionId }
    : null;
  if (cloudForwardingConfig) {
    console.log(`[codeExecLoop] Cloud forwarding enabled for session: ${options.sessionId}`);
  }

  let codeEnforcementRetries = 0;
  const MAX_CODE_ENFORCEMENT_RETRIES = 3;

  const messages: LLMMessage[] = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ];

  const allExecutions: CodeExecResult["codeExecutions"] = [];
  let finalText = "";

  emitStep(threadId, {
    type: "thinking",
    status: "complete",
    label: "Starting code execution loop...",
  });

  for (let step = 0; step < maxSteps; step++) {
    const thinkingId = emitStep(threadId, {
      type: "thinking",
      status: "active",
      label: `Step ${step + 1}: Thinking...`,
    });

    // Call LLM - uses JSON schema structured output
    const response = await callCloudLLM(messages, gatewayConfig, { model });
    updateStepStatus(threadId, thinkingId, "complete");

    let action = response.action;

    // Fallback: if structured output failed, try to extract code blocks
    if (!action) {
      console.error("[codeExecLoop] ERROR: No structured action returned!");
      console.error("[codeExecLoop] Response text:", response.text);

      const codeBlocks = extractCodeBlocks(response.text);
      if (codeBlocks.length > 0) {
        console.log(`[codeExecLoop] Fallback: Found ${codeBlocks.length} code blocks`);
        action = {
          thinking: "Extracted from markdown",
          code: codeBlocks.join("\n\n"),
          response: "",
        };
      } else {
        // No action and no code - treat text as final response
        finalText = response.text || "Agent completed without response.";
        emitStep(threadId, {
          type: "text",
          status: "complete",
          label: finalText.slice(0, 200),
        });
        break;
      }
    }

    // Log the agent's thinking
    if (action.thinking) {
      emitStep(threadId, {
        type: "thinking",
        status: "complete",
        label: action.thinking.slice(0, 200),
      });
      console.log(`[codeExecLoop] Thinking: ${action.thinking}`);
    }

    const hasCode = action.code && action.code.trim().length > 0;
    const hasResponse = action.response && action.response.trim().length > 0;

    // A non-empty response without code is a claim that the task is done.
    if (hasResponse && !hasCode) {
      // CRITICAL: Reject responses if no code has been executed yet.
      // This prevents the agent from hallucinating completion without actually executing.
      if (allExecutions.length === 0) {
        codeEnforcementRetries++;
        console.warn(`[codeExecLoop] Agent tried to respond without code - retry ${codeEnforcementRetries}/${MAX_CODE_ENFORCEMENT_RETRIES} (step ${step})`);

        if (codeEnforcementRetries >= MAX_CODE_ENFORCEMENT_RETRIES) {
          console.error("[codeExecLoop] Agent failed to provide code after max retries - failing");
          emitStep(threadId, {
            type: "thinking",
            status: "error",
            label: "Agent failed to execute code after multiple attempts",
          });
          finalText = `ERROR: Agent failed to execute code. Response was: ${action.response}`;
          break;
        }

        emitStep(threadId, {
          type: "thinking",
          status: "error",
          label: `Retry ${codeEnforcementRetries}: Agent must execute code`,
        });

        // Ask the agent to try again with code
        messages.push({
          role: "assistant",
          content: `Thinking: ${action.thinking || "..."}\n\nResponse: ${action.response}`,
        });
        messages.push({
          role: "user",
          content: `ERROR: You cannot provide a response without executing code first. You MUST provide actual TypeScript code in the "code" field. Do not describe what you would do - actually write and execute code using import statements like: import { search } from './ksa/web'. Try again with code.`,
        });
        continue; // Go to next iteration
      }

      // After code has been executed, accept the response
      finalText = action.response!;
      emitStep(threadId, {
        type: "text",
        status: "complete",
        label: finalText.slice(0, 200),
      });
      break;
    }

    if (!hasCode) {
      // No code and no response - shouldn't happen but handle gracefully
      console.warn("[codeExecLoop] Action has neither code nor response");
      finalText = action.thinking || "Task completed.";
      break;
    }

    // The agent provided code: execute it.
    const code = wrapCodeForExecution(action.code!);

    const execId = emitStep(threadId, {
      type: "tool",
      status: "active",
      toolName: "code_execution",
      label: "Executing code...",
      input: { code: code.slice(0, 500) },
    });

    let execResult: string;
    try {
      const result = await ctx.runAction(internal.nodeActions.codeExec.execute, {
        code,
        timeoutMs: 60_000,
        env: {
          // KSAs use both CONVEX_URL and GATEWAY_URL - provide both for compatibility
          CONVEX_URL: gatewayConfig.convexUrl,
          GATEWAY_URL: gatewayConfig.convexUrl,
          SANDBOX_JWT: gatewayConfig.jwt,
          ...(cardId && { CARD_ID: cardId }),
          // Pass cloud thread ID for artifact uploads (NOT the sandbox-local threadId)
          ...(cloudThreadId && { THREAD_ID: cloudThreadId }),
        },
      });

      allExecutions.push({
        code,
        output: result.output,
        success: result.success,
      });

      if (result.success) {
        execResult = `[Execution successful]\n${result.output}`;
        updateStepStatus(threadId, execId, "complete");
      } else {
        execResult = `[Execution failed]\nError: ${result.error}\nOutput: ${result.output}`;
        updateStepStatus(threadId, execId, "error");
      }
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      execResult = `[Execution error]\n${msg}`;
      allExecutions.push({
        code,
        output: msg,
        success: false,
      });
      updateStepStatus(threadId, execId, "error");
    }

    // Add assistant's action to messages
    messages.push({
      role: "assistant",
      content: `Thinking: ${action.thinking || "..."}\n\nExecuting code:\n\`\`\`typescript\n${action.code}\n\`\`\``,
    });

    // Emit console output as chain-of-thought steps for real-time UI visibility
    const outputLines = (allExecutions[allExecutions.length - 1]?.output || "").split("\n");
    for (const line of outputLines) {
      if (!line.trim()) continue;

      const classified = classifyOutputLine(line);
      if (!classified) continue; // debug/internal log line

      emitStep(threadId, {
        type: classified.stepType,
        status: "complete",
        label: classified.label,
        ...(classified.stepType === "tool" && { toolName: "console", output: line }),
      });
    }

    // Add execution result
    messages.push({
      role: "user",
      content: `${execResult}\n\nContinue with the task. Respond with JSON containing "thinking", "code", and "response" fields.`,
    });
  }

  // Every early exit above sets a non-empty finalText, so an empty value here
  // means the step budget ran out. Report that explicitly, using the same
  // "ERROR: ..." convention as the code-enforcement failure, instead of
  // returning an empty string.
  if (!finalText) {
    console.warn(`[codeExecLoop] Step limit (${maxSteps}) reached without a final response`);
    emitStep(threadId, {
      type: "thinking",
      status: "error",
      label: "Step limit reached before the agent produced a final response",
    });
    finalText = `ERROR: Agent reached the step limit (${maxSteps}) without providing a final response.`;
  }

  return {
    text: finalText,
    codeExecutions: allExecutions,
  };
}