@lakitu/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +166 -0
- package/convex/_generated/api.d.ts +45 -0
- package/convex/_generated/api.js +23 -0
- package/convex/_generated/dataModel.d.ts +58 -0
- package/convex/_generated/server.d.ts +143 -0
- package/convex/_generated/server.js +93 -0
- package/convex/cloud/CLAUDE.md +238 -0
- package/convex/cloud/_generated/api.ts +84 -0
- package/convex/cloud/_generated/component.ts +861 -0
- package/convex/cloud/_generated/dataModel.ts +60 -0
- package/convex/cloud/_generated/server.ts +156 -0
- package/convex/cloud/convex.config.ts +16 -0
- package/convex/cloud/index.ts +29 -0
- package/convex/cloud/intentSchema/generate.ts +447 -0
- package/convex/cloud/intentSchema/index.ts +16 -0
- package/convex/cloud/intentSchema/types.ts +418 -0
- package/convex/cloud/ksaPolicy.ts +554 -0
- package/convex/cloud/mail.ts +92 -0
- package/convex/cloud/schema.ts +322 -0
- package/convex/cloud/utils/kanbanContext.ts +229 -0
- package/convex/cloud/workflows/agentBoard.ts +451 -0
- package/convex/cloud/workflows/agentPrompt.ts +272 -0
- package/convex/cloud/workflows/agentThread.ts +374 -0
- package/convex/cloud/workflows/compileSandbox.ts +146 -0
- package/convex/cloud/workflows/crudBoard.ts +217 -0
- package/convex/cloud/workflows/crudKSAs.ts +262 -0
- package/convex/cloud/workflows/crudLorobeads.ts +371 -0
- package/convex/cloud/workflows/crudSkills.ts +205 -0
- package/convex/cloud/workflows/crudThreads.ts +708 -0
- package/convex/cloud/workflows/lifecycleSandbox.ts +1396 -0
- package/convex/cloud/workflows/sandboxConvex.ts +1046 -0
- package/convex/sandbox/README.md +90 -0
- package/convex/sandbox/_generated/api.d.ts +2934 -0
- package/convex/sandbox/_generated/api.js +23 -0
- package/convex/sandbox/_generated/dataModel.d.ts +60 -0
- package/convex/sandbox/_generated/server.d.ts +143 -0
- package/convex/sandbox/_generated/server.js +93 -0
- package/convex/sandbox/actions/bash.ts +130 -0
- package/convex/sandbox/actions/browser.ts +282 -0
- package/convex/sandbox/actions/file.ts +336 -0
- package/convex/sandbox/actions/lsp.ts +325 -0
- package/convex/sandbox/actions/pdf.ts +119 -0
- package/convex/sandbox/agent/codeExecLoop.ts +535 -0
- package/convex/sandbox/agent/decisions.ts +284 -0
- package/convex/sandbox/agent/index.ts +515 -0
- package/convex/sandbox/agent/subagents.ts +651 -0
- package/convex/sandbox/brandResearch/index.ts +417 -0
- package/convex/sandbox/context/index.ts +7 -0
- package/convex/sandbox/context/session.ts +402 -0
- package/convex/sandbox/convex.config.ts +17 -0
- package/convex/sandbox/index.ts +51 -0
- package/convex/sandbox/nodeActions/codeExec.ts +130 -0
- package/convex/sandbox/planning/beads.ts +187 -0
- package/convex/sandbox/planning/index.ts +8 -0
- package/convex/sandbox/planning/sync.ts +194 -0
- package/convex/sandbox/prompts/codeExec.ts +852 -0
- package/convex/sandbox/prompts/modes.ts +231 -0
- package/convex/sandbox/prompts/system.ts +142 -0
- package/convex/sandbox/schema.ts +510 -0
- package/convex/sandbox/state/artifacts.ts +99 -0
- package/convex/sandbox/state/checkpoints.ts +341 -0
- package/convex/sandbox/state/files.ts +383 -0
- package/convex/sandbox/state/index.ts +10 -0
- package/convex/sandbox/state/verification.actions.ts +268 -0
- package/convex/sandbox/state/verification.ts +101 -0
- package/convex/sandbox/tsconfig.json +25 -0
- package/convex/sandbox/utils/codeExecHelpers.ts +52 -0
- package/dist/cli/commands/build.d.ts +19 -0
- package/dist/cli/commands/build.d.ts.map +1 -0
- package/dist/cli/commands/build.js +223 -0
- package/dist/cli/commands/init.d.ts +16 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +148 -0
- package/dist/cli/commands/publish.d.ts +12 -0
- package/dist/cli/commands/publish.d.ts.map +1 -0
- package/dist/cli/commands/publish.js +33 -0
- package/dist/cli/index.d.ts +14 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +40 -0
- package/dist/sdk/builders.d.ts +104 -0
- package/dist/sdk/builders.d.ts.map +1 -0
- package/dist/sdk/builders.js +214 -0
- package/dist/sdk/index.d.ts +29 -0
- package/dist/sdk/index.d.ts.map +1 -0
- package/dist/sdk/index.js +38 -0
- package/dist/sdk/types.d.ts +107 -0
- package/dist/sdk/types.d.ts.map +1 -0
- package/dist/sdk/types.js +6 -0
- package/ksa/README.md +263 -0
- package/ksa/_generated/REFERENCE.md +2954 -0
- package/ksa/_generated/registry.ts +257 -0
- package/ksa/_shared/configReader.ts +302 -0
- package/ksa/_shared/configSchemas.ts +649 -0
- package/ksa/_shared/gateway.ts +175 -0
- package/ksa/_shared/ksaBehaviors.ts +411 -0
- package/ksa/_shared/ksaProxy.ts +248 -0
- package/ksa/_shared/localDb.ts +302 -0
- package/ksa/index.ts +134 -0
- package/package.json +93 -0
- package/runtime/browser/agent-browser.ts +330 -0
- package/runtime/entrypoint.ts +194 -0
- package/runtime/lsp/manager.ts +366 -0
- package/runtime/pdf/pdf-generator.ts +50 -0
- package/runtime/pdf/renderer.ts +357 -0
- package/runtime/pdf/schema.ts +97 -0
- package/runtime/services/file-watcher.ts +191 -0
- package/template/build.ts +307 -0
- package/template/e2b/Dockerfile +69 -0
- package/template/e2b/e2b.toml +13 -0
- package/template/e2b/prebuild.sh +68 -0
- package/template/e2b/start.sh +14 -0
|
@@ -0,0 +1,535 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Execution Agent Loop
|
|
3
|
+
*
|
|
4
|
+
* This is the NEW agent loop that uses code execution instead of JSON tool calls.
|
|
5
|
+
*
|
|
6
|
+
* Architecture:
|
|
7
|
+
* 1. Send prompt to LLM (NO tool schemas)
|
|
8
|
+
* 2. LLM responds with TypeScript code
|
|
9
|
+
* 3. Extract code blocks from response
|
|
10
|
+
* 4. Execute code in E2B sandbox
|
|
11
|
+
* 5. Feed output back to LLM
|
|
12
|
+
* 6. Repeat until task complete
|
|
13
|
+
*
|
|
14
|
+
* The agent imports from /home/user/ksa/ (KSAs - Knowledge, Skills, Abilities).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { internal } from "../_generated/api";
|
|
18
|
+
import { wrapCodeForExecution, extractCodeBlocks } from "../utils/codeExecHelpers";
|
|
19
|
+
import type { ChainOfThoughtStep, StepStatus } from "../../../shared/chain-of-thought";
|
|
20
|
+
import { createStepId } from "../../../shared/chain-of-thought";
|
|
21
|
+
|
|
22
|
+
// Fallback model identifier for the code execution loop.
// Callers are expected to pass the model chosen in unified settings
// (convex/features/settings/models.ts); this value is used only when no model
// is supplied.
const DEFAULT_MODEL = "anthropic/claude-sonnet-4";
|
|
25
|
+
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// Types
|
|
28
|
+
// ============================================================================
|
|
29
|
+
|
|
30
|
+
/** One chat message in the conversation sent to the LLM gateway. */
interface LLMMessage {
  /** Author of the message within the conversation. */
  role: "system" | "user" | "assistant";
  /** Plain-text body of the message. */
  content: string;
}
|
|
34
|
+
|
|
35
|
+
/** Connection details for the cloud gateway used by this module. */
interface GatewayConfig {
  /** Base URL of the gateway; requests in this file are POSTed to `${convexUrl}/agent/call`. */
  convexUrl: string;
  /** Token sent in the `Authorization: Bearer ...` header of every gateway request. */
  jwt: string;
}
|
|
39
|
+
|
|
40
|
+
/** Outcome of a full run of the code execution loop. */
interface CodeExecResult {
  /** Final text produced by the loop; empty when the loop ran out of steps. */
  text: string;
  /** Every code execution attempted during the run, in order. */
  codeExecutions: {
    /** The code that was submitted for execution (after wrapping). */
    code: string;
    /** Captured output, or the error message when the execution call itself threw. */
    output: string;
    /** Whether the execution reported success. */
    success: boolean;
  }[];
}
|
|
48
|
+
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// Chain of Thought Tracking + Real-time Cloud Forwarding
|
|
51
|
+
// ============================================================================
|
|
52
|
+
|
|
53
|
+
// In-memory record of emitted chain-of-thought steps, keyed by thread ID.
const chainOfThoughtSteps = new Map<string, ChainOfThoughtStep[]>();
|
|
54
|
+
|
|
55
|
+
// Destination for real-time log forwarding. Assigned by runCodeExecLoop when a
// sessionId is supplied; forwardLogToCloud is a no-op while this is null.
let cloudForwardingConfig: null | {
  gatewayConfig: GatewayConfig;
  sessionId: string;
} = null;
|
|
60
|
+
|
|
61
|
+
/** A single log entry forwarded to the cloud for real-time UI display. */
interface StructuredLog {
  /** Entry category: thinking, tool, search, file, text. */
  type: string;
  /** Short human-readable summary shown for the entry. */
  label: string;
  /** Progress state: active, complete, error. */
  status?: string;
  /** Name of the icon to display alongside the entry. */
  icon?: string;
  /** Optional longer description. */
  details?: string;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Best-effort delivery of one structured log entry to the cloud gateway so the
 * UI can display progress in real time.
 *
 * Does nothing when cloud forwarding has not been configured. Any failure while
 * building or sending the request is deliberately swallowed so that logging can
 * never interrupt the agent loop; the returned promise therefore never rejects.
 *
 * @param log - Entry to append to the configured session's logs.
 */
async function forwardLogToCloud(log: StructuredLog): Promise<void> {
  const forwarding = cloudForwardingConfig;
  if (!forwarding) {
    return;
  }

  try {
    const endpoint = `${forwarding.gatewayConfig.convexUrl}/agent/call`;
    const requestBody = JSON.stringify({
      path: "agent.workflows.sandboxConvex.appendLogs",
      type: "mutation",
      args: {
        sessionId: forwarding.sessionId,
        logs: [log],
      },
    });

    await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${forwarding.gatewayConfig.jwt}`,
      },
      body: requestBody,
    });
  } catch {
    // Intentionally ignored: forwarding is fire-and-forget and must not block execution.
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Records a chain-of-thought step for a thread and forwards a summary of it to
 * the cloud for real-time display.
 *
 * The step is stamped with a freshly generated ID and the current time, then
 * appended to the in-memory list for `threadId` (created on first use).
 *
 * @param threadId - Thread the step belongs to.
 * @param step - Step contents without `id` and `timestamp`.
 * @returns The ID assigned to the recorded step.
 */
function emitStep(
  threadId: string,
  step: Omit<ChainOfThoughtStep, "id" | "timestamp">
): string {
  let threadSteps = chainOfThoughtSteps.get(threadId);
  if (!threadSteps) {
    threadSteps = [];
    chainOfThoughtSteps.set(threadId, threadSteps);
  }

  const recorded = {
    id: createStepId(),
    timestamp: Date.now(),
    ...step,
  } as ChainOfThoughtStep;
  threadSteps.push(recorded);

  // Not every step variant declares label/toolName/description, hence the loose view.
  const loose = recorded as any;

  let icon: string;
  switch (recorded.type) {
    case "thinking":
      icon = "lightbulb";
      break;
    case "tool":
      icon = "tools";
      break;
    case "search":
      icon = "magnify";
      break;
    case "file":
      icon = "file";
      break;
    default:
      icon = "text";
  }

  // Fire-and-forget: forwardLogToCloud swallows its own errors, so it is not awaited.
  void forwardLogToCloud({
    type: recorded.type,
    label: loose.label || loose.toolName || recorded.type,
    status: recorded.status,
    icon,
    details: loose.description,
  });

  return recorded.id;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Changes the status of a previously recorded step.
 *
 * Only the in-memory record is updated; nothing is forwarded to the cloud.
 * Unknown thread or step IDs are ignored.
 *
 * @param threadId - Thread whose step list is searched.
 * @param stepId - ID returned by emitStep for the step to update.
 * @param status - New status to store on the step.
 */
function updateStepStatus(threadId: string, stepId: string, status: StepStatus) {
  const target = chainOfThoughtSteps
    .get(threadId)
    ?.find((candidate) => candidate.id === stepId);

  if (target) {
    target.status = status;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Returns the chain-of-thought steps recorded for a thread.
 *
 * @param threadId - Thread to look up.
 * @returns The stored step list itself (not a copy) when the thread has one,
 *          otherwise a new empty array.
 */
export function getSteps(threadId: string): ChainOfThoughtStep[] {
  const recorded = chainOfThoughtSteps.get(threadId);
  return recorded ?? [];
}
|
|
139
|
+
|
|
140
|
+
// ============================================================================
|
|
141
|
+
// Cloud LLM Gateway (JSON Schema Structured Output)
|
|
142
|
+
// ============================================================================
|
|
143
|
+
|
|
144
|
+
/** Structured action the model is asked to return on every turn. */
interface AgentAction {
  /** The model's reasoning about what to do next. */
  thinking: string;
  /** TypeScript code to execute; empty or absent when no code should run. */
  code?: string;
  /** Final response to the user; empty or absent while the task is still in progress. */
  response?: string;
}
|
|
149
|
+
|
|
150
|
+
/** Result of a single call to the cloud LLM gateway. */
interface LLMResponse {
  /** Raw message content returned by the model ("" when there was none). */
  text: string;
  /** Structured action parsed from `text`; undefined when parsing failed. */
  action?: AgentAction;
  /** The `finish_reason` reported for the first choice, if any. */
  finishReason?: string;
}
|
|
155
|
+
|
|
156
|
+
// Field definitions for the structured action. Every field is a string; the
// model is told to send an empty string for fields it does not need.
const AGENT_ACTION_PROPERTIES = {
  thinking: {
    type: "string",
    description: "Your reasoning about what to do next. Always explain your thought process.",
  },
  code: {
    type: "string",
    description: "TypeScript code to execute. Import from ./ksa/* for capabilities (web search, file ops, PDF generation, etc.). Leave empty string if no code needed.",
  },
  response: {
    type: "string",
    description: "Final response to the user. Only provide a non-empty value when the task is FULLY COMPLETE and no more code needs to run. Leave empty string otherwise.",
  },
};

// JSON Schema passed as `response_format` so the model must reply with valid
// JSON. This is more reliable than tool_choice, which some providers ignore.
const AGENT_ACTION_SCHEMA = {
  name: "AgentAction",
  strict: true,
  schema: {
    type: "object",
    properties: AGENT_ACTION_PROPERTIES,
    // All declared fields are required, in declaration order.
    required: Object.keys(AGENT_ACTION_PROPERTIES),
    additionalProperties: false,
  },
};
|
|
181
|
+
|
|
182
|
+
/**
 * Checks that a parsed JSON value has the shape of an AgentAction: a plain
 * object whose `thinking`, `code` and `response` fields are each either a
 * string or absent/null.
 */
function isAgentAction(value: unknown): value is AgentAction {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return ["thinking", "code", "response"].every(
    (field) => candidate[field] == null || typeof candidate[field] === "string"
  );
}

/**
 * Call the cloud LLM gateway with JSON schema structured output.
 * Uses response_format instead of tool calling for reliability.
 *
 * The model's reply is parsed as JSON; if that fails (or the JSON is not
 * action-shaped), the first markdown code fence in the reply is tried as a
 * fallback. When neither yields a valid action, `action` is left undefined and
 * the caller decides what to do with the raw text.
 *
 * @param messages - Conversation to send to the model.
 * @param gatewayConfig - Gateway URL and bearer token.
 * @param options - Optional model override, token limit (default 4096) and temperature.
 * @returns The raw content, the parsed action (if any) and the finish reason.
 * @throws Error when the gateway is not configured, the HTTP request fails, or
 *         the gateway reports an error or returns no data.
 */
async function callCloudLLM(
  messages: LLMMessage[],
  gatewayConfig: GatewayConfig,
  options: {
    model?: string;
    maxTokens?: number;
    temperature?: number;
  } = {}
): Promise<LLMResponse> {
  const { convexUrl, jwt } = gatewayConfig;

  if (!convexUrl || !jwt) {
    throw new Error("Gateway not configured");
  }

  const response = await fetch(`${convexUrl}/agent/call`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${jwt}`,
    },
    body: JSON.stringify({
      path: "internal.services.OpenRouter.internal.chatCompletion",
      args: {
        model: options.model || DEFAULT_MODEL,
        messages,
        responseFormat: {
          type: "json_schema",
          json_schema: AGENT_ACTION_SCHEMA,
        },
        maxTokens: options.maxTokens || 4096,
        temperature: options.temperature,
      },
    }),
  });

  if (!response.ok) {
    throw new Error(`LLM call failed: ${response.status}`);
  }

  const result = await response.json();
  if (!result.ok) {
    throw new Error(`LLM error: ${result.error || JSON.stringify(result)}`);
  }
  if (result.data == null) {
    // Fail with a descriptive message instead of a TypeError on `result.data.choices`.
    throw new Error(`LLM error: gateway response contained no data: ${JSON.stringify(result)}`);
  }

  const choice = result.data.choices?.[0];
  const content = choice?.message?.content || "";

  // Debug logging
  console.log(`[callCloudLLM] finish_reason: ${choice?.finish_reason}`);
  console.log(`[callCloudLLM] content preview: ${content.slice(0, 300)}`);

  // Parse JSON structured output
  let action: AgentAction | undefined;
  if (content) {
    try {
      const parsed: unknown = JSON.parse(content);
      if (!isAgentAction(parsed)) {
        // Valid JSON of the wrong shape (primitive, array, non-string fields)
        // is treated like a parse failure so it never reaches the caller.
        throw new Error("content is valid JSON but not an AgentAction object");
      }
      action = parsed;
      console.log(`[callCloudLLM] Parsed action - thinking: ${action.thinking?.slice(0, 100)}, hasCode: ${!!action.code}, hasResponse: ${!!action.response}`);
    } catch (e) {
      console.error(`[callCloudLLM] Failed to parse JSON: ${e}`);
      // If JSON parse fails, try to extract from markdown code blocks
      const jsonMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (jsonMatch) {
        try {
          const extracted: unknown = JSON.parse(jsonMatch[1].trim());
          if (!isAgentAction(extracted)) {
            throw new Error("code block JSON is not an AgentAction object");
          }
          action = extracted;
          console.log(`[callCloudLLM] Extracted JSON from code block`);
        } catch {
          console.error(`[callCloudLLM] Could not parse JSON from code block either`);
        }
      }
    }
  }

  return {
    text: content,
    action,
    finishReason: choice?.finish_reason,
  };
}
|
|
265
|
+
|
|
266
|
+
// ============================================================================
|
|
267
|
+
// Code Execution Agent Loop
|
|
268
|
+
// ============================================================================
|
|
269
|
+
|
|
270
|
+
/**
 * Classifies one non-blank line of sandbox console output for display as a
 * chain-of-thought step.
 *
 * @param line - A single line of execution output.
 * @returns The step type and label to emit, or null when the line is
 *          debug/internal noise that should not be shown.
 */
function classifyOutputLine(
  line: string
): { stepType: "tool" | "text" | "search" | "file"; label: string } | null {
  if (line.includes("[beads]")) {
    return { stepType: "tool", label: line.replace("[beads]", "📋").trim() };
  }
  if (line.includes("[deliverables]") || line.includes("[pdf]")) {
    return {
      stepType: "file",
      label: line.replace("[deliverables]", "💾").replace("[pdf]", "📄").trim(),
    };
  }
  if (line.includes("[web]") || line.includes("Searching") || line.includes("search")) {
    return { stepType: "search", label: line.replace("[web]", "🔍").trim() };
  }

  // Result-style lines are always shown as text, even when they start with "[".
  const isResultLine =
    line.includes("Found") || line.includes("Created") || line.includes("Saved");
  if (!isResultLine && (line.startsWith("[") || line.includes("DEBUG"))) {
    // Skip debug/internal logs
    return null;
  }

  return { stepType: "text", label: line.slice(0, 150) };
}

/**
 * Run the code execution agent loop.
 *
 * Architecture:
 * - The LLM is asked for a JSON action with "thinking", "code" and "response"
 * - If the action contains code, it is executed and the output is fed back
 * - If the action contains only a response, the loop ends with that response
 *   (a response given before any code has run is rejected and retried, up to
 *   3 times)
 * - If the reply is not structured, markdown code blocks are used as a
 *   fallback; a reply with neither is treated as the final response
 * - The loop stops after `maxSteps` iterations at the latest
 *
 * @param ctx - Action context; `ctx.runAction` is used to execute code.
 * @param systemPrompt - System message that opens the conversation.
 * @param userPrompt - Initial user message.
 * @param gatewayConfig - Gateway URL and token, also passed to executed code via env.
 * @param options - Optional settings: `maxSteps` (default 10), `threadId` for
 *        step tracking (generated when omitted), `cardId` and `cloudThreadId`
 *        (exposed to executed code as CARD_ID / THREAD_ID), `model`, and
 *        `sessionId` (enables real-time log forwarding).
 * @returns The final text (empty when the step limit was reached without a
 *          final response) and every code execution attempted.
 * @throws Propagates errors from the LLM gateway call. Errors raised while
 *         executing code are caught and reported back to the LLM instead.
 */
export async function runCodeExecLoop(
  ctx: any,
  systemPrompt: string,
  userPrompt: string,
  gatewayConfig: GatewayConfig,
  options: {
    maxSteps?: number;
    threadId?: string;
    cardId?: string;
    cloudThreadId?: string; // Cloud thread ID for artifact uploads
    model?: string;
    sessionId?: string; // For real-time log forwarding to cloud
  } = {}
): Promise<CodeExecResult> {
  // MARKER: Version 2026-01-15-v4 - real-time log forwarding to cloud
  console.log("🔥🔥🔥 [codeExecLoop] VERSION: 2026-01-15-v4 WITH REAL-TIME LOGS 🔥🔥🔥");

  const maxSteps = options.maxSteps || 10;
  const threadId = options.threadId || `codeexec_${Date.now()}`;
  const cardId = options.cardId;
  const cloudThreadId = options.cloudThreadId; // Cloud thread ID for artifact uploads
  const model = options.model;

  // Set up cloud forwarding for real-time chain of thought. The config is
  // module-level state, so it is always reassigned here: otherwise a run
  // without a sessionId would keep forwarding logs to the previous run's session.
  if (options.sessionId) {
    cloudForwardingConfig = {
      gatewayConfig,
      sessionId: options.sessionId,
    };
    console.log(`[codeExecLoop] Cloud forwarding enabled for session: ${options.sessionId}`);
  } else {
    cloudForwardingConfig = null;
  }

  let codeEnforcementRetries = 0;
  const MAX_CODE_ENFORCEMENT_RETRIES = 3;

  const messages: LLMMessage[] = [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ];

  const allExecutions: CodeExecResult["codeExecutions"] = [];
  let finalText = "";

  emitStep(threadId, {
    type: "thinking",
    status: "complete",
    label: "Starting code execution loop...",
  });

  for (let step = 0; step < maxSteps; step++) {
    const thinkingId = emitStep(threadId, {
      type: "thinking",
      status: "active",
      label: `Step ${step + 1}: Thinking...`,
    });

    // Call LLM - uses JSON schema structured output
    let response: LLMResponse;
    try {
      response = await callCloudLLM(messages, gatewayConfig, { model });
    } catch (error) {
      // Don't leave the step stuck in "active" when the gateway call fails.
      updateStepStatus(threadId, thinkingId, "error");
      throw error;
    }
    updateStepStatus(threadId, thinkingId, "complete");

    // Get the structured action from response
    let action = response.action;

    // Fallback: if structured output failed, try to extract code blocks
    if (!action) {
      console.error("[codeExecLoop] ERROR: No structured action returned!");
      console.error("[codeExecLoop] Response text:", response.text);

      const codeBlocks = extractCodeBlocks(response.text);
      if (codeBlocks.length > 0) {
        console.log(`[codeExecLoop] Fallback: Found ${codeBlocks.length} code blocks`);
        action = {
          thinking: "Extracted from markdown",
          code: codeBlocks.join("\n\n"),
          response: "",
        };
      } else {
        // No action and no code - treat text as final response
        finalText = response.text || "Agent completed without response.";
        emitStep(threadId, {
          type: "text",
          status: "complete",
          label: finalText.slice(0, 200),
        });
        break;
      }
    }

    // The action comes from model output, so its fields are not guaranteed to
    // be strings at runtime; normalise before calling string methods on them.
    const thinking = typeof action.thinking === "string" ? action.thinking : "";
    const rawCode = typeof action.code === "string" ? action.code : "";
    const responseText = typeof action.response === "string" ? action.response : "";

    // Log the agent's thinking
    if (thinking) {
      emitStep(threadId, {
        type: "thinking",
        status: "complete",
        label: thinking.slice(0, 200),
      });
      console.log(`[codeExecLoop] Thinking: ${thinking}`);
    }

    // If agent provided a final response (non-empty) and no code, we're done
    const hasCode = rawCode.trim().length > 0;
    const hasResponse = responseText.trim().length > 0;

    if (hasResponse && !hasCode) {
      // CRITICAL: Reject responses if no code has been executed yet
      // This prevents the agent from hallucinating completion without actually executing
      if (allExecutions.length === 0) {
        codeEnforcementRetries++;
        console.warn(`[codeExecLoop] Agent tried to respond without code - retry ${codeEnforcementRetries}/${MAX_CODE_ENFORCEMENT_RETRIES} (step ${step})`);

        if (codeEnforcementRetries >= MAX_CODE_ENFORCEMENT_RETRIES) {
          console.error("[codeExecLoop] Agent failed to provide code after max retries - failing");
          emitStep(threadId, {
            type: "thinking",
            status: "error",
            label: "Agent failed to execute code after multiple attempts",
          });
          finalText = `ERROR: Agent failed to execute code. Response was: ${responseText}`;
          break;
        }

        emitStep(threadId, {
          type: "thinking",
          status: "error",
          label: `Retry ${codeEnforcementRetries}: Agent must execute code`,
        });

        // Ask the agent to try again with code
        messages.push({
          role: "assistant",
          content: `Thinking: ${thinking || "..."}\n\nResponse: ${responseText}`,
        });
        messages.push({
          role: "user",
          content: `ERROR: You cannot provide a response without executing code first. You MUST provide actual TypeScript code in the "code" field. Do not describe what you would do - actually write and execute code using import statements like: import { search } from './ksa/web'. Try again with code.`,
        });
        continue; // Go to next iteration
      }

      // After code has been executed, accept the response
      finalText = responseText;
      emitStep(threadId, {
        type: "text",
        status: "complete",
        label: finalText.slice(0, 200),
      });
      break;
    }

    // If agent provided code, execute it
    if (hasCode) {
      const code = wrapCodeForExecution(rawCode);

      const execId = emitStep(threadId, {
        type: "tool",
        status: "active",
        toolName: "code_execution",
        label: "Executing code...",
        input: { code: code.slice(0, 500) },
      });

      let execResult: string;
      try {
        const result = await ctx.runAction(internal.nodeActions.codeExec.execute, {
          code,
          timeoutMs: 60_000,
          env: {
            // KSAs use both CONVEX_URL and GATEWAY_URL - provide both for compatibility
            CONVEX_URL: gatewayConfig.convexUrl,
            GATEWAY_URL: gatewayConfig.convexUrl,
            SANDBOX_JWT: gatewayConfig.jwt,
            ...(cardId && { CARD_ID: cardId }),
            // Pass cloud thread ID for artifact uploads (NOT the sandbox-local threadId)
            ...(cloudThreadId && { THREAD_ID: cloudThreadId }),
          },
        });

        allExecutions.push({
          code,
          output: result.output,
          success: result.success,
        });

        if (result.success) {
          execResult = `[Execution successful]\n${result.output}`;
          updateStepStatus(threadId, execId, "complete");
        } else {
          execResult = `[Execution failed]\nError: ${result.error}\nOutput: ${result.output}`;
          updateStepStatus(threadId, execId, "error");
        }
      } catch (error) {
        const msg = error instanceof Error ? error.message : String(error);
        execResult = `[Execution error]\n${msg}`;
        allExecutions.push({
          code,
          output: msg,
          success: false,
        });
        updateStepStatus(threadId, execId, "error");
      }

      // Add assistant's action to messages
      messages.push({
        role: "assistant",
        content: `Thinking: ${thinking || "..."}\n\nExecuting code:\n\`\`\`typescript\n${rawCode}\n\`\`\``,
      });

      // Emit console output as chain-of-thought steps for real-time UI visibility
      // Parse output for meaningful logs (beads, deliverables, web, etc.)
      const outputLines = (allExecutions[allExecutions.length - 1]?.output || "").split("\n");
      for (const line of outputLines) {
        if (!line.trim()) continue;

        // Categorize log lines for better UI display
        const classified = classifyOutputLine(line);
        if (!classified) continue;
        const { stepType, label } = classified;

        emitStep(threadId, {
          type: stepType,
          status: "complete",
          label,
          ...(stepType === "tool" && { toolName: "console", output: line }),
        });
      }

      // Add execution result
      messages.push({
        role: "user",
        content: `${execResult}\n\nContinue with the task. Respond with JSON containing "thinking", "code", and "response" fields.`,
      });
    } else {
      // No code and no response - shouldn't happen but handle gracefully
      console.warn("[codeExecLoop] Action has neither code nor response");
      finalText = thinking || "Task completed.";
      break;
    }
  }

  // Every early exit above sets a non-empty finalText, so an empty value here
  // means the step budget ran out before the agent produced a final response.
  if (!finalText) {
    console.warn(`[codeExecLoop] Loop ended after ${maxSteps} step(s) without a final response`);
  }

  return {
    text: finalText,
    codeExecutions: allExecutions,
  };
}
|