@auvira.ai/sdk 0.5.0 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/README.md +31 -0
  2. package/dist/agent/Agent.d.ts.map +1 -1
  3. package/dist/agent/Agent.js +9 -0
  4. package/dist/agent/Agent.js.map +1 -1
  5. package/dist/agent/attachmentContext.d.ts +9 -0
  6. package/dist/agent/attachmentContext.d.ts.map +1 -0
  7. package/dist/agent/attachmentContext.js +72 -0
  8. package/dist/agent/attachmentContext.js.map +1 -0
  9. package/dist/agent/editCompletion.d.ts +16 -2
  10. package/dist/agent/editCompletion.d.ts.map +1 -1
  11. package/dist/agent/editCompletion.js +83 -1
  12. package/dist/agent/editCompletion.js.map +1 -1
  13. package/dist/agent/hostTools.d.ts +46 -0
  14. package/dist/agent/hostTools.d.ts.map +1 -0
  15. package/dist/agent/hostTools.js +98 -0
  16. package/dist/agent/hostTools.js.map +1 -0
  17. package/dist/agent/parseSendPayload.d.ts +3 -1
  18. package/dist/agent/parseSendPayload.d.ts.map +1 -1
  19. package/dist/agent/parseSendPayload.js +16 -0
  20. package/dist/agent/parseSendPayload.js.map +1 -1
  21. package/dist/agent/runAgentTask.d.ts +4 -1
  22. package/dist/agent/runAgentTask.d.ts.map +1 -1
  23. package/dist/agent/runAgentTask.js +16 -0
  24. package/dist/agent/runAgentTask.js.map +1 -1
  25. package/dist/agent/runValidationWithRepair.d.ts.map +1 -1
  26. package/dist/agent/runValidationWithRepair.js +6 -0
  27. package/dist/agent/runValidationWithRepair.js.map +1 -1
  28. package/dist/agent/tools/executeTool.d.ts.map +1 -1
  29. package/dist/agent/tools/executeTool.js +15 -1
  30. package/dist/agent/tools/executeTool.js.map +1 -1
  31. package/dist/agent/tools/types.d.ts +3 -0
  32. package/dist/agent/tools/types.d.ts.map +1 -1
  33. package/dist/agent/types.d.ts +37 -0
  34. package/dist/agent/types.d.ts.map +1 -1
  35. package/dist/agent/types.js.map +1 -1
  36. package/dist/agent/validateOptions.d.ts.map +1 -1
  37. package/dist/agent/validateOptions.js +10 -1
  38. package/dist/agent/validateOptions.js.map +1 -1
  39. package/dist/index.d.ts +4 -1
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +2 -0
  42. package/dist/index.js.map +1 -1
  43. package/dist/providers/agentic/AgenticCustomProvider.d.ts.map +1 -1
  44. package/dist/providers/agentic/AgenticCustomProvider.js +21 -0
  45. package/dist/providers/agentic/AgenticCustomProvider.js.map +1 -1
  46. package/dist/providers/agentic/agentLoop.d.ts +4 -1
  47. package/dist/providers/agentic/agentLoop.d.ts.map +1 -1
  48. package/dist/providers/agentic/agentLoop.js +286 -265
  49. package/dist/providers/agentic/agentLoop.js.map +1 -1
  50. package/dist/providers/agentic/toolSchemas.d.ts +314 -0
  51. package/dist/providers/agentic/toolSchemas.d.ts.map +1 -1
  52. package/dist/providers/agentic/toolSchemas.js +7 -0
  53. package/dist/providers/agentic/toolSchemas.js.map +1 -1
  54. package/dist/providers/types.d.ts +4 -1
  55. package/dist/providers/types.d.ts.map +1 -1
  56. package/dist/providers/types.js.map +1 -1
  57. package/dist/runner/jobTypes.d.ts +3 -2
  58. package/dist/runner/jobTypes.d.ts.map +1 -1
  59. package/dist/runner/run.d.ts.map +1 -1
  60. package/dist/runner/run.js +6 -2
  61. package/dist/runner/run.js.map +1 -1
  62. package/dist/runner/validateJob.d.ts.map +1 -1
  63. package/dist/runner/validateJob.js +83 -0
  64. package/dist/runner/validateJob.js.map +1 -1
  65. package/docs/host-integration-image-placement.md +141 -0
  66. package/docs/sandbox-runner.md +22 -1
  67. package/package.json +2 -1
@@ -2,7 +2,8 @@ import { buildAssistantEvent } from "../../agent/assistantEvent.js";
2
2
  import { throwIfCancelled } from "../../agent/cancellation.js";
3
3
  import { emitAgentCompletionCheckThinking, emitAgentPlanningThinking, emitAgentRawModelTrace, emitAgentReflectionThinking, emitAgentToolSelectionThinking, } from "../../agent/emitAgentThinking.js";
4
4
  import { buildIncompleteContinuationNudge, computeCompletionEvaluationKey, evaluateTaskCompletion, getCompletionMaxContinues, getTaskCompletionConfidenceThreshold, } from "../../agent/evaluateTaskCompletion.js";
5
- import { evaluateHarnessCompletionGate, getAssetPublishIncompleteNudge, getWiringIncompleteNudge, isStyleOnlyDiff, } from "../../agent/editCompletion.js";
5
+ import { evaluateHarnessCompletionGate, getAssetPublishIncompleteNudge, getCompletionRulesIncompleteNudge, getWiringIncompleteNudge, isStyleOnlyDiff, } from "../../agent/editCompletion.js";
6
+ import { clearHostToolsForRun, hostToolsToOpenAiDefinitions, registerHostToolsForRun, resolveHostToolsForRun, } from "../../agent/hostTools.js";
6
7
  import { executeAgentTool, parseToolArguments, } from "../../agent/tools/executeTool.js";
7
8
  import { filterAllowlistedChangedFiles, } from "../../agent/tools/pathGuard.js";
8
9
  import { gitDiff } from "../../git/gitDiff.js";
@@ -12,7 +13,7 @@ import { formatRateLimitReport, formatTimingReport, postJsonWithRetry, } from ".
12
13
  import { applyCustomModelRequestDefaults, bumpMaxCompletionTokensForLengthRetry, getMaxCompletionTokens, } from "../custom/modelRequestDefaults.js";
13
14
  import { evaluateToolPolicy, getPrimaryMaxToolCalls, } from "./toolPolicy.js";
14
15
  import { trimAgentHistory, } from "./trimAgentHistory.js";
15
- import { AGENT_TOOL_DEFINITIONS } from "./toolSchemas.js";
16
+ import { buildAgentToolDefinitions } from "./toolSchemas.js";
16
17
  const MAX_LOOP_TURNS = 25;
17
18
  const LENGTH_RETRY_NUDGE = "Your previous response was truncated (length limit). Call one tool with minimal arguments.";
18
19
  function buildUserMessage(userPrompt, imageDataUrls) {
@@ -100,7 +101,7 @@ function buildEarlyFinishResult(input) {
100
101
  };
101
102
  }
102
103
  async function applyDeterministicCompletionGate(input) {
103
- const gate = evaluateHarnessCompletionGate({
104
+ const gate = await evaluateHarnessCompletionGate({
104
105
  changedFiles: input.allowlistedFiles,
105
106
  toolCallCount: input.policyState.toolCallCount,
106
107
  modelTurnCount: input.modelTurnCount,
@@ -108,6 +109,7 @@ async function applyDeterministicCompletionGate(input) {
108
109
  selectedDom: input.selectedDom,
109
110
  completion: input.completion,
110
111
  deferredFinishCount: input.completionState.deferredFinishCount,
112
+ repoPath: input.repoPath,
111
113
  });
112
114
  if (gate.action === "continue") {
113
115
  input.completionState.deferredFinishCount += 1;
@@ -115,7 +117,9 @@ async function applyDeterministicCompletionGate(input) {
115
117
  ? "Wiring references an image; continuing until publish_reference_image writes public/assets."
116
118
  : gate.continueReason === "style_only_incomplete"
117
119
  ? "Style changes are in place; continuing to wire components and config."
118
- : undefined);
120
+ : gate.continueReason === "completion_callback"
121
+ ? "Completion rules not satisfied; continuing until required files change."
122
+ : undefined);
119
123
  input.bus?.emit({
120
124
  type: "run.continue",
121
125
  reason: gate.continueReason ?? "style_only_incomplete",
@@ -129,7 +133,9 @@ async function applyDeterministicCompletionGate(input) {
129
133
  role: "user",
130
134
  content: gate.continueReason === "asset_publish_incomplete"
131
135
  ? getAssetPublishIncompleteNudge()
132
- : getWiringIncompleteNudge(),
136
+ : gate.continueReason === "completion_callback"
137
+ ? getCompletionRulesIncompleteNudge()
138
+ : getWiringIncompleteNudge(),
133
139
  });
134
140
  return undefined;
135
141
  }
@@ -176,7 +182,7 @@ async function tryHarnessEarlyFinish(input) {
176
182
  if (allowlistedFiles.length === 0) {
177
183
  return undefined;
178
184
  }
179
- const gateContext = evaluateHarnessCompletionGate({
185
+ const gateContext = await evaluateHarnessCompletionGate({
180
186
  changedFiles: allowlistedFiles,
181
187
  toolCallCount: input.policyState.toolCallCount,
182
188
  modelTurnCount: input.modelTurnCount,
@@ -184,6 +190,7 @@ async function tryHarnessEarlyFinish(input) {
184
190
  selectedDom: input.selectedDom,
185
191
  completion: input.completion,
186
192
  deferredFinishCount: input.completionState.deferredFinishCount,
193
+ repoPath: input.repoPath,
187
194
  });
188
195
  const useLlm = shouldUseLlmCompletionEvaluator({
189
196
  harnessMode: input.harnessMode,
@@ -320,6 +327,7 @@ async function tryHarnessEarlyFinish(input) {
320
327
  selectedDom: input.selectedDom,
321
328
  completion: input.completion,
322
329
  completionState: input.completionState,
330
+ repoPath: input.repoPath,
323
331
  });
324
332
  }
325
333
  /** Runs a multi-turn tool-calling loop against an OpenAI-compatible chat API. */
@@ -338,6 +346,12 @@ export async function runAgentToolLoop(input) {
338
346
  recentToolResults: [],
339
347
  evaluationCallCount: 0,
340
348
  };
349
+ const resolvedHostTools = resolveHostToolsForRun({
350
+ hostTools: input.hostTools,
351
+ includeAttachmentUrlsTool: Boolean(input.attachments?.length),
352
+ });
353
+ registerHostToolsForRun(resolvedHostTools);
354
+ const toolDefinitions = buildAgentToolDefinitions(hostToolsToOpenAiDefinitions(resolvedHostTools));
341
355
  const toolCtx = {
342
356
  repoPath: input.repoPath,
343
357
  allowedWritePaths: input.allowedWritePaths,
@@ -345,242 +359,282 @@ export async function runAgentToolLoop(input) {
345
359
  apiKey: input.apiKey,
346
360
  signal: input.signal,
347
361
  imageGenCount: 0,
362
+ attachments: input.attachments,
363
+ selectedDom: input.selectedDom,
348
364
  };
349
- for (let turn = 0; turn < MAX_LOOP_TURNS; turn += 1) {
350
- throwIfCancelled(input.signal);
351
- const modelTurnCount = turn + 1;
352
- if (policyState.abortReason) {
353
- const early = await tryHarnessEarlyFinish({
354
- harnessMode: input.harnessMode,
355
- allowedWritePaths: input.allowedWritePaths,
356
- repoPath: input.repoPath,
357
- workspaceCheckpoint: input.workspaceCheckpoint,
358
- bus: input.bus,
359
- policyState,
360
- editsApplied,
361
- lastAssistantText,
362
- loopStartMs,
363
- lastUsage,
364
- messages,
365
- modelTurnCount,
366
- hasVision: input.hasVision,
367
- selectedDom: input.selectedDom,
368
- completion: input.completion,
369
- completionAuto: input.completionAuto,
370
- completionEvaluationEnabled: input.completionEvaluationEnabled,
371
- completionConfidenceThreshold: input.completionConfidenceThreshold,
372
- referenceImages: input.referenceImages,
373
- ownerRequest: input.ownerRequest,
374
- model: input.model,
375
- timeoutMs: input.timeoutMs,
376
- signal: input.signal,
377
- completionState,
378
- });
379
- if (early) {
380
- return early;
365
+ try {
366
+ for (let turn = 0; turn < MAX_LOOP_TURNS; turn += 1) {
367
+ throwIfCancelled(input.signal);
368
+ const modelTurnCount = turn + 1;
369
+ if (policyState.abortReason) {
370
+ const early = await tryHarnessEarlyFinish({
371
+ harnessMode: input.harnessMode,
372
+ allowedWritePaths: input.allowedWritePaths,
373
+ repoPath: input.repoPath,
374
+ workspaceCheckpoint: input.workspaceCheckpoint,
375
+ bus: input.bus,
376
+ policyState,
377
+ editsApplied,
378
+ lastAssistantText,
379
+ loopStartMs,
380
+ lastUsage,
381
+ messages,
382
+ modelTurnCount,
383
+ hasVision: input.hasVision,
384
+ selectedDom: input.selectedDom,
385
+ completion: input.completion,
386
+ completionAuto: input.completionAuto,
387
+ completionEvaluationEnabled: input.completionEvaluationEnabled,
388
+ completionConfidenceThreshold: input.completionConfidenceThreshold,
389
+ referenceImages: input.referenceImages,
390
+ ownerRequest: input.ownerRequest,
391
+ model: input.model,
392
+ timeoutMs: input.timeoutMs,
393
+ signal: input.signal,
394
+ completionState,
395
+ });
396
+ if (early) {
397
+ return early;
398
+ }
399
+ break;
381
400
  }
382
- break;
383
- }
384
- const trimmedMessages = trimAgentHistory(messages);
385
- let maxCompletionTokensOverride;
386
- let lengthRetryUsed = false;
387
- input.bus?.emit({
388
- type: "model.attempt",
389
- attempt: turn + 1,
390
- maxAttempts: MAX_LOOP_TURNS,
391
- });
392
- emitAgentPlanningThinking(input.bus, turn + 1);
393
- const requestBody = {
394
- model: input.modelId,
395
- messages: trimmedMessages,
396
- tools: AGENT_TOOL_DEFINITIONS,
397
- tool_choice: "auto",
398
- temperature: 0.2,
399
- };
400
- applyCustomModelRequestDefaults(requestBody, {
401
- profile,
402
- modelId: input.modelId,
403
- maxCompletionTokensOverride,
404
- });
405
- const callModel = async () => {
401
+ const trimmedMessages = trimAgentHistory(messages);
402
+ let maxCompletionTokensOverride;
403
+ let lengthRetryUsed = false;
406
404
  input.bus?.emit({
407
- type: "model.request",
408
- provider: "custom",
409
- model: input.modelId,
410
- hasVision: Boolean(input.hasVision),
405
+ type: "model.attempt",
406
+ attempt: turn + 1,
407
+ maxAttempts: MAX_LOOP_TURNS,
411
408
  });
412
- const { value: result } = await withApiConcurrencyLimit((queueWaitMs) => postJsonWithRetry({
413
- url: `${input.baseURL}/chat/completions`,
414
- headers: {
415
- Authorization: `Bearer ${input.apiKey}`,
416
- "Content-Type": "application/json",
417
- },
418
- body: requestBody,
419
- timeoutMs: input.timeoutMs ?? 300_000,
420
- signal: input.signal,
421
- }, queueWaitMs));
422
- if (result.ok && result.data) {
423
- const content = result.data.choices?.[0]?.message?.content?.trim() ?? "";
424
- input.bus?.emit({
425
- type: "model.response",
426
- contentPreview: content.slice(0, 500),
427
- timingMs: result.totalMs,
428
- httpRetries: result.retries,
429
- });
430
- }
431
- return result;
432
- };
433
- let apiResult = input.bus
434
- ? await input.bus.withTool("model_completion", { turn: turn + 1, model: input.modelId }, callModel)
435
- : await callModel();
436
- throwIfCancelled(input.signal);
437
- if (!apiResult.ok || !apiResult.data) {
438
- const rateLimit = apiResult.rateLimit;
439
- return {
440
- ok: false,
441
- response: "",
442
- summary: rateLimit ? "MiniMax rate limit exceeded" : "Model API request failed",
443
- editsApplied,
444
- toolCallCount: policyState.toolCallCount,
445
- error: {
446
- message: rateLimit
447
- ? formatRateLimitReport(rateLimit)
448
- : `${apiResult.errorMessage ?? "Model API request failed"} | ${formatTimingReport(apiResult)}`,
449
- code: rateLimit ? "RATE_LIMIT" : "PROVIDER_ERROR",
450
- },
409
+ emitAgentPlanningThinking(input.bus, turn + 1);
410
+ const requestBody = {
411
+ model: input.modelId,
412
+ messages: trimmedMessages,
413
+ tools: toolDefinitions,
414
+ tool_choice: "auto",
415
+ temperature: 0.2,
451
416
  };
452
- }
453
- let payload = apiResult.data;
454
- let choice = payload.choices?.[0];
455
- let finishReason = choice?.finish_reason;
456
- if (finishReason === "length" && !lengthRetryUsed) {
457
- lengthRetryUsed = true;
458
- const currentCap = requestBody.max_completion_tokens ??
459
- getMaxCompletionTokens(profile);
460
- maxCompletionTokensOverride = bumpMaxCompletionTokensForLengthRetry(profile, currentCap);
461
- messages.push({ role: "user", content: LENGTH_RETRY_NUDGE });
462
417
  applyCustomModelRequestDefaults(requestBody, {
463
418
  profile,
464
419
  modelId: input.modelId,
465
420
  maxCompletionTokensOverride,
466
421
  });
467
- requestBody.messages = trimAgentHistory(messages);
468
- input.bus?.emit({
469
- type: "model.repair",
470
- reason: "length",
471
- priorOutputSnippet: (choice?.message?.content ?? "").slice(0, 200),
472
- });
473
- apiResult = input.bus
474
- ? await input.bus.withTool("model_completion", { turn: turn + 1, model: input.modelId, lengthRetry: true }, callModel)
422
+ const callModel = async () => {
423
+ input.bus?.emit({
424
+ type: "model.request",
425
+ provider: "custom",
426
+ model: input.modelId,
427
+ hasVision: Boolean(input.hasVision),
428
+ });
429
+ const { value: result } = await withApiConcurrencyLimit((queueWaitMs) => postJsonWithRetry({
430
+ url: `${input.baseURL}/chat/completions`,
431
+ headers: {
432
+ Authorization: `Bearer ${input.apiKey}`,
433
+ "Content-Type": "application/json",
434
+ },
435
+ body: requestBody,
436
+ timeoutMs: input.timeoutMs ?? 300_000,
437
+ signal: input.signal,
438
+ }, queueWaitMs));
439
+ if (result.ok && result.data) {
440
+ const content = result.data.choices?.[0]?.message?.content?.trim() ?? "";
441
+ input.bus?.emit({
442
+ type: "model.response",
443
+ contentPreview: content.slice(0, 500),
444
+ timingMs: result.totalMs,
445
+ httpRetries: result.retries,
446
+ });
447
+ }
448
+ return result;
449
+ };
450
+ let apiResult = input.bus
451
+ ? await input.bus.withTool("model_completion", { turn: turn + 1, model: input.modelId }, callModel)
475
452
  : await callModel();
453
+ throwIfCancelled(input.signal);
476
454
  if (!apiResult.ok || !apiResult.data) {
477
455
  const rateLimit = apiResult.rateLimit;
478
456
  return {
479
457
  ok: false,
480
- response: lastAssistantText,
481
- summary: "Model API request failed after length retry",
458
+ response: "",
459
+ summary: rateLimit ? "MiniMax rate limit exceeded" : "Model API request failed",
482
460
  editsApplied,
483
461
  toolCallCount: policyState.toolCallCount,
484
462
  error: {
485
- message: rateLimit?.message ??
486
- apiResult.errorMessage ??
487
- "Model API request failed",
463
+ message: rateLimit
464
+ ? formatRateLimitReport(rateLimit)
465
+ : `${apiResult.errorMessage ?? "Model API request failed"} | ${formatTimingReport(apiResult)}`,
488
466
  code: rateLimit ? "RATE_LIMIT" : "PROVIDER_ERROR",
489
467
  },
490
468
  };
491
469
  }
492
- payload = apiResult.data;
493
- choice = payload.choices?.[0];
494
- finishReason = choice?.finish_reason;
495
- }
496
- lastUsage = payload.usage;
497
- const message = choice?.message;
498
- if (!message) {
499
- return {
500
- ok: false,
501
- response: lastAssistantText,
502
- summary: "Model returned empty message",
503
- editsApplied,
504
- toolCallCount: policyState.toolCallCount,
505
- error: { message: "Empty model response", code: "PROVIDER_ERROR" },
506
- };
507
- }
508
- const toolCalls = message.tool_calls ?? [];
509
- const assistantContent = typeof message.content === "string" ? message.content.trim() : "";
510
- emitAgentRawModelTrace(input.bus, turn + 1, "custom", message);
511
- if (assistantContent) {
512
- lastAssistantText = assistantContent;
513
- input.bus?.emit(buildAssistantEvent(assistantContent));
514
- }
515
- messages.push(toAssistantHistoryMessage(message));
516
- if (toolCalls.length > 0) {
517
- emitAgentToolSelectionThinking(input.bus, turn + 1, toolCalls.map((tc) => tc.function.name));
518
- }
519
- if (toolCalls.length === 0) {
520
- const harnessOk = input.harnessMode && input.allowedWritePaths?.length
521
- ? editsApplied > 0
522
- : editsApplied > 0 || assistantContent.length > 0;
523
- return {
524
- ok: harnessOk,
525
- response: assistantContent || lastAssistantText || "Agent completed",
526
- summary: editsApplied > 0 ? `Applied ${editsApplied} edit(s)` : assistantContent.slice(0, 200),
527
- editsApplied,
528
- toolCallCount: policyState.toolCallCount,
529
- usage: lastUsage,
530
- };
531
- }
532
- let mutatingEditsThisTurn = 0;
533
- for (const toolCall of toolCalls) {
534
- throwIfCancelled(input.signal);
535
- const toolName = toolCall.function.name;
536
- policyState = evaluateToolPolicy(toolName, policyState, maxToolCalls);
537
- if (policyState.abortReason) {
470
+ let payload = apiResult.data;
471
+ let choice = payload.choices?.[0];
472
+ let finishReason = choice?.finish_reason;
473
+ if (finishReason === "length" && !lengthRetryUsed) {
474
+ lengthRetryUsed = true;
475
+ const currentCap = requestBody.max_completion_tokens ??
476
+ getMaxCompletionTokens(profile);
477
+ maxCompletionTokensOverride = bumpMaxCompletionTokensForLengthRetry(profile, currentCap);
478
+ messages.push({ role: "user", content: LENGTH_RETRY_NUDGE });
479
+ applyCustomModelRequestDefaults(requestBody, {
480
+ profile,
481
+ modelId: input.modelId,
482
+ maxCompletionTokensOverride,
483
+ });
484
+ requestBody.messages = trimAgentHistory(messages);
485
+ input.bus?.emit({
486
+ type: "model.repair",
487
+ reason: "length",
488
+ priorOutputSnippet: (choice?.message?.content ?? "").slice(0, 200),
489
+ });
490
+ apiResult = input.bus
491
+ ? await input.bus.withTool("model_completion", { turn: turn + 1, model: input.modelId, lengthRetry: true }, callModel)
492
+ : await callModel();
493
+ if (!apiResult.ok || !apiResult.data) {
494
+ const rateLimit = apiResult.rateLimit;
495
+ return {
496
+ ok: false,
497
+ response: lastAssistantText,
498
+ summary: "Model API request failed after length retry",
499
+ editsApplied,
500
+ toolCallCount: policyState.toolCallCount,
501
+ error: {
502
+ message: rateLimit?.message ??
503
+ apiResult.errorMessage ??
504
+ "Model API request failed",
505
+ code: rateLimit ? "RATE_LIMIT" : "PROVIDER_ERROR",
506
+ },
507
+ };
508
+ }
509
+ payload = apiResult.data;
510
+ choice = payload.choices?.[0];
511
+ finishReason = choice?.finish_reason;
512
+ }
513
+ lastUsage = payload.usage;
514
+ const message = choice?.message;
515
+ if (!message) {
516
+ return {
517
+ ok: false,
518
+ response: lastAssistantText,
519
+ summary: "Model returned empty message",
520
+ editsApplied,
521
+ toolCallCount: policyState.toolCallCount,
522
+ error: { message: "Empty model response", code: "PROVIDER_ERROR" },
523
+ };
524
+ }
525
+ const toolCalls = message.tool_calls ?? [];
526
+ const assistantContent = typeof message.content === "string" ? message.content.trim() : "";
527
+ emitAgentRawModelTrace(input.bus, turn + 1, "custom", message);
528
+ if (assistantContent) {
529
+ lastAssistantText = assistantContent;
530
+ input.bus?.emit(buildAssistantEvent(assistantContent));
531
+ }
532
+ messages.push(toAssistantHistoryMessage(message));
533
+ if (toolCalls.length > 0) {
534
+ emitAgentToolSelectionThinking(input.bus, turn + 1, toolCalls.map((tc) => tc.function.name));
535
+ }
536
+ if (toolCalls.length === 0) {
537
+ const harnessOk = input.harnessMode && input.allowedWritePaths?.length
538
+ ? editsApplied > 0
539
+ : editsApplied > 0 || assistantContent.length > 0;
540
+ return {
541
+ ok: harnessOk,
542
+ response: assistantContent || lastAssistantText || "Agent completed",
543
+ summary: editsApplied > 0 ? `Applied ${editsApplied} edit(s)` : assistantContent.slice(0, 200),
544
+ editsApplied,
545
+ toolCallCount: policyState.toolCallCount,
546
+ usage: lastUsage,
547
+ };
548
+ }
549
+ let mutatingEditsThisTurn = 0;
550
+ for (const toolCall of toolCalls) {
551
+ throwIfCancelled(input.signal);
552
+ const toolName = toolCall.function.name;
553
+ policyState = evaluateToolPolicy(toolName, policyState, maxToolCalls);
554
+ if (policyState.abortReason) {
555
+ messages.push({
556
+ role: "tool",
557
+ tool_call_id: toolCall.id,
558
+ content: `Tool blocked: ${policyState.abortReason}`,
559
+ });
560
+ break;
561
+ }
562
+ const args = parseToolArguments(toolCall.function.arguments);
563
+ const result = await executeAgentTool(toolName, args, toolCtx, input.bus);
564
+ const toolPath = typeof args.path === "string"
565
+ ? args.path
566
+ : typeof args.file_path === "string"
567
+ ? args.file_path
568
+ : typeof args.destPath === "string"
569
+ ? args.destPath
570
+ : typeof args.dest_path === "string"
571
+ ? args.dest_path
572
+ : typeof result.path === "string"
573
+ ? result.path
574
+ : undefined;
575
+ const meta = result.meta ?? {};
576
+ const publicUrl = typeof meta.publicUrl === "string" ? meta.publicUrl : undefined;
577
+ const destPath = typeof meta.destPath === "string" ? meta.destPath : toolPath;
578
+ completionState.recentToolResults.push({
579
+ name: toolName,
580
+ ok: result.ok,
581
+ output: result.output,
582
+ path: destPath,
583
+ publicUrl,
584
+ meta,
585
+ });
586
+ if (completionState.recentToolResults.length > 5) {
587
+ completionState.recentToolResults.shift();
588
+ }
589
+ if (result.applied) {
590
+ editsApplied += 1;
591
+ mutatingEditsThisTurn += 1;
592
+ invalidateWorkspaceCheckpointCache(input.workspaceCheckpoint);
593
+ }
538
594
  messages.push({
539
595
  role: "tool",
540
596
  tool_call_id: toolCall.id,
541
- content: `Tool blocked: ${policyState.abortReason}`,
597
+ content: result.output,
542
598
  });
543
- break;
544
599
  }
545
- const args = parseToolArguments(toolCall.function.arguments);
546
- const result = await executeAgentTool(toolName, args, toolCtx, input.bus);
547
- const toolPath = typeof args.path === "string"
548
- ? args.path
549
- : typeof args.file_path === "string"
550
- ? args.file_path
551
- : typeof args.destPath === "string"
552
- ? args.destPath
553
- : typeof args.dest_path === "string"
554
- ? args.dest_path
555
- : typeof result.path === "string"
556
- ? result.path
557
- : undefined;
558
- const meta = result.meta ?? {};
559
- const publicUrl = typeof meta.publicUrl === "string" ? meta.publicUrl : undefined;
560
- const destPath = typeof meta.destPath === "string" ? meta.destPath : toolPath;
561
- completionState.recentToolResults.push({
562
- name: toolName,
563
- ok: result.ok,
564
- output: result.output,
565
- path: destPath,
566
- publicUrl,
567
- meta,
568
- });
569
- if (completionState.recentToolResults.length > 5) {
570
- completionState.recentToolResults.shift();
600
+ if (mutatingEditsThisTurn > 0) {
601
+ const early = await tryHarnessEarlyFinish({
602
+ harnessMode: input.harnessMode,
603
+ allowedWritePaths: input.allowedWritePaths,
604
+ repoPath: input.repoPath,
605
+ workspaceCheckpoint: input.workspaceCheckpoint,
606
+ bus: input.bus,
607
+ policyState,
608
+ editsApplied,
609
+ lastAssistantText,
610
+ loopStartMs,
611
+ lastUsage,
612
+ messages,
613
+ modelTurnCount,
614
+ hasVision: input.hasVision,
615
+ selectedDom: input.selectedDom,
616
+ completion: input.completion,
617
+ completionAuto: input.completionAuto,
618
+ completionEvaluationEnabled: input.completionEvaluationEnabled,
619
+ completionConfidenceThreshold: input.completionConfidenceThreshold,
620
+ referenceImages: input.referenceImages,
621
+ ownerRequest: input.ownerRequest,
622
+ model: input.model,
623
+ timeoutMs: input.timeoutMs,
624
+ signal: input.signal,
625
+ completionState,
626
+ });
627
+ if (early) {
628
+ return early;
629
+ }
571
630
  }
572
- if (result.applied) {
573
- editsApplied += 1;
574
- mutatingEditsThisTurn += 1;
575
- invalidateWorkspaceCheckpointCache(input.workspaceCheckpoint);
631
+ if (editsApplied > 0 && toolCalls.every((call) => !isMutatingTool(call.function.name))) {
632
+ continue;
576
633
  }
577
- messages.push({
578
- role: "tool",
579
- tool_call_id: toolCall.id,
580
- content: result.output,
581
- });
582
634
  }
583
- if (mutatingEditsThisTurn > 0) {
635
+ if (input.harnessMode &&
636
+ input.allowedWritePaths?.length &&
637
+ editsApplied > 0) {
584
638
  const early = await tryHarnessEarlyFinish({
585
639
  harnessMode: input.harnessMode,
586
640
  allowedWritePaths: input.allowedWritePaths,
@@ -593,7 +647,7 @@ export async function runAgentToolLoop(input) {
593
647
  loopStartMs,
594
648
  lastUsage,
595
649
  messages,
596
- modelTurnCount,
650
+ modelTurnCount: MAX_LOOP_TURNS,
597
651
  hasVision: input.hasVision,
598
652
  selectedDom: input.selectedDom,
599
653
  completion: input.completion,
@@ -611,60 +665,27 @@ export async function runAgentToolLoop(input) {
611
665
  return early;
612
666
  }
613
667
  }
614
- if (editsApplied > 0 && toolCalls.every((call) => !isMutatingTool(call.function.name))) {
615
- continue;
616
- }
617
- }
618
- if (input.harnessMode &&
619
- input.allowedWritePaths?.length &&
620
- editsApplied > 0) {
621
- const early = await tryHarnessEarlyFinish({
622
- harnessMode: input.harnessMode,
623
- allowedWritePaths: input.allowedWritePaths,
624
- repoPath: input.repoPath,
625
- workspaceCheckpoint: input.workspaceCheckpoint,
626
- bus: input.bus,
627
- policyState,
668
+ return {
669
+ ok: editsApplied > 0,
670
+ response: lastAssistantText || "Agent loop ended",
671
+ summary: editsApplied > 0
672
+ ? `Applied ${editsApplied} edit(s) via tools`
673
+ : policyState.abortReason ?? "Agent loop ended without edits",
628
674
  editsApplied,
629
- lastAssistantText,
630
- loopStartMs,
631
- lastUsage,
632
- messages,
633
- modelTurnCount: MAX_LOOP_TURNS,
634
- hasVision: input.hasVision,
635
- selectedDom: input.selectedDom,
636
- completion: input.completion,
637
- completionAuto: input.completionAuto,
638
- completionEvaluationEnabled: input.completionEvaluationEnabled,
639
- completionConfidenceThreshold: input.completionConfidenceThreshold,
640
- referenceImages: input.referenceImages,
641
- ownerRequest: input.ownerRequest,
642
- model: input.model,
643
- timeoutMs: input.timeoutMs,
644
- signal: input.signal,
645
- completionState,
646
- });
647
- if (early) {
648
- return early;
649
- }
675
+ toolCallCount: policyState.toolCallCount,
676
+ abortReason: policyState.abortReason,
677
+ usage: lastUsage,
678
+ error: editsApplied === 0
679
+ ? {
680
+ message: policyState.abortReason ?? "No edits applied",
681
+ code: "EDIT_NOT_APPLIED",
682
+ }
683
+ : undefined,
684
+ };
685
+ }
686
+ finally {
687
+ clearHostToolsForRun();
650
688
  }
651
- return {
652
- ok: editsApplied > 0,
653
- response: lastAssistantText || "Agent loop ended",
654
- summary: editsApplied > 0
655
- ? `Applied ${editsApplied} edit(s) via tools`
656
- : policyState.abortReason ?? "Agent loop ended without edits",
657
- editsApplied,
658
- toolCallCount: policyState.toolCallCount,
659
- abortReason: policyState.abortReason,
660
- usage: lastUsage,
661
- error: editsApplied === 0
662
- ? {
663
- message: policyState.abortReason ?? "No edits applied",
664
- code: "EDIT_NOT_APPLIED",
665
- }
666
- : undefined,
667
- };
668
689
  }
669
690
  function isMutatingTool(toolName) {
670
691
  const name = toolName.toLowerCase();