grok-dev 1.0.0-rc8 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/.claude/worktrees/vigilant-johnson/.cursor/hooks/state/continual-learning.json +8 -0
  2. package/.claude/worktrees/vigilant-johnson/.cursor/rules/development-workflow.mdc +66 -0
  3. package/.claude/worktrees/vigilant-johnson/.cursor/rules/project-overview.mdc +66 -0
  4. package/.claude/worktrees/vigilant-johnson/.cursor/rules/react-ink-components.mdc +45 -0
  5. package/.claude/worktrees/vigilant-johnson/.cursor/rules/tools-and-agent.mdc +62 -0
  6. package/.claude/worktrees/vigilant-johnson/.cursor/rules/typescript-conventions.mdc +54 -0
  7. package/.claude/worktrees/vigilant-johnson/.husky/pre-commit +1 -0
  8. package/.claude/worktrees/vigilant-johnson/LICENSE +21 -0
  9. package/.claude/worktrees/vigilant-johnson/README.md +341 -0
  10. package/.claude/worktrees/vigilant-johnson/biome.json +51 -0
  11. package/.claude/worktrees/vigilant-johnson/package.json +74 -0
  12. package/.claude/worktrees/vigilant-johnson/telegram-pair-code.txt +0 -0
  13. package/.claude/worktrees/vigilant-johnson/vitest.config.ts +7 -0
  14. package/.grok/generated-media/image-2026-03-26T16-38-08-388Z.jpg +0 -0
  15. package/.grok/generated-media/video-2026-03-26T16-39-19-329Z.mp4 +0 -0
  16. package/.grok/settings.json +1 -1
  17. package/README.md +20 -2
  18. package/dist/agent/agent.d.ts +9 -1
  19. package/dist/agent/agent.js +704 -18
  20. package/dist/agent/agent.js.map +1 -1
  21. package/dist/agent/batch-mode.test.d.ts +1 -0
  22. package/dist/agent/batch-mode.test.js.map +1 -0
  23. package/dist/agent/delegations.d.ts +2 -0
  24. package/dist/agent/delegations.js +9 -1
  25. package/dist/agent/delegations.js.map +1 -1
  26. package/dist/agent/delegations.test.js.map +1 -1
  27. package/dist/grok/batch.d.ts +136 -0
  28. package/dist/grok/batch.js +204 -0
  29. package/dist/grok/batch.js.map +1 -0
  30. package/dist/grok/batch.test.d.ts +1 -0
  31. package/dist/grok/batch.test.js.map +1 -0
  32. package/dist/grok/tool-schemas.d.ts +3 -0
  33. package/dist/grok/tool-schemas.js +24 -0
  34. package/dist/grok/tool-schemas.js.map +1 -0
  35. package/dist/grok/tool-schemas.test.d.ts +1 -0
  36. package/dist/grok/tool-schemas.test.js.map +1 -0
  37. package/dist/grok/tools.js +3 -3
  38. package/dist/grok/tools.js.map +1 -1
  39. package/dist/grok/tools.test.js.map +1 -1
  40. package/dist/headless/output.d.ts +1 -0
  41. package/dist/headless/output.js +29 -4
  42. package/dist/headless/output.js.map +1 -1
  43. package/dist/index.js +24 -6
  44. package/dist/index.js.map +1 -1
  45. package/dist/tools/bash.d.ts +3 -1
  46. package/dist/tools/bash.js +101 -12
  47. package/dist/tools/bash.js.map +1 -1
  48. package/dist/tools/bash.test.js.map +1 -1
  49. package/dist/types/index.d.ts +18 -1
  50. package/dist/types/index.js.map +1 -1
  51. package/dist/ui/app.js +14 -0
  52. package/dist/ui/app.js.map +1 -1
  53. package/dist/ui/schedule-modal.js +2 -2
  54. package/dist/ui/schedule-modal.js.map +1 -1
  55. package/dist/utils/settings.d.ts +6 -0
  56. package/dist/utils/settings.js +9 -1
  57. package/dist/utils/settings.js.map +1 -1
  58. package/dist/utils/skills.d.ts +3 -2
  59. package/dist/utils/skills.js +27 -7
  60. package/dist/utils/skills.js.map +1 -1
  61. package/dist/utils/skills.test.d.ts +1 -0
  62. package/dist/utils/skills.test.js.map +1 -0
  63. package/dist/utils/subagents-settings.test.js.map +1 -1
  64. package/dist/verify/checkpoint.d.ts +11 -0
  65. package/dist/verify/checkpoint.js +158 -0
  66. package/dist/verify/checkpoint.js.map +1 -0
  67. package/dist/verify/checkpoint.test.d.ts +1 -0
  68. package/dist/verify/checkpoint.test.js.map +1 -0
  69. package/dist/verify/entrypoint.d.ts +34 -0
  70. package/dist/verify/entrypoint.js +642 -0
  71. package/dist/verify/entrypoint.js.map +1 -0
  72. package/dist/verify/entrypoint.test.d.ts +1 -0
  73. package/dist/verify/entrypoint.test.js.map +1 -0
  74. package/package.json +2 -1
  75. package/tmp/.grok/verify-artifacts/screenshot-1774806349456.png +0 -0
  76. package/tmp/.grok/verify-artifacts/verify-smoke.webm +0 -0
  77. package/tmp/README.md +36 -0
  78. package/tmp/eslint.config.mjs +18 -0
  79. package/tmp/next.config.ts +7 -0
  80. package/tmp/package.json +34 -0
  81. package/tmp/postcss.config.mjs +7 -0
  82. package/tmp/public/file.svg +1 -0
  83. package/tmp/public/globe.svg +1 -0
  84. package/tmp/public/next.svg +1 -0
  85. package/tmp/public/vercel.svg +1 -0
  86. package/tmp/public/window.svg +1 -0
  87. package/tmp/large_class.py +0 -633
@@ -1,6 +1,9 @@
1
+ import { convertToBase64 } from "@ai-sdk/provider-utils";
1
2
  import { stepCountIs, streamText } from "ai";
3
+ import { addBatchRequests, createBatch, getBatchChatCompletion, pollBatchRequestResult, } from "../grok/batch";
2
4
  import { createProvider, generateTitle as genTitle, resolveModelRuntime } from "../grok/client";
3
5
  import { DEFAULT_MODEL, getModelInfo, normalizeModelId } from "../grok/models";
6
+ import { toolSetToBatchTools } from "../grok/tool-schemas";
4
7
  import { createTools } from "../grok/tools";
5
8
  import { buildMcpToolSet } from "../mcp/runtime";
6
9
  import { appendCompaction, appendMessages, appendSystemMessage, buildChatEntries, getNextMessageSequence, getSessionTotalTokens, loadTranscript, loadTranscriptState, recordUsageEvent, SessionStore, } from "../storage/index";
@@ -9,6 +12,7 @@ import { ScheduleManager } from "../tools/schedule";
9
12
  import { loadCustomInstructions } from "../utils/instructions";
10
13
  import { loadMcpServers, loadValidSubAgents, } from "../utils/settings";
11
14
  import { discoverSkills, formatSkillsForPrompt } from "../utils/skills";
15
+ import { buildVerifyDetectPrompt, normalizeVerifyRecipe } from "../verify/entrypoint";
12
16
  import { createCompactionSummaryMessage, DEFAULT_KEEP_RECENT_TOKENS, DEFAULT_RESERVE_TOKENS, estimateConversationTokens, generateCompactionSummary, prepareCompaction, relaxCompactionSettings, shouldCompactContext, } from "./compaction";
13
17
  import { DelegationManager } from "./delegations";
14
18
  import { containsEncryptedReasoning, sanitizeModelMessages } from "./reasoning";
@@ -36,7 +40,7 @@ TOOLS:
36
40
  - process_logs: View recent output from a background process by ID.
37
41
  - process_stop: Stop a background process by ID.
38
42
  - process_list: List all background processes with status and uptime.
39
- - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
43
+ - task: Delegate a focused foreground task to a sub-agent. Use general for multi-step execution, explore for fast read-only research, verify for sandbox-aware validation, or a configured custom sub-agent name when listed under CUSTOM SUB-AGENTS.
40
44
  - delegate: Launch a read-only background agent for longer research while you continue working.
41
45
  - delegation_read: Retrieve a completed background delegation result by ID.
42
46
  - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
@@ -65,10 +69,11 @@ WORKFLOW:
65
69
  9. Use search_web or search_x when you need up-to-date information
66
70
 
67
71
  DEFAULT DELEGATION POLICY:
68
- - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, or any request that likely needs reading multiple files before acting.
72
+ - Prefer the task tool by default for code review, code quality analysis, architecture research, root-cause investigation, bug triage, verification, or any request that likely needs reading multiple files before acting.
69
73
  - Prefer delegate for longer-running read-only exploration when you can keep making progress without blocking.
70
74
  - Use the explore sub-agent for read-only investigation, reviews, research, and "how does this work?" tasks.
71
75
  - Use the general sub-agent for delegated work that may need editing files, running commands, or producing a concrete implementation.
76
+ - Use the verify sub-agent for sandbox-aware build, test, app boot, and smoke validation work.
72
77
  - Use a matching custom sub-agent when the task fits one of the configured specializations.
73
78
  - Never use delegate for tasks that should edit files or make shell changes.
74
79
  - When a background delegation is running, do not wait idly and do not spam delegation_list(). Continue useful work.
@@ -80,6 +85,7 @@ EXAMPLES:
80
85
  - "research how auth works" -> delegate to explore first
81
86
  - "investigate why this test fails" -> delegate to explore first, then continue with findings
82
87
  - "refactor this module" -> delegate a focused part to general when helpful
88
+ - "verify this feature locally" -> use verify
83
89
  - "generate a logo" -> use generate_image
84
90
  - "animate this still image" -> use generate_video
85
91
  - Recurring specialized workflows -> use the matching custom sub-agent via task
@@ -161,27 +167,46 @@ Current working directory: ${cwd}`;
161
167
  function buildSubagentPrompt(request, cwd, custom, sandboxMode, subagents, sandboxSettings) {
162
168
  const isExplore = request.agent === "explore";
163
169
  const isVision = request.agent === "vision";
164
- const mode = isExplore ? "ask" : "agent";
170
+ const isVerify = request.agent === "verify";
171
+ const isVerifyDetect = request.agent === "verify-detect";
172
+ const mode = isExplore || isVerifyDetect ? "ask" : "agent";
165
173
  const role = custom
166
174
  ? `You are the custom sub-agent "${custom.name}". You can investigate, edit files, and run commands unless the delegated task says otherwise.`
167
175
  : request.agent === "explore"
168
176
  ? "You are the Explore sub-agent. You are read-only and focus on fast codebase research."
169
177
  : isVision
170
178
  ? "You are the Vision sub-agent."
171
- : "You are the General sub-agent. You can investigate, edit files, and run commands to complete delegated work.";
179
+ : isVerifyDetect
180
+ ? "You are the Verify Detect sub-agent. You inspect a repository to produce a structured verification recipe. You are read-only."
181
+ : isVerify
182
+ ? "You are the Verify sub-agent. You specialize in sandbox-aware local verification using builds, tests, app boot checks, and optional browser smoke tests."
183
+ : "You are the General sub-agent. You can investigate, edit files, and run commands to complete delegated work.";
172
184
  const rules = isExplore
173
185
  ? [
174
186
  "Do not create, modify, or delete files.",
175
187
  "Prefer `read_file` and search commands over broad shell exploration.",
176
188
  "Return concise findings for the parent agent.",
177
189
  ]
178
- : isVision
179
- ? ["Validate the image."]
180
- : [
181
- "Work only on the delegated task below.",
182
- "Use tools directly instead of narrating your intent.",
183
- "Return a concise summary for the parent agent with key outcomes and any open risks.",
184
- ];
190
+ : isVerifyDetect
191
+ ? [
192
+ "Do not create, modify, or delete files.",
193
+ "Read config files, package manifests, scripts, and source layout to understand the project.",
194
+ "Return ONLY a valid JSON object with the VerifyRecipe schema. No markdown, no prose, no explanation outside the JSON.",
195
+ ]
196
+ : isVision
197
+ ? ["Validate the image."]
198
+ : isVerify
199
+ ? [
200
+ "Focus on verification first. Do not make durable source edits unless the delegated task explicitly asks for fixes.",
201
+ "Prefer the smallest meaningful set of validation commands and explain any environment blockers clearly.",
202
+ "IMPORTANT: When the recipe includes a smoke target URL and a forwarded port, you MUST attempt browser smoke testing using agent-browser via the bash tool. The agent-browser command runs on the HOST, not inside the sandbox. It will work even in sandbox mode. Do not skip it or assume it is unavailable. Just run the command.",
203
+ "Return a concise structured verification report for the parent agent.",
204
+ ]
205
+ : [
206
+ "Work only on the delegated task below.",
207
+ "Use tools directly instead of narrating your intent.",
208
+ "Return a concise summary for the parent agent with key outcomes and any open risks.",
209
+ ];
185
210
  const instructionLines = custom?.instruction.trim() ? ["", "SUB-AGENT INSTRUCTIONS:", custom.instruction.trim()] : [];
186
211
  return [
187
212
  role,
@@ -262,6 +287,7 @@ export class Agent {
262
287
  planContext = null;
263
288
  subagentStatusListeners = new Set();
264
289
  sendTelegramFile = null;
290
+ batchApi = false;
265
291
  constructor(apiKey, baseURL, model, maxToolRounds, options = {}) {
266
292
  this.baseURL = baseURL || null;
267
293
  if (apiKey) {
@@ -277,6 +303,7 @@ export class Agent {
277
303
  this.maxToolRounds = maxToolRounds || MAX_TOOL_ROUNDS;
278
304
  const envMax = Number(process.env.GROK_MAX_TOKENS);
279
305
  this.maxTokens = Number.isFinite(envMax) && envMax > 0 ? envMax : 16_384;
306
+ this.batchApi = options.batchApi ?? false;
280
307
  if (options.persistSession !== false) {
281
308
  this.sessionStore = new SessionStore(this.bash.getCwd());
282
309
  this.workspace = this.sessionStore.getWorkspace();
@@ -461,6 +488,173 @@ export class Agent {
461
488
  return [];
462
489
  }
463
490
  }
491
+ getBatchClientOptions(signal) {
492
+ if (!this.apiKey) {
493
+ throw new Error("API key required. Add an API key to continue.");
494
+ }
495
+ return {
496
+ apiKey: this.apiKey,
497
+ baseURL: this.baseURL ?? undefined,
498
+ signal,
499
+ };
500
+ }
501
+ async executeBatchToolCall(tools, toolCall, messages, signal) {
502
+ const tool = tools[toolCall.function.name];
503
+ if (!tool || tool.type === "provider" || typeof tool.execute !== "function") {
504
+ return {
505
+ input: parseToolArgumentsOrRaw(toolCall.function.arguments),
506
+ result: {
507
+ success: false,
508
+ output: `Tool "${toolCall.function.name}" is unavailable in batch mode.`,
509
+ },
510
+ };
511
+ }
512
+ let parsedInput;
513
+ try {
514
+ parsedInput = toolCall.function.arguments.trim() ? JSON.parse(toolCall.function.arguments) : {};
515
+ }
516
+ catch (error) {
517
+ const message = error instanceof Error ? error.message : String(error);
518
+ return {
519
+ input: toolCall.function.arguments,
520
+ result: {
521
+ success: false,
522
+ output: `Tool "${toolCall.function.name}" received invalid JSON arguments: ${message}`,
523
+ },
524
+ };
525
+ }
526
+ try {
527
+ const output = await tool.execute(parsedInput, {
528
+ toolCallId: toolCall.id,
529
+ messages,
530
+ abortSignal: signal,
531
+ });
532
+ return {
533
+ input: parsedInput,
534
+ result: toToolResult(output),
535
+ };
536
+ }
537
+ catch (error) {
538
+ if (signal?.aborted) {
539
+ throw error;
540
+ }
541
+ const message = error instanceof Error ? error.message : String(error);
542
+ return {
543
+ input: parsedInput,
544
+ result: {
545
+ success: false,
546
+ output: `Tool "${toolCall.function.name}" failed: ${message}`,
547
+ },
548
+ };
549
+ }
550
+ }
551
+ async runTaskRequestBatch(args) {
552
+ const { request, childMessages, childSystem, childRuntime, childTools, maxSteps, initialDetail, onActivity, signal, } = args;
553
+ if (childRuntime.modelInfo?.responsesOnly) {
554
+ throw new Error("Batch mode currently supports chat-completions models only.");
555
+ }
556
+ const batchTools = childRuntime.modelInfo?.supportsClientTools === false ? [] : await toolSetToBatchTools(childTools);
557
+ const batch = await createBatch({
558
+ ...this.getBatchClientOptions(signal),
559
+ name: buildBatchName(`task-${request.agent}`, request.description),
560
+ });
561
+ const turnMessages = [];
562
+ const totalUsage = {};
563
+ let assistantText = "";
564
+ let lastActivity = initialDetail;
565
+ for (let round = 0; round < maxSteps; round++) {
566
+ const batchRequestId = `task-${Date.now()}-${round + 1}`;
567
+ await addBatchRequests({
568
+ ...this.getBatchClientOptions(signal),
569
+ batchId: batch.batch_id,
570
+ batchRequests: [
571
+ {
572
+ batch_request_id: batchRequestId,
573
+ batch_request: {
574
+ chat_get_completion: buildBatchChatCompletionRequest({
575
+ modelId: childRuntime.modelId,
576
+ system: childSystem,
577
+ messages: [...childMessages, ...turnMessages],
578
+ temperature: request.agent === "explore" ? 0.2 : 0.5,
579
+ maxOutputTokens: childRuntime.modelInfo?.supportsMaxOutputTokens === false
580
+ ? undefined
581
+ : Math.min(this.maxTokens, 8_192),
582
+ reasoningEffort: childRuntime.providerOptions?.xai.reasoningEffort,
583
+ tools: batchTools,
584
+ }),
585
+ },
586
+ },
587
+ ],
588
+ });
589
+ const result = await pollBatchRequestResult({
590
+ ...this.getBatchClientOptions(signal),
591
+ batchId: batch.batch_id,
592
+ batchRequestId,
593
+ });
594
+ const response = getBatchChatCompletion(result);
595
+ accumulateUsage(totalUsage, getBatchUsage(response));
596
+ const choice = response.choices[0];
597
+ if (!choice) {
598
+ throw new Error("Batch response did not contain any choices.");
599
+ }
600
+ const content = choice?.message.content ?? "";
601
+ if (content) {
602
+ assistantText += content;
603
+ }
604
+ const requestMessages = [...childMessages, ...turnMessages];
605
+ const toolCalls = (choice?.message.tool_calls ?? []).map(toLocalToolCall);
606
+ const assistantMessage = buildAssistantBatchMessage(content, toolCalls);
607
+ if (assistantMessage) {
608
+ turnMessages.push(assistantMessage);
609
+ }
610
+ if (toolCalls.length === 0) {
611
+ if (hasUsage(totalUsage)) {
612
+ this.recordUsage(totalUsage, "task", childRuntime.modelId);
613
+ }
614
+ const output = assistantText.trim() || `Task completed. Last action: ${lastActivity}`;
615
+ return {
616
+ success: true,
617
+ output,
618
+ task: {
619
+ agent: request.agent,
620
+ description: request.description,
621
+ summary: firstLine(output),
622
+ activity: lastActivity,
623
+ },
624
+ };
625
+ }
626
+ const toolParts = [];
627
+ for (const toolCall of toolCalls) {
628
+ const nextActivity = formatSubagentActivity(toolCall.function.name, parseToolArgumentsOrRaw(toolCall.function.arguments));
629
+ lastActivity = nextActivity;
630
+ onActivity?.(nextActivity);
631
+ const executed = await this.executeBatchToolCall(childTools, toolCall, requestMessages, signal);
632
+ toolParts.push({
633
+ toolCall,
634
+ input: executed.input,
635
+ toolResult: executed.result,
636
+ });
637
+ }
638
+ const toolMessage = buildToolBatchMessage(toolParts);
639
+ if (toolMessage) {
640
+ turnMessages.push(toolMessage);
641
+ }
642
+ }
643
+ if (hasUsage(totalUsage)) {
644
+ this.recordUsage(totalUsage, "task", childRuntime.modelId);
645
+ }
646
+ const output = assistantText.trim() || `Task stopped after ${maxSteps} batch rounds. Last action: ${lastActivity}`;
647
+ return {
648
+ success: false,
649
+ output,
650
+ task: {
651
+ agent: request.agent,
652
+ description: request.description,
653
+ summary: output,
654
+ activity: lastActivity,
655
+ },
656
+ };
657
+ }
464
658
  async runTaskRequest(request, onActivity, abortSignal) {
465
659
  const provider = this.requireProvider();
466
660
  const signal = abortSignal;
@@ -468,10 +662,14 @@ export class Agent {
468
662
  const isExplore = agentKey === "explore";
469
663
  const isGeneral = agentKey === "general";
470
664
  const isVision = agentKey === "vision";
665
+ const isVerify = agentKey === "verify";
666
+ const isVerifyDetect = agentKey === "verify-detect";
471
667
  const subagents = loadValidSubAgents();
472
- const custom = !isExplore && !isGeneral && !isVision ? findCustomSubagent(agentKey, subagents) : undefined;
473
- if (!isExplore && !isGeneral && !isVision && !custom) {
474
- const message = `Unknown sub-agent "${agentKey}". Use general, explore, vision, or a configured name from ~/.grok/user-settings.json.`;
668
+ const custom = !isExplore && !isGeneral && !isVision && !isVerify && !isVerifyDetect
669
+ ? findCustomSubagent(agentKey, subagents)
670
+ : undefined;
671
+ if (!isExplore && !isGeneral && !isVision && !isVerify && !isVerifyDetect && !custom) {
672
+ const message = `Unknown sub-agent "${agentKey}". Use general, explore, vision, verify, or a configured name from ~/.grok/user-settings.json.`;
475
673
  return {
476
674
  success: false,
477
675
  output: message,
@@ -482,13 +680,24 @@ export class Agent {
482
680
  },
483
681
  };
484
682
  }
485
- const childMode = isExplore ? "ask" : "agent";
683
+ const childMode = isExplore || isVerifyDetect ? "ask" : "agent";
684
+ const verifySandboxOverrides = isVerify
685
+ ? { allowNet: true, allowedHosts: undefined, allowEphemeralInstall: true, hostBrowserCommandsOnHost: true }
686
+ : {};
486
687
  const childBash = new BashTool(this.bash.getCwd(), {
487
- sandboxMode: this.bash.getSandboxMode(),
488
- sandboxSettings: this.bash.getSandboxSettings(),
688
+ sandboxMode: isVerify ? "shuru" : this.bash.getSandboxMode(),
689
+ sandboxSettings: isVerify
690
+ ? { ...this.bash.getSandboxSettings(), ...verifySandboxOverrides }
691
+ : this.bash.getSandboxSettings(),
489
692
  });
490
693
  const childBaseTools = createTools(childBash, provider, childMode);
491
- const initialDetail = isExplore ? "Scanning the codebase" : "Planning delegated work";
694
+ const initialDetail = isExplore
695
+ ? "Scanning the codebase"
696
+ : isVerifyDetect
697
+ ? "Detecting verification recipe"
698
+ : isVerify
699
+ ? "Preparing verification pass"
700
+ : "Planning delegated work";
492
701
  let assistantText = "";
493
702
  let lastActivity = initialDetail;
494
703
  let childTools = childBaseTools;
@@ -512,6 +721,19 @@ export class Agent {
512
721
  const childMessages = isVision
513
722
  ? await buildVisionUserMessages(request.prompt, childBash.getCwd(), signal)
514
723
  : [{ role: "user", content: request.prompt }];
724
+ if (this.batchApi) {
725
+ return await this.runTaskRequestBatch({
726
+ request,
727
+ childMessages,
728
+ childSystem,
729
+ childRuntime,
730
+ childTools,
731
+ maxSteps: Math.min(this.maxToolRounds, isExplore ? 60 : 120),
732
+ initialDetail,
733
+ onActivity,
734
+ signal,
735
+ });
736
+ }
515
737
  const result = streamText({
516
738
  model: childRuntime.model,
517
739
  system: childSystem,
@@ -605,6 +827,7 @@ export class Agent {
605
827
  sandboxSettings: this.bash.getSandboxSettings(),
606
828
  maxToolRounds: this.maxToolRounds,
607
829
  maxTokens: this.maxTokens,
830
+ batchApi: this.batchApi,
608
831
  });
609
832
  }
610
833
  catch (err) {
@@ -681,6 +904,183 @@ export class Agent {
681
904
  this.messageSeqs = [null, ...keptSeqs];
682
905
  return true;
683
906
  }
907
+ async *processMessageBatchTurn(args) {
908
+ const { userModelMessage, observer, provider, subagents, system, runtime, modelInfo, signal } = args;
909
+ let attemptedOverflowRecovery = false;
910
+ while (true) {
911
+ let closeMcp;
912
+ const turnMessages = [];
913
+ const totalUsage = {};
914
+ try {
915
+ const settings = attemptedOverflowRecovery
916
+ ? relaxCompactionSettings(this.getCompactionSettings())
917
+ : this.getCompactionSettings();
918
+ if (modelInfo) {
919
+ await this.compactForContext(provider, system, modelInfo.contextWindow, signal, settings, attemptedOverflowRecovery);
920
+ }
921
+ if (runtime.modelInfo?.responsesOnly) {
922
+ throw new Error("Batch mode currently supports chat-completions models only.");
923
+ }
924
+ const baseTools = createTools(this.bash, provider, this.mode, {
925
+ runTask: (request, abortSignal) => this.runTask(request, combineAbortSignals(signal, abortSignal)),
926
+ runDelegation: (request, abortSignal) => this.runDelegation(request, combineAbortSignals(signal, abortSignal)),
927
+ readDelegation: (id) => this.readDelegation(id),
928
+ listDelegations: () => this.listDelegations(),
929
+ scheduleManager: this.schedules,
930
+ subagents,
931
+ sendTelegramFile: this.sendTelegramFile ?? undefined,
932
+ });
933
+ let tools = runtime.modelInfo?.supportsClientTools === false ? {} : baseTools;
934
+ if (this.mode === "agent" && runtime.modelInfo?.supportsClientTools !== false) {
935
+ const mcpBundle = await buildMcpToolSet(loadMcpServers());
936
+ closeMcp = mcpBundle.close;
937
+ tools = { ...baseTools, ...mcpBundle.tools };
938
+ if (mcpBundle.errors.length > 0) {
939
+ yield { type: "content", content: `MCP unavailable: ${mcpBundle.errors.join(" | ")}\n\n` };
940
+ }
941
+ }
942
+ const batchTools = runtime.modelInfo?.supportsClientTools === false ? [] : await toolSetToBatchTools(tools);
943
+ const batch = await createBatch({
944
+ ...this.getBatchClientOptions(signal),
945
+ name: buildBatchName("session", this.getSessionId() || runtime.modelId),
946
+ });
947
+ for (let round = 0; round < this.maxToolRounds; round++) {
948
+ const stepNumber = round + 1;
949
+ notifyObserver(observer?.onStepStart, {
950
+ stepNumber,
951
+ timestamp: Date.now(),
952
+ });
953
+ const batchRequestId = `turn-${Date.now()}-${stepNumber}`;
954
+ await addBatchRequests({
955
+ ...this.getBatchClientOptions(signal),
956
+ batchId: batch.batch_id,
957
+ batchRequests: [
958
+ {
959
+ batch_request_id: batchRequestId,
960
+ batch_request: {
961
+ chat_get_completion: buildBatchChatCompletionRequest({
962
+ modelId: runtime.modelId,
963
+ system,
964
+ messages: [...this.messages, ...turnMessages],
965
+ temperature: 0.7,
966
+ maxOutputTokens: runtime.modelInfo?.supportsMaxOutputTokens === false ? undefined : this.maxTokens,
967
+ reasoningEffort: runtime.providerOptions?.xai.reasoningEffort,
968
+ tools: batchTools,
969
+ }),
970
+ },
971
+ },
972
+ ],
973
+ });
974
+ const result = await pollBatchRequestResult({
975
+ ...this.getBatchClientOptions(signal),
976
+ batchId: batch.batch_id,
977
+ batchRequestId,
978
+ });
979
+ const response = getBatchChatCompletion(result);
980
+ const choice = response.choices[0];
981
+ if (!choice) {
982
+ throw new Error("Batch response did not contain any choices.");
983
+ }
984
+ const usage = getBatchUsage(response);
985
+ accumulateUsage(totalUsage, usage);
986
+ const finishReason = getBatchFinishReason(choice.finish_reason);
987
+ const content = choice.message.content ?? "";
988
+ if (content) {
989
+ yield { type: "content", content };
990
+ }
991
+ const requestMessages = [...this.messages, ...turnMessages];
992
+ const toolCalls = (choice.message.tool_calls ?? []).map(toLocalToolCall);
993
+ const assistantMessage = buildAssistantBatchMessage(content, toolCalls);
994
+ if (assistantMessage) {
995
+ turnMessages.push(assistantMessage);
996
+ }
997
+ if (toolCalls.length === 0) {
998
+ notifyObserver(observer?.onStepFinish, {
999
+ stepNumber,
1000
+ timestamp: Date.now(),
1001
+ finishReason,
1002
+ usage,
1003
+ });
1004
+ if (hasUsage(totalUsage)) {
1005
+ this.recordUsage(totalUsage, "message", runtime.modelId);
1006
+ }
1007
+ this.appendCompletedTurn(userModelMessage, turnMessages);
1008
+ yield { type: "done" };
1009
+ return;
1010
+ }
1011
+ yield { type: "tool_calls", toolCalls };
1012
+ const toolParts = [];
1013
+ for (const toolCall of toolCalls) {
1014
+ notifyObserver(observer?.onToolStart, {
1015
+ toolCall,
1016
+ timestamp: Date.now(),
1017
+ });
1018
+ const executed = await this.executeBatchToolCall(tools, toolCall, requestMessages, signal);
1019
+ notifyObserver(observer?.onToolFinish, {
1020
+ toolCall,
1021
+ toolResult: executed.result,
1022
+ timestamp: Date.now(),
1023
+ });
1024
+ yield { type: "tool_result", toolCall, toolResult: executed.result };
1025
+ toolParts.push({
1026
+ toolCall,
1027
+ input: executed.input,
1028
+ toolResult: executed.result,
1029
+ });
1030
+ }
1031
+ const toolMessage = buildToolBatchMessage(toolParts);
1032
+ if (toolMessage) {
1033
+ turnMessages.push(toolMessage);
1034
+ }
1035
+ notifyObserver(observer?.onStepFinish, {
1036
+ stepNumber,
1037
+ timestamp: Date.now(),
1038
+ finishReason,
1039
+ usage,
1040
+ });
1041
+ }
1042
+ const message = `Error: Reached max tool rounds (${this.maxToolRounds}) in batch mode.`;
1043
+ notifyObserver(observer?.onError, {
1044
+ message,
1045
+ timestamp: Date.now(),
1046
+ });
1047
+ if (hasUsage(totalUsage)) {
1048
+ this.recordUsage(totalUsage, "message", runtime.modelId);
1049
+ }
1050
+ this.appendCompletedTurn(userModelMessage, turnMessages);
1051
+ yield { type: "error", content: message };
1052
+ yield { type: "done" };
1053
+ return;
1054
+ }
1055
+ catch (err) {
1056
+ if (signal.aborted) {
1057
+ this.discardAbortedTurn(userModelMessage);
1058
+ yield { type: "content", content: "\n\n[Cancelled]" };
1059
+ yield { type: "done" };
1060
+ return;
1061
+ }
1062
+ if (!attemptedOverflowRecovery && turnMessages.length === 0 && modelInfo && isContextLimitError(err)) {
1063
+ attemptedOverflowRecovery = true;
1064
+ continue;
1065
+ }
1066
+ const msg = err instanceof Error ? err.message : String(err);
1067
+ notifyObserver(observer?.onError, {
1068
+ message: `Error: ${msg}`,
1069
+ timestamp: Date.now(),
1070
+ });
1071
+ if (hasUsage(totalUsage)) {
1072
+ this.recordUsage(totalUsage, "message", runtime.modelId);
1073
+ }
1074
+ this.appendCompletedTurn(userModelMessage, turnMessages);
1075
+ yield { type: "error", content: `Error: ${msg}` };
1076
+ yield { type: "done" };
1077
+ return;
1078
+ }
1079
+ finally {
1080
+ await closeMcp?.().catch(() => { });
1081
+ }
1082
+ }
1083
+ }
684
1084
  appendCompletedTurn(userMessage, newMessages) {
685
1085
  if (newMessages.length === 0)
686
1086
  return;
@@ -717,6 +1117,26 @@ export class Agent {
717
1117
  const modelInfo = runtime.modelInfo;
718
1118
  this.planContext = null;
719
1119
  let attemptedOverflowRecovery = false;
1120
+ if (this.batchApi) {
1121
+ try {
1122
+ yield* this.processMessageBatchTurn({
1123
+ userModelMessage,
1124
+ observer,
1125
+ provider,
1126
+ subagents,
1127
+ system,
1128
+ runtime,
1129
+ modelInfo,
1130
+ signal,
1131
+ });
1132
+ }
1133
+ finally {
1134
+ if (this.abortController?.signal === signal) {
1135
+ this.abortController = null;
1136
+ }
1137
+ }
1138
+ return;
1139
+ }
720
1140
  try {
721
1141
  while (true) {
722
1142
  let assistantText = "";
@@ -913,6 +1333,272 @@ export class Agent {
913
1333
  }
914
1334
  return this.provider;
915
1335
  }
1336
+ async detectVerifyRecipe(settings, abortSignal) {
1337
+ try {
1338
+ const result = await this.runTaskRequest({
1339
+ agent: "verify-detect",
1340
+ description: "Detect verification recipe",
1341
+ prompt: buildVerifyDetectPrompt(this.bash.getCwd(), settings ?? this.bash.getSandboxSettings()),
1342
+ }, undefined, abortSignal);
1343
+ if (!result.success || !result.output)
1344
+ return null;
1345
+ const maybeJson = extractJsonObject(result.output);
1346
+ if (!maybeJson)
1347
+ return null;
1348
+ return normalizeVerifyRecipe(JSON.parse(maybeJson));
1349
+ }
1350
+ catch {
1351
+ return null;
1352
+ }
1353
+ }
1354
+ }
1355
/**
 * Returns the substring of `text` that runs from its first "{" to its last
 * "}" (both inclusive), or `null` when there is no "{" or no "}" at or after it.
 *
 * No JSON validation is performed here; the caller is expected to parse the
 * returned text.
 *
 * @param {string} text Text that may contain a JSON object.
 * @returns {string | null}
 */
function extractJsonObject(text) {
    const openIndex = text.indexOf("{");
    const closeIndex = text.lastIndexOf("}");
    const hasBracedSpan = openIndex >= 0 && closeIndex >= openIndex;
    return hasBracedSpan ? text.slice(openIndex, closeIndex + 1) : null;
}
1362
/**
 * Builds a batch name of the form `grok-cli-<prefix>-<label>`.
 *
 * The label has each whitespace run replaced by "-", every character outside
 * `[a-zA-Z0-9._-]` removed, and is cut to at most 48 characters. If nothing
 * is left, "run" is used instead.
 *
 * @param {string} prefix Inserted verbatim after "grok-cli-".
 * @param {string} label Free-form label to sanitize.
 * @returns {string}
 */
function buildBatchName(prefix, label) {
    const dashed = label.replace(/\s+/g, "-");
    const sanitized = dashed.replace(/[^a-zA-Z0-9._-]+/g, "");
    const truncated = sanitized.slice(0, 48);
    const suffix = truncated === "" ? "run" : truncated;
    return `grok-cli-${prefix}-${suffix}`;
}
1369
/**
 * Assembles the chat-completion request body for one batch request.
 *
 * `model`, `messages` and `temperature` are always present. The remaining
 * fields are added only when they carry a value:
 *  - `max_completion_tokens` when `args.maxOutputTokens` is not null/undefined,
 *  - `reasoning_effort` when `args.reasoningEffort` is truthy,
 *  - `tools` when `args.tools` is non-empty.
 *
 * @param args Object with `modelId`, `system`, `messages`, `temperature`,
 *   `maxOutputTokens`, `reasoningEffort` and `tools` (an array).
 * @returns The request body object.
 */
function buildBatchChatCompletionRequest(args) {
    const request = {
        model: args.modelId,
        messages: toBatchChatMessages(args.system, args.messages),
        temperature: args.temperature,
    };
    if (args.maxOutputTokens != null) {
        request.max_completion_tokens = args.maxOutputTokens;
    }
    if (args.reasoningEffort) {
        request.reasoning_effort = args.reasoningEffort;
    }
    if (args.tools.length > 0) {
        request.tools = args.tools;
    }
    return request;
}
1379
/**
 * Converts the agent's message list into chat-completion style messages.
 *
 * The result always starts with a system message holding `system`. Each input
 * message is then mapped by role:
 *  - "system": copied as a system message.
 *  - "user": string content is copied; an array holding exactly one text part
 *    collapses to that part's text; any other array becomes a list of `text`
 *    and `image_url` parts (file parts are kept only when their media type
 *    starts with "image/"). Content that is neither string nor array is dropped.
 *  - "assistant": string content is copied; for array content, text parts are
 *    concatenated and tool-call parts become `tool_calls`. Nothing is emitted
 *    when there is neither text nor a tool call.
 *  - "tool": one tool message per part, skipping "tool-approval-response" parts.
 * Messages with any other role are ignored.
 *
 * @param system System prompt placed first in the output.
 * @param messages Iterable of `{ role, content }` messages.
 * @returns Array of chat-completion messages.
 */
function toBatchChatMessages(system, messages) {
    // URLs are passed through as-is; anything else is inlined as a base64 data URL.
    const toImageUrlPart = (source, mediaType) => ({
        type: "image_url",
        image_url: {
            url: source instanceof URL
                ? source.toString()
                : `data:${mediaType};base64,${toBase64DataContent(source)}`,
        },
    });
    const convertUserParts = (parts) => {
        const converted = [];
        for (const part of parts) {
            if (part.type === "text") {
                converted.push({ type: "text", text: part.text });
            }
            else if (part.type === "image") {
                // A wildcard or missing media type falls back to JPEG.
                const isUnspecified = part.mediaType === "image/*" || !part.mediaType;
                converted.push(toImageUrlPart(part.image, isUnspecified ? "image/jpeg" : part.mediaType));
            }
            else if (part.type === "file") {
                if (!part.mediaType.startsWith("image/")) {
                    continue;
                }
                const fileMediaType = part.mediaType === "image/*" ? "image/jpeg" : part.mediaType;
                converted.push(toImageUrlPart(part.data, fileMediaType));
            }
        }
        return converted;
    };
    const batchMessages = [{ role: "system", content: system }];
    for (const { role, content } of messages) {
        if (role === "system") {
            batchMessages.push({ role: "system", content });
        }
        else if (role === "user") {
            if (typeof content === "string") {
                batchMessages.push({ role: "user", content });
            }
            else if (Array.isArray(content)) {
                const isSingleText = content.length === 1 && content[0]?.type === "text";
                batchMessages.push({
                    role: "user",
                    content: isSingleText ? content[0].text : convertUserParts(content),
                });
            }
        }
        else if (role === "assistant") {
            if (typeof content === "string") {
                batchMessages.push({ role: "assistant", content });
            }
            else if (Array.isArray(content)) {
                let text = "";
                const toolCalls = [];
                for (const part of content) {
                    if (part.type === "text") {
                        text += part.text;
                    }
                    else if (part.type === "tool-call") {
                        toolCalls.push({
                            id: part.toolCallId,
                            type: "function",
                            function: {
                                name: part.toolName,
                                arguments: JSON.stringify(part.input),
                            },
                        });
                    }
                }
                const hasToolCalls = toolCalls.length > 0;
                if (text || hasToolCalls) {
                    const entry = { role: "assistant", content: text };
                    if (hasToolCalls) {
                        entry.tool_calls = toolCalls;
                    }
                    batchMessages.push(entry);
                }
            }
        }
        else if (role === "tool") {
            for (const part of content) {
                if (part.type !== "tool-approval-response") {
                    batchMessages.push({
                        role: "tool",
                        tool_call_id: part.toolCallId,
                        content: toolOutputToText(part.output),
                    });
                }
            }
        }
    }
    return batchMessages;
}
1482
/**
 * Base64-encodes binary content via `convertToBase64`.
 *
 * An `ArrayBuffer` is first wrapped in a `Uint8Array`; every other value is
 * handed to `convertToBase64` unchanged.
 *
 * @param value Binary content to encode.
 * @returns Whatever `convertToBase64` returns for the value.
 */
function toBase64DataContent(value) {
    const payload = value instanceof ArrayBuffer ? new Uint8Array(value) : value;
    return convertToBase64(payload);
}
1485
/**
 * Flattens a tool-result output into the string used as a tool message's
 * `content`.
 *
 * Mapping by `output.type`:
 *  - "text" / "error-text": the value as a string ("" when missing).
 *  - "execution-denied": the reason, or "Tool execution denied." when missing.
 *  - "json" / "error-json" / "content": the value JSON-encoded ("null" when missing).
 *  - any other type: treated like "json" so the result is never `undefined`.
 *
 * A missing output yields "", and a primitive output is stringified directly.
 * Previously an unknown type fell out of the switch and returned `undefined`,
 * and a missing output threw while reading `.type`.
 *
 * @param output Tool output object with `type` and `value`/`reason`.
 * @returns {string}
 */
function toolOutputToText(output) {
    if (output == null) {
        return "";
    }
    if (typeof output !== "object") {
        return String(output);
    }
    switch (output.type) {
        case "text":
        case "error-text":
            return String(output.value ?? "");
        case "execution-denied":
            return output.reason ?? "Tool execution denied.";
        case "json":
        case "error-json":
        case "content":
        default:
            // JSON.stringify yields undefined for functions/symbols; keep the string contract.
            return JSON.stringify(output.value ?? null) ?? "";
    }
}
1498
/**
 * Extracts token and cost counters from a batch response's `usage` object.
 *
 * Two field spellings are accepted: `input_tokens`/`output_tokens` are
 * preferred, with `prompt_tokens`/`completion_tokens` as fallbacks. When
 * `total_tokens` is not a number, the total is derived from the input and
 * output counts via `sumDefined`. A missing `usage` is treated as empty.
 *
 * @param response Batch response object; only `response.usage` is read.
 * @returns `{ inputTokens, outputTokens, totalTokens, costUsdTicks }`, each a
 *   number or `undefined`.
 */
function getBatchUsage(response) {
    const reported = response.usage ?? {};
    let inputTokens = asNumber(reported.input_tokens);
    if (inputTokens == null) {
        inputTokens = asNumber(reported.prompt_tokens);
    }
    let outputTokens = asNumber(reported.output_tokens);
    if (outputTokens == null) {
        outputTokens = asNumber(reported.completion_tokens);
    }
    let totalTokens = asNumber(reported.total_tokens);
    if (totalTokens == null) {
        totalTokens = sumDefined(inputTokens, outputTokens);
    }
    const costUsdTicks = asNumber(reported.cost_in_usd_ticks);
    return { inputTokens, outputTokens, totalTokens, costUsdTicks };
}
1510
/**
 * Adds the counters of `usage` onto `target`, in place.
 *
 * Each of `inputTokens`, `outputTokens`, `totalTokens` and `costUsdTicks` is
 * treated as 0 when null/undefined on either side, so after the call all four
 * fields of `target` are set.
 *
 * @param target Usage object to mutate.
 * @param usage Usage object to add.
 */
function accumulateUsage(target, usage) {
    for (const field of ["inputTokens", "outputTokens", "totalTokens", "costUsdTicks"]) {
        target[field] = (target[field] ?? 0) + (usage[field] ?? 0);
    }
}
1516
/**
 * Reports whether any usage counter is set to a truthy (non-zero) value.
 *
 * Checks `inputTokens`, `outputTokens`, `totalTokens` and `costUsdTicks`, in
 * that order; null/undefined counters count as 0.
 *
 * @param usage Usage object to inspect.
 * @returns {boolean}
 */
function hasUsage(usage) {
    const counters = ["inputTokens", "outputTokens", "totalTokens", "costUsdTicks"];
    return counters.some((field) => Boolean(usage[field] ?? 0));
}
1519
/**
 * Normalizes a finish reason reported by the batch API.
 *
 * The values "stop", "length", "content-filter", "tool-calls", "error" and
 * "other" are returned unchanged, the snake_case "tool_calls" is mapped to
 * "tool-calls", and anything else becomes "other".
 *
 * @param finishReason Raw finish reason.
 * @returns {string} One of the six values listed above.
 */
function getBatchFinishReason(finishReason) {
    const passthrough = ["stop", "length", "content-filter", "tool-calls", "error", "other"];
    if (passthrough.includes(finishReason)) {
        return finishReason;
    }
    return finishReason === "tool_calls" ? "tool-calls" : "other";
}
1534
/**
 * Copies a tool call into a fresh object holding only `id`, a fixed
 * `type: "function"`, and the function's `name` and `arguments`.
 *
 * Any other properties on the input are not carried over.
 *
 * @param toolCall Object with `id` and `function: { name, arguments }`.
 * @returns A new `{ id, type, function: { name, arguments } }` object.
 */
function toLocalToolCall(toolCall) {
    const id = toolCall.id;
    const { name, arguments: rawArguments } = toolCall.function;
    return {
        id,
        type: "function",
        function: { name, arguments: rawArguments },
    };
}
1544
/**
 * Builds the assistant message recorded for one batch response.
 *
 * Without tool calls the message carries `content` as a plain string, or the
 * function returns `null` when `content` is empty. With tool calls the message
 * content is a parts array: an optional leading text part followed by one
 * "tool-call" part per call, whose `input` is the parsed arguments (or the raw
 * argument text if it does not parse).
 *
 * @param {string} content Assistant text; may be empty.
 * @param toolCalls Array of `{ id, function: { name, arguments } }`.
 * @returns The assistant message, or `null` when there is nothing to record.
 */
function buildAssistantBatchMessage(content, toolCalls) {
    if (toolCalls.length === 0) {
        if (!content) {
            return null;
        }
        return { role: "assistant", content };
    }
    const callParts = toolCalls.map((toolCall) => ({
        type: "tool-call",
        toolCallId: toolCall.id,
        toolName: toolCall.function.name,
        input: parseToolArgumentsOrRaw(toolCall.function.arguments),
    }));
    const parts = content ? [{ type: "text", text: content }, ...callParts] : callParts;
    return { role: "assistant", content: parts };
}
1562
/**
 * Builds the single "tool" message that records the results of executed tool
 * calls, or returns `null` when there are none.
 *
 * Each entry becomes a "tool-result" part. The whole `toolResult` object
 * (after `toSerializableValue`) is stored as the output value, typed "json"
 * when `toolResult.success` is truthy and "error-json" otherwise.
 *
 * @param toolParts Array of `{ toolCall, toolResult }` pairs.
 * @returns The tool message, or `null` for an empty list.
 */
function buildToolBatchMessage(toolParts) {
    if (toolParts.length === 0) {
        return null;
    }
    const content = [];
    for (const { toolCall, toolResult } of toolParts) {
        content.push({
            type: "tool-result",
            toolCallId: toolCall.id,
            toolName: toolCall.function.name,
            output: {
                type: toolResult.success ? "json" : "error-json",
                value: toSerializableValue(toolResult),
            },
        });
    }
    return { role: "tool", content };
}
1578
/**
 * Parses a tool call's argument text as JSON, tolerating bad input.
 *
 * Blank (empty or whitespace-only) text yields `{}`. If parsing fails, or
 * `raw` has no usable `trim` method, `raw` itself is returned unchanged.
 *
 * @param raw Argument text as received from the model.
 * @returns The parsed value, `{}` for blank text, or `raw` on failure.
 */
function parseToolArgumentsOrRaw(raw) {
    let parsed;
    try {
        const isBlank = !raw.trim();
        parsed = isBlank ? {} : JSON.parse(raw);
    }
    catch {
        parsed = raw;
    }
    return parsed;
}
1586
/**
 * Produces a JSON-safe copy of `value` by round-tripping it through
 * `JSON.stringify` / `JSON.parse`.
 *
 * null/undefined become `null`. If the round trip throws (for example on a
 * circular structure or a BigInt), the result is `String(value)` instead.
 *
 * @param value Any value.
 * @returns The JSON round-tripped copy, or a string fallback.
 */
function toSerializableValue(value) {
    try {
        const encoded = JSON.stringify(value ?? null);
        return JSON.parse(encoded);
    }
    catch {
        return String(value);
    }
}
1594
/**
 * Returns `value` when it is a finite number, otherwise `undefined`.
 *
 * NaN and ±Infinity are rejected as well as non-numbers: they are not
 * nullish, so accepting them would bypass `??` fallbacks to alternative
 * fields and turn every later usage sum into NaN/Infinity.
 *
 * @param value Candidate value, typically a field of an API response.
 * @returns {number | undefined}
 */
function asNumber(value) {
    // Number.isFinite does not coerce, so non-number inputs are rejected too.
    return Number.isFinite(value) ? value : undefined;
}
1597
/**
 * Adds two optional numbers.
 *
 * Returns `undefined` only when both operands are null/undefined; otherwise a
 * missing operand counts as 0.
 *
 * @param left First operand, possibly null/undefined.
 * @param right Second operand, possibly null/undefined.
 * @returns {number | undefined}
 */
function sumDefined(left, right) {
    const bothMissing = left == null && right == null;
    return bothMissing ? undefined : (left ?? 0) + (right ?? 0);
}
917
1603
  function toToolCall(part) {
918
1604
  return {