grok-dev 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/.claude/worktrees/vigilant-johnson/.cursor/hooks/state/continual-learning.json +8 -0
  2. package/.claude/worktrees/vigilant-johnson/.cursor/rules/development-workflow.mdc +66 -0
  3. package/.claude/worktrees/vigilant-johnson/.cursor/rules/project-overview.mdc +66 -0
  4. package/.claude/worktrees/vigilant-johnson/.cursor/rules/react-ink-components.mdc +45 -0
  5. package/.claude/worktrees/vigilant-johnson/.cursor/rules/tools-and-agent.mdc +62 -0
  6. package/.claude/worktrees/vigilant-johnson/.cursor/rules/typescript-conventions.mdc +54 -0
  7. package/.claude/worktrees/vigilant-johnson/.husky/pre-commit +1 -0
  8. package/.claude/worktrees/vigilant-johnson/LICENSE +21 -0
  9. package/.claude/worktrees/vigilant-johnson/README.md +341 -0
  10. package/.claude/worktrees/vigilant-johnson/biome.json +51 -0
  11. package/.claude/worktrees/vigilant-johnson/package.json +74 -0
  12. package/.claude/worktrees/vigilant-johnson/telegram-pair-code.txt +0 -0
  13. package/.claude/worktrees/vigilant-johnson/vitest.config.ts +7 -0
  14. package/README.md +20 -2
  15. package/dist/agent/agent.d.ts +9 -1
  16. package/dist/agent/agent.js +704 -18
  17. package/dist/agent/agent.js.map +1 -1
  18. package/dist/agent/batch-mode.test.d.ts +1 -0
  19. package/dist/agent/batch-mode.test.js.map +1 -0
  20. package/dist/agent/delegations.d.ts +2 -0
  21. package/dist/agent/delegations.js +9 -1
  22. package/dist/agent/delegations.js.map +1 -1
  23. package/dist/agent/delegations.test.js.map +1 -1
  24. package/dist/grok/batch.d.ts +136 -0
  25. package/dist/grok/batch.js +204 -0
  26. package/dist/grok/batch.js.map +1 -0
  27. package/dist/grok/batch.test.d.ts +1 -0
  28. package/dist/grok/batch.test.js.map +1 -0
  29. package/dist/grok/tool-schemas.d.ts +3 -0
  30. package/dist/grok/tool-schemas.js +24 -0
  31. package/dist/grok/tool-schemas.js.map +1 -0
  32. package/dist/grok/tool-schemas.test.d.ts +1 -0
  33. package/dist/grok/tool-schemas.test.js.map +1 -0
  34. package/dist/grok/tools.js +3 -3
  35. package/dist/grok/tools.js.map +1 -1
  36. package/dist/grok/tools.test.js.map +1 -1
  37. package/dist/headless/output.d.ts +1 -0
  38. package/dist/headless/output.js +29 -4
  39. package/dist/headless/output.js.map +1 -1
  40. package/dist/index.js +24 -6
  41. package/dist/index.js.map +1 -1
  42. package/dist/tools/bash.d.ts +3 -1
  43. package/dist/tools/bash.js +101 -12
  44. package/dist/tools/bash.js.map +1 -1
  45. package/dist/tools/bash.test.js.map +1 -1
  46. package/dist/types/index.d.ts +18 -1
  47. package/dist/types/index.js.map +1 -1
  48. package/dist/ui/app.js +14 -0
  49. package/dist/ui/app.js.map +1 -1
  50. package/dist/utils/settings.d.ts +6 -0
  51. package/dist/utils/settings.js +9 -1
  52. package/dist/utils/settings.js.map +1 -1
  53. package/dist/utils/skills.d.ts +3 -2
  54. package/dist/utils/skills.js +27 -7
  55. package/dist/utils/skills.js.map +1 -1
  56. package/dist/utils/skills.test.d.ts +1 -0
  57. package/dist/utils/skills.test.js.map +1 -0
  58. package/dist/utils/subagents-settings.test.js.map +1 -1
  59. package/dist/verify/checkpoint.d.ts +11 -0
  60. package/dist/verify/checkpoint.js +158 -0
  61. package/dist/verify/checkpoint.js.map +1 -0
  62. package/dist/verify/checkpoint.test.d.ts +1 -0
  63. package/dist/verify/checkpoint.test.js.map +1 -0
  64. package/dist/verify/entrypoint.d.ts +34 -0
  65. package/dist/verify/entrypoint.js +642 -0
  66. package/dist/verify/entrypoint.js.map +1 -0
  67. package/dist/verify/entrypoint.test.d.ts +1 -0
  68. package/dist/verify/entrypoint.test.js.map +1 -0
  69. package/package.json +2 -1
  70. package/tmp/.grok/verify-artifacts/screenshot-1774806349456.png +0 -0
  71. package/tmp/.grok/verify-artifacts/verify-smoke.webm +0 -0
  72. package/tmp/README.md +36 -0
  73. package/tmp/eslint.config.mjs +18 -0
  74. package/tmp/next.config.ts +7 -0
  75. package/tmp/package.json +34 -0
  76. package/tmp/postcss.config.mjs +7 -0
  77. package/tmp/public/file.svg +1 -0
  78. package/tmp/public/globe.svg +1 -0
  79. package/tmp/public/next.svg +1 -0
  80. package/tmp/public/vercel.svg +1 -0
  81. package/tmp/public/window.svg +1 -0
  82. package/tmp/large_class.py +0 -633
@@ -0,0 +1,51 @@
1
+ {
2
+ "$schema": "https://biomejs.dev/schemas/2.4.8/schema.json",
3
+ "vcs": {
4
+ "enabled": true,
5
+ "clientKind": "git",
6
+ "useIgnoreFile": true
7
+ },
8
+ "files": {
9
+ "includes": ["**", "!!**/dist"]
10
+ },
11
+ "formatter": {
12
+ "enabled": true,
13
+ "indentStyle": "space",
14
+ "indentWidth": 2,
15
+ "lineWidth": 120
16
+ },
17
+ "linter": {
18
+ "enabled": true,
19
+ "rules": {
20
+ "recommended": true,
21
+ "suspicious": {
22
+ "noExplicitAny": "error",
23
+ "noArrayIndexKey": "warn"
24
+ },
25
+ "correctness": {
26
+ "noUnusedFunctionParameters": "warn"
27
+ },
28
+ "complexity": {
29
+ "noForEach": "off"
30
+ },
31
+ "style": {
32
+ "noNonNullAssertion": "off",
33
+ "useNodejsImportProtocol": "off"
34
+ }
35
+ }
36
+ },
37
+ "javascript": {
38
+ "formatter": {
39
+ "quoteStyle": "double",
40
+ "trailingCommas": "all"
41
+ }
42
+ },
43
+ "assist": {
44
+ "enabled": true,
45
+ "actions": {
46
+ "source": {
47
+ "organizeImports": "on"
48
+ }
49
+ }
50
+ }
51
+ }
@@ -0,0 +1,74 @@
1
+ {
2
+ "name": "grok-dev",
3
+ "version": "1.1.0",
4
+ "description": "An open-source AI coding agent powered by Grok, built with Bun and OpenTUI.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "exports": {
8
+ ".": {
9
+ "import": "./dist/index.js",
10
+ "types": "./dist/index.d.ts"
11
+ }
12
+ },
13
+ "bin": {
14
+ "grok": "dist/index.js"
15
+ },
16
+ "scripts": {
17
+ "dev": "bun run src/index.ts",
18
+ "build": "tsc",
19
+ "start": "bun run dist/index.js",
20
+ "typecheck": "tsc --noEmit",
21
+ "test": "bunx vitest run",
22
+ "test:watch": "bunx vitest",
23
+ "lint": "biome check src/",
24
+ "format": "biome format src/",
25
+ "format:fix": "biome format --write src/",
26
+ "lint:fix": "biome check --fix src/",
27
+ "pre-commit": "lint-staged",
28
+ "prepare": "husky"
29
+ },
30
+ "lint-staged": {
31
+ "*.{ts,tsx,js,mjs,cjs,json}": "biome check --write --no-errors-on-unmatched"
32
+ },
33
+ "keywords": [
34
+ "cli",
35
+ "agent",
36
+ "grok",
37
+ "ai",
38
+ "coding",
39
+ "terminal",
40
+ "opentui"
41
+ ],
42
+ "author": "Vibe Kit",
43
+ "license": "MIT",
44
+ "dependencies": {
45
+ "@ai-sdk/mcp": "^1.0.25",
46
+ "@ai-sdk/xai": "^3.0.67",
47
+ "@modelcontextprotocol/sdk": "^1.27.1",
48
+ "@opentui/core": "^0.1.88",
49
+ "@opentui/react": "^0.1.88",
50
+ "ai": "^6.0.116",
51
+ "commander": "^12.1.0",
52
+ "diff": "^8.0.3",
53
+ "dotenv": "^16.6.1",
54
+ "grammy": "^1.41.1",
55
+ "react": "^19.2.4",
56
+ "semver": "^7.7.4",
57
+ "zod": "^4.3.6"
58
+ },
59
+ "devDependencies": {
60
+ "@biomejs/biome": "^2.4.8",
61
+ "@types/diff": "^8.0.0",
62
+ "@types/node": "^22.19.15",
63
+ "@types/react": "^19.2.14",
64
+ "@types/semver": "^7.7.1",
65
+ "husky": "^9.1.7",
66
+ "lint-staged": "^16.4.0",
67
+ "typescript": "^5.9.3",
68
+ "vitest": "^4.1.0"
69
+ },
70
+ "engines": {
71
+ "node": ">=18.0.0"
72
+ },
73
+ "preferGlobal": true
74
+ }
@@ -0,0 +1,7 @@
1
+ import { defineConfig } from "vitest/config";
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ exclude: ["dist/**", "node_modules/**"],
6
+ },
7
+ });
package/README.md CHANGED
@@ -60,8 +60,14 @@ grok --prompt "run the test suite and summarize failures"
60
60
  grok -p "show me package.json" --directory /path/to/project
61
61
  grok --prompt "refactor X" --max-tool-rounds 30
62
62
  grok --prompt "summarize the repo state" --format json
63
+ grok --prompt "review the repo overnight" --batch-api
64
+ grok --verify
63
65
  ```
64
66
 
67
+ `--batch-api` uses xAI's Batch API for lower-cost unattended runs. It is a good
68
+ fit for scripts, CI, schedules, and other non-interactive workflows where a
69
+ delayed result is fine.
70
+
65
71
  **Continue a saved session:**
66
72
 
67
73
  ```bash
@@ -135,6 +141,7 @@ You keep using a text model for the session, and Grok saves generated media unde
135
141
  | **X + web search** | **`search_x`** and **`search_web`** tools—live posts and docs without pretending the internet stopped in 2023. |
136
142
  | **Media generation** | Built-in **`generate_image`** and **`generate_video`** tools for text-to-image, image editing, text-to-video, and image-to-video flows. Generated files are saved locally so you can reuse them after the xAI URLs expire. |
137
143
  | **Sub-agents (default behavior)** | Foreground **`task`** delegation (e.g. explore vs general) plus background **`delegate`** for read-only deep dives—parallelize like you mean it. |
144
+ | **Verify** | **`/verify`** or **`--verify`** — inspects your app, builds, tests, boots it, and runs browser smoke checks in a sandboxed environment. Screenshots and video included. |
138
145
  | **Custom sub-agents** | Define named agents with **`subAgents`** in **`~/.grok/user-settings.json`** and manage them from the TUI with **`/agents`**. |
139
146
  | **Remote control** | Pair **Telegram** from the TUI (`/remote-control` → Telegram): DM your bot, **`/pair`**, approve the code in-terminal. Keep the CLI running while you ping it from your phone. |
140
147
  | **No “mystery meat” UI** | OpenTUI React terminal UI—fast, keyboard-driven, not whatever glitchy thing you’re thinking of. |
@@ -146,7 +153,7 @@ You keep using a text model for the session, and Grok saves generated media unde
146
153
 
147
154
  ### Coming soon
148
155
 
149
- **Autonomous agent testing** (think: sandboxed machine, recorded runs, Replit-style “prove it works”—the kind of thing that makes flaky human QA nervous). Not shipped yet; when it lands, we’ll be insufferable about it.
156
+ **Deeper autonomous agent testing** — persistent sandbox sessions, richer browser workflows, and stronger "prove it works" evidence.
150
157
 
151
158
  ---
152
159
 
@@ -190,7 +197,7 @@ Optional **`subAgents`** — custom foreground sub-agents. Each entry needs **`n
190
197
  }
191
198
  ```
192
199
 
193
- Names cannot be `general` or `explore` because those are reserved for the built-in sub-agents.
200
+ Names cannot be `general`, `explore`, `vision`, or `verify` because those are reserved for the built-in sub-agents.
194
201
 
195
202
  Optional: **`GROK_BASE_URL`** (default `https://api.x.ai/v1`), **`GROK_MODEL`**, **`GROK_MAX_TOKENS`**.
196
203
 
@@ -300,6 +307,17 @@ When sandbox mode is active you can configure:
300
307
 
301
308
  All settings are saved in `~/.grok/user-settings.json` (user) and `.grok/settings.json` (project).
302
309
 
310
+ ### Verify
311
+
312
+ Run **`/verify`** in the TUI or **`--verify`** on the CLI to verify your app locally:
313
+
314
+ ```bash
315
+ grok --verify
316
+ grok -d /path/to/your/app --verify
317
+ ```
318
+
319
+ The agent inspects your project, figures out how to build and run it, spins up a sandbox, and produces a verification report with screenshots and video evidence. Works with any app type.
320
+
303
321
  ---
304
322
 
305
323
  ## Development
@@ -1,17 +1,19 @@
1
1
  import { type ScheduleDaemonStatus, type StoredSchedule } from "../tools/schedule";
2
- import type { AgentMode, ChatEntry, SessionInfo, SessionSnapshot, StreamChunk, SubagentStatus, TaskRequest, ToolCall, ToolResult } from "../types/index";
2
+ import type { AgentMode, ChatEntry, SessionInfo, SessionSnapshot, StreamChunk, SubagentStatus, TaskRequest, ToolCall, ToolResult, VerifyRecipe } from "../types/index";
3
3
  import { type SandboxMode, type SandboxSettings } from "../utils/settings";
4
4
  interface AgentOptions {
5
5
  persistSession?: boolean;
6
6
  session?: string;
7
7
  sandboxMode?: SandboxMode;
8
8
  sandboxSettings?: SandboxSettings;
9
+ batchApi?: boolean;
9
10
  }
10
11
  type ProcessMessageFinishReason = "stop" | "length" | "content-filter" | "tool-calls" | "error" | "other";
11
12
  export interface ProcessMessageUsage {
12
13
  inputTokens?: number;
13
14
  outputTokens?: number;
14
15
  totalTokens?: number;
16
+ costUsdTicks?: number;
15
17
  }
16
18
  export interface ProcessMessageStepStart {
17
19
  stepNumber: number;
@@ -63,6 +65,7 @@ export declare class Agent {
63
65
  private planContext;
64
66
  private subagentStatusListeners;
65
67
  private sendTelegramFile;
68
+ private batchApi;
66
69
  constructor(apiKey: string | undefined, baseURL?: string, model?: string, maxToolRounds?: number, options?: AgentOptions);
67
70
  getModel(): string;
68
71
  setModel(model: string): void;
@@ -101,6 +104,9 @@ export declare class Agent {
101
104
  private discardAbortedTurn;
102
105
  private recordUsage;
103
106
  consumeBackgroundNotifications(): Promise<string[]>;
107
+ private getBatchClientOptions;
108
+ private executeBatchToolCall;
109
+ private runTaskRequestBatch;
104
110
  runTaskRequest(request: TaskRequest, onActivity?: (detail: string) => void, abortSignal?: AbortSignal): Promise<ToolResult>;
105
111
  private runTask;
106
112
  private runDelegation;
@@ -108,8 +114,10 @@ export declare class Agent {
108
114
  private listDelegations;
109
115
  private getCompactionSettings;
110
116
  private compactForContext;
117
+ private processMessageBatchTurn;
111
118
  private appendCompletedTurn;
112
119
  processMessage(userMessage: string, observer?: ProcessMessageObserver): AsyncGenerator<StreamChunk, void, unknown>;
113
120
  private requireProvider;
121
+ detectVerifyRecipe(settings?: SandboxSettings, abortSignal?: AbortSignal): Promise<VerifyRecipe | null>;
114
122
  }
115
123
  export {};