grok-dev 1.0.0-rc5 → 1.0.0-rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.cursor/hooks/state/continual-learning.json +8 -0
  2. package/.grok/generated-media/image-2026-03-24T06-50-58-739Z.jpg +0 -0
  3. package/.grok/generated-media/image-2026-03-24T13-17-06-230Z.jpg +0 -0
  4. package/.grok/generated-media/image-2026-03-24T14-56-55-398Z.jpg +0 -0
  5. package/.grok/generated-media/image-2026-03-24T15-10-09-928Z.jpg +0 -0
  6. package/.grok/generated-media/image-2026-03-24T15-13-59-695Z.jpg +0 -0
  7. package/.grok/generated-media/image-2026-03-24T15-26-20-632Z.jpg +0 -0
  8. package/.grok/generated-media/image-2026-03-24T15-27-00-155Z.jpg +0 -0
  9. package/.grok/generated-media/image-2026-03-24T15-39-25-115Z.jpg +0 -0
  10. package/.grok/generated-media/image-2026-03-24T15-40-52-587Z.jpg +0 -0
  11. package/.grok/generated-media/image-2026-03-24T19-32-44-606Z.jpg +0 -0
  12. package/.grok/generated-media/image-2026-03-24T19-33-25-738Z.jpg +0 -0
  13. package/.grok/generated-media/image-2026-03-24T19-39-32-487Z.jpg +0 -0
  14. package/.grok/generated-media/image-2026-03-24T19-43-15-490Z.jpg +0 -0
  15. package/.grok/generated-media/image-2026-03-24T19-45-05-413Z.jpg +0 -0
  16. package/.grok/generated-media/image-2026-03-24T19-45-37-351Z.jpg +0 -0
  17. package/.grok/generated-media/image-2026-03-25T07-51-38-269Z.jpg +0 -0
  18. package/.grok/generated-media/image-2026-03-25T07-52-03-379Z.jpg +0 -0
  19. package/.grok/generated-media/image-2026-03-25T07-52-24-604Z.jpg +0 -0
  20. package/.grok/generated-media/image-2026-03-25T08-48-54-411Z.jpg +0 -0
  21. package/.grok/generated-media/video-2026-03-23T21-39-11-587Z.mp4 +0 -0
  22. package/.grok/generated-media/video-2026-03-24T19-40-15-336Z.mp4 +0 -0
  23. package/.grok/generated-media/video-2026-03-24T19-42-10-419Z.mp4 +0 -0
  24. package/.grok/generated-media/video-2026-03-24T19-44-13-123Z.mp4 +0 -0
  25. package/.grok/generated-media/video-2026-03-24T19-46-30-950Z.mp4 +0 -0
  26. package/.grok/settings.json +4 -0
  27. package/README.md +130 -1
  28. package/dist/agent/agent.d.ts +14 -0
  29. package/dist/agent/agent.js +128 -23
  30. package/dist/agent/agent.js.map +1 -1
  31. package/dist/agent/delegations.d.ts +5 -0
  32. package/dist/agent/delegations.js +2 -0
  33. package/dist/agent/delegations.js.map +1 -1
  34. package/dist/agent/delegations.test.d.ts +1 -0
  35. package/dist/agent/delegations.test.js.map +1 -0
  36. package/dist/agent/sandbox.test.d.ts +1 -0
  37. package/dist/agent/sandbox.test.js.map +1 -0
  38. package/dist/agent/vision-input.d.ts +2 -0
  39. package/dist/agent/vision-input.js +97 -0
  40. package/dist/agent/vision-input.js.map +1 -0
  41. package/dist/agent/vision-input.test.d.ts +1 -0
  42. package/dist/agent/vision-input.test.js.map +1 -0
  43. package/dist/audio/stt/engine.d.ts +10 -0
  44. package/dist/audio/stt/engine.js +14 -0
  45. package/dist/audio/stt/engine.js.map +1 -0
  46. package/dist/audio/stt/whisper-cpp.d.ts +32 -0
  47. package/dist/audio/stt/whisper-cpp.js +213 -0
  48. package/dist/audio/stt/whisper-cpp.js.map +1 -0
  49. package/dist/audio/stt/whisper-cpp.test.d.ts +1 -0
  50. package/dist/audio/stt/whisper-cpp.test.js.map +1 -0
  51. package/dist/daemon/scheduler.d.ts +13 -0
  52. package/dist/daemon/scheduler.js +110 -0
  53. package/dist/daemon/scheduler.js.map +1 -0
  54. package/dist/daemon/scheduler.test.d.ts +1 -0
  55. package/dist/daemon/scheduler.test.js.map +1 -0
  56. package/dist/grok/media.d.ts +30 -0
  57. package/dist/grok/media.js +316 -0
  58. package/dist/grok/media.js.map +1 -0
  59. package/dist/grok/media.test.d.ts +1 -0
  60. package/dist/grok/media.test.js.map +1 -0
  61. package/dist/grok/tools.d.ts +3 -0
  62. package/dist/grok/tools.js +269 -4
  63. package/dist/grok/tools.js.map +1 -1
  64. package/dist/grok/tools.test.d.ts +1 -0
  65. package/dist/grok/tools.test.js.map +1 -0
  66. package/dist/headless/output.js +6 -1
  67. package/dist/headless/output.js.map +1 -1
  68. package/dist/headless/output.test.js.map +1 -1
  69. package/dist/index.js +128 -31
  70. package/dist/index.js.map +1 -1
  71. package/dist/storage/tool-results.d.ts +4 -0
  72. package/dist/storage/tool-results.js +49 -0
  73. package/dist/storage/tool-results.js.map +1 -0
  74. package/dist/storage/transcript.js +1 -47
  75. package/dist/storage/transcript.js.map +1 -1
  76. package/dist/storage/transcript.test.d.ts +1 -0
  77. package/dist/storage/transcript.test.js.map +1 -0
  78. package/dist/telegram/audio-input.d.ts +34 -0
  79. package/dist/telegram/audio-input.js +82 -0
  80. package/dist/telegram/audio-input.js.map +1 -0
  81. package/dist/telegram/audio-input.test.d.ts +1 -0
  82. package/dist/telegram/audio-input.test.js.map +1 -0
  83. package/dist/telegram/bridge.js +83 -40
  84. package/dist/telegram/bridge.js.map +1 -1
  85. package/dist/telegram/bridge.test.d.ts +1 -0
  86. package/dist/telegram/bridge.test.js.map +1 -0
  87. package/dist/telegram/headless-bridge-paths.d.ts +9 -0
  88. package/dist/telegram/headless-bridge-paths.js +8 -0
  89. package/dist/telegram/headless-bridge-paths.js.map +1 -0
  90. package/dist/telegram/headless-bridge.d.ts +12 -0
  91. package/dist/telegram/headless-bridge.js +198 -0
  92. package/dist/telegram/headless-bridge.js.map +1 -0
  93. package/dist/telegram/headless-bridge.test.d.ts +1 -0
  94. package/dist/telegram/headless-bridge.test.js.map +1 -0
  95. package/dist/telegram/index.d.ts +2 -0
  96. package/dist/telegram/index.js +2 -0
  97. package/dist/telegram/index.js.map +1 -1
  98. package/dist/telegram/media.d.ts +45 -0
  99. package/dist/telegram/media.js +181 -0
  100. package/dist/telegram/media.js.map +1 -0
  101. package/dist/telegram/media.test.d.ts +1 -0
  102. package/dist/telegram/media.test.js.map +1 -0
  103. package/dist/telegram/reply-text.d.ts +1 -0
  104. package/dist/telegram/reply-text.js +58 -0
  105. package/dist/telegram/reply-text.js.map +1 -0
  106. package/dist/telegram/reply-text.test.d.ts +1 -0
  107. package/dist/telegram/reply-text.test.js.map +1 -0
  108. package/dist/telegram/runtime-tools.d.ts +12 -0
  109. package/dist/telegram/runtime-tools.js +64 -0
  110. package/dist/telegram/runtime-tools.js.map +1 -0
  111. package/dist/telegram/runtime-tools.test.d.ts +1 -0
  112. package/dist/telegram/runtime-tools.test.js.map +1 -0
  113. package/dist/telegram/send-file.d.ts +15 -0
  114. package/dist/telegram/send-file.js +48 -0
  115. package/dist/telegram/send-file.js.map +1 -0
  116. package/dist/telegram/send-file.test.d.ts +1 -0
  117. package/dist/telegram/send-file.test.js.map +1 -0
  118. package/dist/tools/bash.d.ts +18 -1
  119. package/dist/tools/bash.js +138 -3
  120. package/dist/tools/bash.js.map +1 -1
  121. package/dist/tools/bash.test.d.ts +1 -0
  122. package/dist/tools/bash.test.js.map +1 -0
  123. package/dist/tools/schedule.d.ts +85 -0
  124. package/dist/tools/schedule.js +498 -0
  125. package/dist/tools/schedule.js.map +1 -0
  126. package/dist/tools/schedule.test.d.ts +1 -0
  127. package/dist/tools/schedule.test.js.map +1 -0
  128. package/dist/types/index.d.ts +13 -1
  129. package/dist/types/index.js.map +1 -1
  130. package/dist/ui/app.d.ts +3 -0
  131. package/dist/ui/app.js +763 -163
  132. package/dist/ui/app.js.map +1 -1
  133. package/dist/ui/schedule-modal.d.ts +15 -0
  134. package/dist/ui/schedule-modal.js +36 -0
  135. package/dist/ui/schedule-modal.js.map +1 -0
  136. package/dist/ui/telegram-turn-ui.d.ts +13 -0
  137. package/dist/ui/telegram-turn-ui.js +65 -0
  138. package/dist/ui/telegram-turn-ui.js.map +1 -0
  139. package/dist/ui/telegram-turn-ui.test.d.ts +1 -0
  140. package/dist/ui/telegram-turn-ui.test.js.map +1 -0
  141. package/dist/utils/instructions.test.js.map +1 -1
  142. package/dist/utils/settings.d.ts +55 -0
  143. package/dist/utils/settings.js +118 -1
  144. package/dist/utils/settings.js.map +1 -1
  145. package/dist/utils/subagents-settings.test.js.map +1 -1
  146. package/dist/utils/telegram-audio-settings.test.d.ts +1 -0
  147. package/dist/utils/telegram-audio-settings.test.js.map +1 -0
  148. package/package.json +1 -1
  149. package/telegram-pair-code.txt +0 -0
  150. package/vitest.config.ts +7 -0
@@ -0,0 +1,8 @@
1
+ {
2
+ "version": 1,
3
+ "lastRunAtMs": 0,
4
+ "turnsSinceLastRun": 4,
5
+ "lastTranscriptMtimeMs": null,
6
+ "lastProcessedGenerationId": "518874b0-1435-472e-9c42-c54214a9ae4e",
7
+ "trialStartedAtMs": null
8
+ }
package/.grok/settings.json CHANGED
@@ -13,5 +13,9 @@
13
13
  },
14
14
  "enabled": true
15
15
  }
16
+ },
17
+ "sandboxMode": "shuru",
18
+ "sandbox": {
19
+ "allowNet": false
16
20
  }
17
21
  }
package/README.md CHANGED
@@ -10,6 +10,8 @@ The rest borrowed from each other. We borrowed from *all of them*, then wired it
10
10
 
11
11
  Open source. Terminal-native. Built with **Bun** and **OpenTUI**. If you want vibes *and* velocity, you’re in the right repo.
12
12
 
13
+ Community-built and unofficial. This project is not affiliated with or endorsed by xAI, and it is not the official Grok CLI.
14
+
13
15
  https://github.com/user-attachments/assets/7ca4f6df-50ca-4e9c-91b2-d4abad5c66cb
14
16
 
15
17
  ---
@@ -22,7 +24,7 @@ npm i -g grok-dev
22
24
 
23
25
  The CLI binary is **`grok`** (yes, the package name and the command differ—deal with it).
24
26
 
25
- **Prerequisites:** Node 18+ (for the global install), and a **Grok API key** from [x.ai](https://x.ai).
27
+ **Prerequisites:** Node 18+ (for the global install), a **Grok API key** from [x.ai](https://x.ai), and a modern terminal emulator for the interactive OpenTUI experience. Headless `--prompt` mode does not depend on terminal UI support.
26
28
 
27
29
  ---
28
30
 
@@ -34,6 +36,17 @@ The CLI binary is **`grok`** (yes, the package name and the command differ—dea
34
36
  grok
35
37
  ```
36
38
 
39
+ ### Supported terminals
40
+
41
+ For the most reliable interactive OpenTUI experience, use a modern terminal emulator. We currently document and recommend:
42
+
43
+ - **WezTerm** (cross-platform)
44
+ - **Alacritty** (cross-platform)
45
+ - **Ghostty** (macOS and Linux)
46
+ - **Kitty** (macOS and Linux)
47
+
48
+ Other modern terminals may work, but these are the terminal apps we currently recommend and document for interactive use.
49
+
37
50
  **Pick a project directory:**
38
51
 
39
52
  ```bash
@@ -68,6 +81,26 @@ grok --prompt "summarize the repo state" --format json
68
81
  default human-readable text output. Events are semantic, step-level records such
69
82
  as `step_start`, `text`, `tool_use`, `step_finish`, and `error`.
70
83
 
84
+ ### Scheduling
85
+
86
+ Schedules let Grok run a headless prompt on a recurring schedule or once. Ask
87
+ for it in natural language, for example:
88
+
89
+ ```text
90
+ Create a schedule named daily-changelog-update that runs every weekday at 9am
91
+ and updates CHANGELOG.md from the latest merged commits.
92
+ ```
93
+
94
+ Recurring schedules require the background daemon:
95
+
96
+ ```bash
97
+ grok daemon --background
98
+ ```
99
+
100
+ Use `/schedule` in the TUI to browse saved schedules. One-time schedules start
101
+ immediately in the background; recurring schedules keep running as long as the
102
+ daemon is active.
103
+
71
104
  **List Grok models and pricing hints:**
72
105
 
73
106
  ```bash
@@ -80,6 +113,18 @@ grok models
80
113
  grok fix the flaky test in src/foo.test.ts
81
114
  ```
82
115
 
116
+ **Generate images or short videos from chat:**
117
+
118
+ ```bash
119
+ grok "Generate a retro-futuristic logo for my CLI called Grok Forge"
120
+ grok "Edit ./assets/hero.png into a watercolor poster"
121
+ grok "Animate ./assets/cover.jpg into a 6 second cinematic push-in"
122
+ ```
123
+
124
+ Image and video generation are exposed as agent tools inside normal chat sessions.
125
+ You keep using a text model for the session, and Grok saves generated media under
126
+ `.grok/generated-media/` by default unless you ask for a specific output path.
127
+
83
128
  ---
84
129
 
85
130
  ## What you actually get
@@ -88,6 +133,7 @@ grok fix the flaky test in src/foo.test.ts
88
133
  |--------|----------------|
89
134
  | **Grok-native** | Defaults tuned for Grok; models like **`grok-code-fast-1`**, **`grok-4-1-fast-reasoning`**, **`grok-4.20-multi-agent-0309`**, plus flagship and fast variants—run `grok models` for the full menu. |
90
135
  | **X + web search** | **`search_x`** and **`search_web`** tools—live posts and docs without pretending the internet stopped in 2023. |
136
+ | **Media generation** | Built-in **`generate_image`** and **`generate_video`** tools for text-to-image, image editing, text-to-video, and image-to-video flows. Generated files are saved locally so you can reuse them after the xAI URLs expire. |
91
137
  | **Sub-agents (default behavior)** | Foreground **`task`** delegation (e.g. explore vs general) plus background **`delegate`** for read-only deep dives—parallelize like you mean it. |
92
138
  | **Custom sub-agents** | Define named agents with **`subAgents`** in **`~/.grok/user-settings.json`** and manage them from the TUI with **`/agents`**. |
93
139
  | **Remote control** | Pair **Telegram** from the TUI (`/remote-control` → Telegram): DM your bot, **`/pair`**, approve the code in-terminal. Keep the CLI running while you ping it from your phone. |
@@ -157,6 +203,69 @@ Optional: **`GROK_BASE_URL`** (default `https://api.x.ai/v1`), **`GROK_MODEL`**,
157
203
  3. Start **`grok`**, open **`/remote-control`** → **Telegram** if needed, then in Telegram DM your bot: **`/pair`**, enter the **6-character code** in the terminal when asked.
158
204
  4. First user must be approved once; after that, it’s remembered. **Keep the CLI process running** while you use the bot (long polling lives in that process).
159
205
 
206
+ ### Voice & audio messages
207
+
208
+ Send a voice note or audio attachment in Telegram and Grok will transcribe it locally with **[whisper.cpp](https://github.com/ggml-org/whisper.cpp)** before passing the text to the agent. No cloud STT service is involved — everything runs on your machine.
209
+
210
+ #### Prerequisites
211
+
212
+ | Dependency | Why | Install (macOS) |
213
+ |---|---|---|
214
+ | **whisper-cli** | Runs the actual speech-to-text inference | `brew install whisper-cpp` |
215
+ | **ffmpeg** | Converts Telegram voice notes (OGG/Opus) to WAV for whisper.cpp | `brew install ffmpeg` |
216
+
217
+ After installing, verify both are available:
218
+
219
+ ```bash
220
+ whisper-cli -h
221
+ ffmpeg -version
222
+ ```
223
+
224
+ #### Download a Whisper model
225
+
226
+ Grok CLI auto-downloads the configured model on first use, but you can pre-download it:
227
+
228
+ ```bash
229
+ mkdir -p ~/.grok/models/stt/whisper.cpp
230
+ curl -L https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin \
231
+ -o ~/.grok/models/stt/whisper.cpp/ggml-tiny.en.bin
232
+ ```
233
+
234
+ Available models (trade size for accuracy): `tiny.en` (75 MB), `base.en` (142 MB), `small.en` (466 MB).
235
+
236
+ #### Configure in `~/.grok/user-settings.json`
237
+
238
+ ```json
239
+ {
240
+ "telegram": {
241
+ "botToken": "YOUR_BOT_TOKEN",
242
+ "audioInput": {
243
+ "enabled": true,
244
+ "binaryPath": "/opt/homebrew/bin/whisper-cli",
245
+ "model": "tiny.en",
246
+ "modelPath": "~/.grok/models/stt/whisper.cpp/ggml-tiny.en.bin",
247
+ "autoDownloadModel": true,
248
+ "language": "en"
249
+ }
250
+ }
251
+ }
252
+ ```
253
+
254
+ | Setting | Default | Description |
255
+ |---|---|---|
256
+ | `enabled` | `true` | Set to `false` to ignore voice/audio messages entirely. |
257
+ | `binaryPath` | `whisper-cli` | Absolute path or command name for the whisper.cpp CLI binary. |
258
+ | `model` | `tiny.en` | Model alias used for auto-download resolution. |
259
+ | `modelPath` | _(auto-resolved)_ | Explicit path to a `.bin` model file. Overrides `model` + auto-download. |
260
+ | `autoDownloadModel` | `true` | Download the model into `~/.grok/models/stt/whisper.cpp` on first use. |
261
+ | `language` | `en` | Whisper language code passed to the CLI. |
262
+
263
+ Optional headless flow when you do not want the TUI open:
264
+
265
+ ```bash
266
+ grok telegram-bridge
267
+ ```
268
+
160
269
  Treat the bot token like a password.
161
270
 
162
271
  ---
@@ -173,6 +282,26 @@ Project file: **`.grok/settings.json`** — e.g. the current model for this proj
173
282
 
174
283
  ---
175
284
 
285
+ ## Sandbox
286
+
287
+ Grok CLI can run shell commands inside a [Shuru](https://github.com/superhq-ai/shuru) microVM sandbox so the agent can't touch your host filesystem or network.
288
+
289
+ **Requires macOS 14+ on Apple Silicon.**
290
+
291
+ Enable it with `--sandbox` on the CLI, or toggle it from the TUI with `/sandbox`.
292
+
293
+ When sandbox mode is active you can configure:
294
+
295
+ - **Network** — off by default; enable with `--allow-net`, restrict with `--allow-host`
296
+ - **Port forwards** — `--port 8080:80`
297
+ - **Resource limits** — CPUs, memory, disk size (via settings or `/sandbox` panel)
298
+ - **Checkpoints** — start from a saved environment snapshot
299
+ - **Secrets** — inject API keys without exposing them inside the VM
300
+
301
+ All settings are saved in `~/.grok/user-settings.json` (user) and `.grok/settings.json` (project).
302
+
303
+ ---
304
+
176
305
  ## Development
177
306
 
178
307
  From a clone:
package/dist/agent/agent.d.ts CHANGED
@@ -1,7 +1,11 @@
1
+ import { type ScheduleDaemonStatus, type StoredSchedule } from "../tools/schedule";
1
2
  import type { AgentMode, ChatEntry, SessionInfo, SessionSnapshot, StreamChunk, SubagentStatus, TaskRequest, ToolCall, ToolResult } from "../types/index";
3
+ import { type SandboxMode, type SandboxSettings } from "../utils/settings";
2
4
  interface AgentOptions {
3
5
  persistSession?: boolean;
4
6
  session?: string;
7
+ sandboxMode?: SandboxMode;
8
+ sandboxSettings?: SandboxSettings;
5
9
  }
6
10
  type ProcessMessageFinishReason = "stop" | "length" | "content-filter" | "tool-calls" | "error" | "other";
7
11
  export interface ProcessMessageUsage {
@@ -45,6 +49,7 @@ export declare class Agent {
45
49
  private baseURL;
46
50
  private bash;
47
51
  private delegations;
52
+ private schedules;
48
53
  private sessionStore;
49
54
  private workspace;
50
55
  private session;
@@ -57,15 +62,24 @@ export declare class Agent {
57
62
  private maxTokens;
58
63
  private planContext;
59
64
  private subagentStatusListeners;
65
+ private sendTelegramFile;
60
66
  constructor(apiKey: string | undefined, baseURL?: string, model?: string, maxToolRounds?: number, options?: AgentOptions);
61
67
  getModel(): string;
62
68
  setModel(model: string): void;
63
69
  getMode(): AgentMode;
70
+ getSandboxMode(): SandboxMode;
71
+ setSandboxMode(mode: SandboxMode): void;
72
+ getSandboxSettings(): SandboxSettings;
73
+ setSandboxSettings(settings: SandboxSettings): void;
64
74
  setMode(mode: AgentMode): void;
65
75
  setPlanContext(ctx: string | null): void;
76
+ setSendTelegramFile(fn: ((filePath: string) => Promise<ToolResult>) | null): void;
66
77
  hasApiKey(): boolean;
67
78
  setApiKey(apiKey: string, baseURL?: string | undefined): void;
68
79
  getCwd(): string;
80
+ listSchedules(): Promise<StoredSchedule[]>;
81
+ removeSchedule(id: string): Promise<string>;
82
+ getScheduleDaemonStatus(): Promise<ScheduleDaemonStatus>;
69
83
  getContextStats(contextWindow: number, inFlightText?: string): {
70
84
  contextWindow: number;
71
85
  usedTokens: number;
package/dist/agent/agent.js CHANGED
@@ -5,13 +5,16 @@ import { createTools } from "../grok/tools";
5
5
  import { buildMcpToolSet } from "../mcp/runtime";
6
6
  import { appendCompaction, appendMessages, appendSystemMessage, buildChatEntries, getNextMessageSequence, getSessionTotalTokens, loadTranscript, loadTranscriptState, recordUsageEvent, SessionStore, } from "../storage/index";
7
7
  import { BashTool } from "../tools/bash";
8
+ import { ScheduleManager } from "../tools/schedule";
8
9
  import { loadCustomInstructions } from "../utils/instructions";
9
- import { loadMcpServers, loadValidSubAgents } from "../utils/settings";
10
+ import { loadMcpServers, loadValidSubAgents, } from "../utils/settings";
10
11
  import { discoverSkills, formatSkillsForPrompt } from "../utils/skills";
11
12
  import { createCompactionSummaryMessage, DEFAULT_KEEP_RECENT_TOKENS, DEFAULT_RESERVE_TOKENS, estimateConversationTokens, generateCompactionSummary, prepareCompaction, relaxCompactionSettings, shouldCompactContext, } from "./compaction";
12
13
  import { DelegationManager } from "./delegations";
13
14
  import { containsEncryptedReasoning, sanitizeModelMessages } from "./reasoning";
15
+ import { buildVisionUserMessages } from "./vision-input";
14
16
  const MAX_TOOL_ROUNDS = 400;
17
+ const VISION_MODEL = "grok-4-1-fast-reasoning";
15
18
  const ENVIRONMENT = `ENVIRONMENT:
16
19
  You are running inside a terminal (CLI). Your text output is rendered in a plain terminal — not a browser, not a rich text editor.
17
20
  - Use plain text only. No markdown tables, no HTML, no images, no colored text.
@@ -37,8 +40,17 @@ TOOLS:
37
40
  - delegate: Launch a read-only background agent for longer research while you continue working.
38
41
  - delegation_read: Retrieve a completed background delegation result by ID.
39
42
  - delegation_list: List running and completed background delegations. Do not poll it repeatedly.
43
+ - schedule_create: Create a recurring or one-time scheduled headless run.
44
+ - schedule_list: List saved schedules and their status.
45
+ - schedule_remove: Remove a saved schedule.
46
+ - schedule_read_log: Read recent log output from a schedule.
47
+ - schedule_daemon_status: Check whether the schedule daemon is running.
48
+ - schedule_daemon_start: Start the schedule daemon in the background.
49
+ - schedule_daemon_stop: Stop the schedule daemon.
40
50
  - search_web: Search the web for current information, documentation, APIs, tutorials, etc.
41
51
  - search_x: Search X/Twitter for real-time posts, discussions, opinions, and trends.
52
+ - generate_image: Generate a new image or edit an existing image. It saves image files locally and returns their paths.
53
+ - generate_video: Generate a new video or animate an existing image. It saves video files locally and returns their paths.
42
54
  - MCP tools: Enabled servers appear as tools named like mcp_<server>__<tool>.
43
55
 
44
56
  WORKFLOW:
@@ -68,12 +80,19 @@ EXAMPLES:
68
80
  - "research how auth works" -> delegate to explore first
69
81
  - "investigate why this test fails" -> delegate to explore first, then continue with findings
70
82
  - "refactor this module" -> delegate a focused part to general when helpful
83
+ - "generate a logo" -> use generate_image
84
+ - "animate this still image" -> use generate_video
71
85
  - Recurring specialized workflows -> use the matching custom sub-agent via task
86
+ - "every weekday at 9am run this check" -> use schedule_create with a cron expression
87
+ - "run this once automatically" -> use schedule_create with the right timing
88
+ - "make sure scheduled jobs keep running" -> use schedule_daemon_status and schedule_daemon_start
72
89
 
73
90
  IMPORTANT:
74
91
  - Prefer edit_file for surgical changes to existing files — it shows a clean diff.
75
92
  - Use write_file only for new files or when most of the file is changing.
76
93
  - Use read_file instead of cat/head/tail for reading files.
94
+ - When the user asks for an automated recurring or one-time run, use the schedule tools instead of only describing the setup.
95
+ - After creating a recurring schedule, check the daemon status and start it with \`schedule_daemon_start\` if needed.
77
96
 
78
97
  Be direct. Execute, don't just describe. Show results, not plans.`,
79
98
  plan: `You are Grok CLI in Plan mode — you analyze and plan but DO NOT execute changes.
@@ -123,7 +142,7 @@ function formatCustomSubagentsPromptSection(subagents) {
123
142
  });
124
143
  return `\n\nCUSTOM SUB-AGENTS:\nUser-defined foreground sub-agents from ~/.grok/user-settings.json. When one matches the task, call the task tool with agent set to the exact name.\n\n${lines.join("\n\n")}\n`;
125
144
  }
126
- function buildSystemPrompt(cwd, mode, planContext, subagents) {
145
+ function buildSystemPrompt(cwd, mode, sandboxMode, planContext, subagents, sandboxSettings) {
127
146
  const custom = loadCustomInstructions(cwd);
128
147
  const customSection = custom
129
148
  ? `\n\nCUSTOM INSTRUCTIONS:\n${custom}\n\nFollow the above alongside standard instructions.\n`
@@ -131,32 +150,38 @@ function buildSystemPrompt(cwd, mode, planContext, subagents) {
131
150
  const skillsText = formatSkillsForPrompt(discoverSkills(cwd));
132
151
  const skillsSection = skillsText ? `\n\n${skillsText}\n` : "";
133
152
  const subagentsSection = formatCustomSubagentsPromptSection(subagents ?? loadValidSubAgents());
153
+ const sandboxSection = formatSandboxPromptSection(sandboxMode, sandboxSettings);
134
154
  const planSection = planContext
135
155
  ? `\n\nAPPROVED PLAN:\nThe following plan has been approved by the user. Execute it now.\n${planContext}\n`
136
156
  : "";
137
- return `${MODE_PROMPTS[mode]}${customSection}${skillsSection}${subagentsSection}${planSection}
157
+ return `${MODE_PROMPTS[mode]}${sandboxSection}${customSection}${skillsSection}${subagentsSection}${planSection}
138
158
 
139
159
  Current working directory: ${cwd}`;
140
160
  }
141
- function buildSubagentPrompt(request, cwd, custom, subagents) {
161
+ function buildSubagentPrompt(request, cwd, custom, sandboxMode, subagents, sandboxSettings) {
142
162
  const isExplore = request.agent === "explore";
163
+ const isVision = request.agent === "vision";
143
164
  const mode = isExplore ? "ask" : "agent";
144
165
  const role = custom
145
166
  ? `You are the custom sub-agent "${custom.name}". You can investigate, edit files, and run commands unless the delegated task says otherwise.`
146
167
  : request.agent === "explore"
147
168
  ? "You are the Explore sub-agent. You are read-only and focus on fast codebase research."
148
- : "You are the General sub-agent. You can investigate, edit files, and run commands to complete delegated work.";
169
+ : isVision
170
+ ? "You are the Vision sub-agent."
171
+ : "You are the General sub-agent. You can investigate, edit files, and run commands to complete delegated work.";
149
172
  const rules = isExplore
150
173
  ? [
151
174
  "Do not create, modify, or delete files.",
152
175
  "Prefer `read_file` and search commands over broad shell exploration.",
153
176
  "Return concise findings for the parent agent.",
154
177
  ]
155
- : [
156
- "Work only on the delegated task below.",
157
- "Use tools directly instead of narrating your intent.",
158
- "Return a concise summary for the parent agent with key outcomes and any open risks.",
159
- ];
178
+ : isVision
179
+ ? ["Validate the image."]
180
+ : [
181
+ "Work only on the delegated task below.",
182
+ "Use tools directly instead of narrating your intent.",
183
+ "Return a concise summary for the parent agent with key outcomes and any open risks.",
184
+ ];
160
185
  const instructionLines = custom?.instruction.trim() ? ["", "SUB-AGENT INSTRUCTIONS:", custom.instruction.trim()] : [];
161
186
  return [
162
187
  role,
@@ -169,9 +194,40 @@ function buildSubagentPrompt(request, cwd, custom, subagents) {
169
194
  "",
170
195
  `Delegated task: ${request.description}`,
171
196
  "",
172
- buildSystemPrompt(cwd, mode, undefined, subagents),
197
+ buildSystemPrompt(cwd, mode, sandboxMode, undefined, subagents, sandboxSettings),
173
198
  ].join("\n");
174
199
  }
200
+ function formatSandboxPromptSection(sandboxMode, settings) {
201
+ if (sandboxMode === "off")
202
+ return "";
203
+ const s = settings ?? {};
204
+ let networkLine;
205
+ if (s.allowNet) {
206
+ networkLine = s.allowedHosts?.length
207
+ ? `- Network access is restricted to: ${s.allowedHosts.join(", ")}.`
208
+ : "- Network access is enabled.";
209
+ }
210
+ else {
211
+ networkLine = "- Network is disabled.";
212
+ }
213
+ const lines = [
214
+ "",
215
+ "SANDBOX MODE:",
216
+ "- Bash commands run inside a Shuru sandbox.",
217
+ networkLine,
218
+ "- The current workspace is mounted inside the sandbox at `/workspace`.",
219
+ "- Shell-side workspace file changes do not persist back to the host in this version.",
220
+ "- Use `read_file`, `edit_file`, and `write_file` for durable source edits.",
221
+ "- If a task needs a host-persistent shell mutation, explain that sandbox mode blocks that workflow and ask whether to disable sandbox mode.",
222
+ ];
223
+ if (s.ports?.length) {
224
+ lines.push(`- Port forwards: ${s.ports.join(", ")}.`);
225
+ }
226
+ if (s.from) {
227
+ lines.push(`- Starting from checkpoint: ${s.from}.`);
228
+ }
229
+ return lines.join("\n");
230
+ }
175
231
  function applyModelConstraints(system, modelId) {
176
232
  const modelInfo = getModelInfo(modelId);
177
233
  if (modelInfo?.supportsClientTools !== false) {
@@ -192,6 +248,7 @@ export class Agent {
192
248
  baseURL = null;
193
249
  bash;
194
250
  delegations;
251
+ schedules;
195
252
  sessionStore = null;
196
253
  workspace = null;
197
254
  session = null;
@@ -204,14 +261,19 @@ export class Agent {
204
261
  maxTokens;
205
262
  planContext = null;
206
263
  subagentStatusListeners = new Set();
264
+ sendTelegramFile = null;
207
265
  constructor(apiKey, baseURL, model, maxToolRounds, options = {}) {
208
266
  this.baseURL = baseURL || null;
209
267
  if (apiKey) {
210
268
  this.setApiKey(apiKey, baseURL);
211
269
  }
212
- this.bash = new BashTool();
270
+ this.bash = new BashTool(process.cwd(), {
271
+ sandboxMode: options.sandboxMode ?? "off",
272
+ sandboxSettings: options.sandboxSettings,
273
+ });
213
274
  this.delegations = new DelegationManager(() => this.bash.getCwd());
214
275
  this.modelId = normalizeModelId(model || DEFAULT_MODEL);
276
+ this.schedules = new ScheduleManager(() => this.bash.getCwd(), () => this.modelId);
215
277
  this.maxToolRounds = maxToolRounds || MAX_TOOL_ROUNDS;
216
278
  const envMax = Number(process.env.GROK_MAX_TOKENS);
217
279
  this.maxTokens = Number.isFinite(envMax) && envMax > 0 ? envMax : 16_384;
@@ -239,6 +301,18 @@ export class Agent {
239
301
  getMode() {
240
302
  return this.mode;
241
303
  }
304
+ getSandboxMode() {
305
+ return this.bash.getSandboxMode();
306
+ }
307
+ setSandboxMode(mode) {
308
+ this.bash.setSandboxMode(mode);
309
+ }
310
+ getSandboxSettings() {
311
+ return this.bash.getSandboxSettings();
312
+ }
313
+ setSandboxSettings(settings) {
314
+ this.bash.setSandboxSettings(settings);
315
+ }
242
316
  setMode(mode) {
243
317
  if (mode !== this.mode) {
244
318
  this.mode = mode;
@@ -251,6 +325,9 @@ export class Agent {
251
325
  setPlanContext(ctx) {
252
326
  this.planContext = ctx;
253
327
  }
328
+ setSendTelegramFile(fn) {
329
+ this.sendTelegramFile = fn;
330
+ }
254
331
  hasApiKey() {
255
332
  return !!this.apiKey;
256
333
  }
@@ -262,8 +339,18 @@ export class Agent {
262
339
  getCwd() {
263
340
  return this.bash.getCwd();
264
341
  }
342
+ async listSchedules() {
343
+ return this.schedules.list();
344
+ }
345
+ async removeSchedule(id) {
346
+ const removed = await this.schedules.remove(id);
347
+ return removed ? `Removed schedule "${removed.name}".` : `Schedule "${id}" not found.`;
348
+ }
349
+ async getScheduleDaemonStatus() {
350
+ return this.schedules.getDaemonStatus();
351
+ }
265
352
  getContextStats(contextWindow, inFlightText = "") {
266
- const system = buildSystemPrompt(this.bash.getCwd(), this.mode, this.planContext);
353
+ const system = buildSystemPrompt(this.bash.getCwd(), this.mode, this.bash.getSandboxMode(), this.planContext, undefined, this.bash.getSandboxSettings());
267
354
  const usedTokens = Math.min(contextWindow, estimateConversationTokens(system, this.messages, inFlightText));
268
355
  const remainingTokens = Math.max(0, contextWindow - usedTokens);
269
356
  return {
@@ -380,10 +467,11 @@ export class Agent {
380
467
  const agentKey = String(request.agent);
381
468
  const isExplore = agentKey === "explore";
382
469
  const isGeneral = agentKey === "general";
470
+ const isVision = agentKey === "vision";
383
471
  const subagents = loadValidSubAgents();
384
- const custom = !isExplore && !isGeneral ? findCustomSubagent(agentKey, subagents) : undefined;
385
- if (!isExplore && !isGeneral && !custom) {
386
- const message = `Unknown sub-agent "${agentKey}". Use general, explore, or a configured name from ~/.grok/user-settings.json.`;
472
+ const custom = !isExplore && !isGeneral && !isVision ? findCustomSubagent(agentKey, subagents) : undefined;
473
+ if (!isExplore && !isGeneral && !isVision && !custom) {
474
+ const message = `Unknown sub-agent "${agentKey}". Use general, explore, vision, or a configured name from ~/.grok/user-settings.json.`;
387
475
  return {
388
476
  success: false,
389
477
  output: message,
@@ -395,16 +483,21 @@ export class Agent {
395
483
  };
396
484
  }
397
485
  const childMode = isExplore ? "ask" : "agent";
398
- const childBash = new BashTool(this.bash.getCwd());
486
+ const childBash = new BashTool(this.bash.getCwd(), {
487
+ sandboxMode: this.bash.getSandboxMode(),
488
+ sandboxSettings: this.bash.getSandboxSettings(),
489
+ });
399
490
  const childBaseTools = createTools(childBash, provider, childMode);
400
491
  const initialDetail = isExplore ? "Scanning the codebase" : "Planning delegated work";
401
492
  let assistantText = "";
402
493
  let lastActivity = initialDetail;
403
494
  let childTools = childBaseTools;
404
495
  let closeMcp;
405
- const childModelId = normalizeModelId(isExplore ? DEFAULT_MODEL : custom ? custom.model : this.modelId);
406
- const childRuntime = resolveModelRuntime(provider, childModelId);
407
- const childSystem = applyModelConstraints(buildSubagentPrompt(request, childBash.getCwd(), custom ?? null, subagents), childRuntime.modelId);
496
+ const childModelId = normalizeModelId(isVision ? VISION_MODEL : isExplore ? DEFAULT_MODEL : custom ? custom.model : this.modelId);
497
+ const childRuntime = isVision
498
+ ? { ...resolveModelRuntime(provider, childModelId), model: provider.responses(childModelId) }
499
+ : resolveModelRuntime(provider, childModelId);
500
+ const childSystem = applyModelConstraints(buildSubagentPrompt(request, childBash.getCwd(), custom ?? null, childBash.getSandboxMode(), subagents, childBash.getSandboxSettings()), childRuntime.modelId);
408
501
  onActivity?.(initialDetail);
409
502
  try {
410
503
  if (childMode === "agent" && childRuntime.modelInfo?.supportsClientTools !== false) {
@@ -416,10 +509,13 @@ export class Agent {
416
509
  onActivity?.(lastActivity);
417
510
  }
418
511
  }
512
+ const childMessages = isVision
513
+ ? await buildVisionUserMessages(request.prompt, childBash.getCwd(), signal)
514
+ : [{ role: "user", content: request.prompt }];
419
515
  const result = streamText({
420
516
  model: childRuntime.model,
421
517
  system: childSystem,
422
- messages: [{ role: "user", content: request.prompt }],
518
+ messages: childMessages,
423
519
  tools: childRuntime.modelInfo?.supportsClientTools === false ? {} : childTools,
424
520
  stopWhen: stepCountIs(Math.min(this.maxToolRounds, isExplore ? 60 : 120)),
425
521
  maxRetries: 0,
@@ -505,6 +601,8 @@ export class Agent {
505
601
  }
506
602
  return await this.delegations.start(request, {
507
603
  model: this.modelId,
604
+ sandboxMode: this.bash.getSandboxMode(),
605
+ sandboxSettings: this.bash.getSandboxSettings(),
508
606
  maxToolRounds: this.maxToolRounds,
509
607
  maxTokens: this.maxTokens,
510
608
  });
@@ -614,7 +712,7 @@ export class Agent {
614
712
  this.messageSeqs.push(null);
615
713
  const provider = this.requireProvider();
616
714
  const subagents = loadValidSubAgents();
617
- const system = applyModelConstraints(buildSystemPrompt(this.bash.getCwd(), this.mode, this.planContext, subagents), this.modelId);
715
+ const system = applyModelConstraints(buildSystemPrompt(this.bash.getCwd(), this.mode, this.bash.getSandboxMode(), this.planContext, subagents, this.bash.getSandboxSettings()), this.modelId);
618
716
  const runtime = resolveModelRuntime(provider, this.modelId);
619
717
  const modelInfo = runtime.modelInfo;
620
718
  this.planContext = null;
@@ -639,7 +737,9 @@ export class Agent {
639
737
  runDelegation: (request, abortSignal) => this.runDelegation(request, combineAbortSignals(signal, abortSignal)),
640
738
  readDelegation: (id) => this.readDelegation(id),
641
739
  listDelegations: () => this.listDelegations(),
740
+ scheduleManager: this.schedules,
642
741
  subagents,
742
+ sendTelegramFile: this.sendTelegramFile ?? undefined,
643
743
  });
644
744
  let tools = runtime.modelInfo?.supportsClientTools === false ? {} : baseTools;
645
745
  if (this.mode === "agent" && runtime.modelInfo?.supportsClientTools !== false) {
@@ -876,12 +976,13 @@ function toToolResult(output) {
876
976
  return {
877
977
  success: r.success,
878
978
  output: r.output,
879
- error: r.success ? undefined : r.output,
979
+ error: r.error ?? (r.success ? undefined : r.output),
880
980
  diff: r.diff,
881
981
  plan: r.plan,
882
982
  task: r.task,
883
983
  delegation: r.delegation,
884
984
  backgroundProcess: r.backgroundProcess,
985
+ media: r.media,
885
986
  };
886
987
  }
887
988
  return { success: true, output: String(output) };
@@ -898,6 +999,10 @@ function formatSubagentActivity(toolName, args) {
898
999
  return `Web search "${truncate(parsed.query || "", 50)}"`;
899
1000
  if (toolName === "search_x")
900
1001
  return `X search "${truncate(parsed.query || "", 50)}"`;
1002
+ if (toolName === "generate_image")
1003
+ return `Generate image "${truncate(parsed.prompt || "", 50)}"`;
1004
+ if (toolName === "generate_video")
1005
+ return `Generate video "${truncate(parsed.prompt || "", 50)}"`;
901
1006
  if (toolName === "bash")
902
1007
  return truncate(parsed.command || "Run command", 70);
903
1008
  return truncate(`${toolName}`, 70);