ima2-gen 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +2 -2
  2. package/bin/commands/video.js +4 -0
  3. package/bin/commands/video.ts +3 -0
  4. package/docs/README.ja.md +2 -2
  5. package/docs/README.ko.md +15 -3
  6. package/docs/README.zh-CN.md +2 -2
  7. package/lib/agentGenerationPlanner.js +18 -1
  8. package/lib/agentGenerationPlanner.ts +21 -1
  9. package/lib/agentRuntime.js +105 -1
  10. package/lib/agentRuntime.ts +118 -1
  11. package/lib/agentTypes.js +1 -0
  12. package/lib/agentTypes.ts +2 -1
  13. package/lib/assetLifecycle.js +12 -8
  14. package/lib/assetLifecycle.ts +12 -8
  15. package/lib/capabilities.js +1 -1
  16. package/lib/capabilities.ts +1 -1
  17. package/lib/grokVideoAdapter.js +30 -2
  18. package/lib/grokVideoAdapter.ts +36 -2
  19. package/lib/historyList.js +1 -0
  20. package/lib/historyList.ts +1 -0
  21. package/lib/videoSeriesChain.js +24 -0
  22. package/lib/videoSeriesChain.ts +29 -0
  23. package/node_modules/progrok/README.md +300 -22
  24. package/node_modules/progrok/dist/index.js +558 -173
  25. package/node_modules/progrok/dist/index.js.map +1 -1
  26. package/node_modules/progrok/package.json +3 -3
  27. package/node_modules/progrok/skills/progrok/SKILL.md +145 -109
  28. package/package.json +2 -2
  29. package/routes/video.js +10 -1
  30. package/routes/video.ts +11 -1
  31. package/ui/dist/.vite/manifest.json +12 -12
  32. package/ui/dist/assets/AgentWorkspace-DE_wg90f.js +3 -0
  33. package/ui/dist/assets/{CardNewsWorkspace-DmqCMnIx.js → CardNewsWorkspace--Myc5pAp.js} +1 -1
  34. package/ui/dist/assets/NodeCanvas-4U5oOT2y.js +7 -0
  35. package/ui/dist/assets/{PromptBuilderPanel-CoWjqQZS.js → PromptBuilderPanel-DNW1U8zI.js} +2 -2
  36. package/ui/dist/assets/{PromptImportDialog-C2zGZkyK.js → PromptImportDialog-o-4Sqki1.js} +2 -2
  37. package/ui/dist/assets/{PromptImportDiscoverySection-N0ZxHLYs.js → PromptImportDiscoverySection-BAbrRP8B.js} +1 -1
  38. package/ui/dist/assets/{PromptImportFolderSection-BC3dCASZ.js → PromptImportFolderSection-L-XI2noz.js} +1 -1
  39. package/ui/dist/assets/{PromptLibraryPanel-CcVliYnF.js → PromptLibraryPanel-CrW9LYGD.js} +2 -2
  40. package/ui/dist/assets/{SettingsWorkspace-CiB4ux7E.js → SettingsWorkspace-Dn4SYTyZ.js} +1 -1
  41. package/ui/dist/assets/index-B6tcw_UF.css +1 -0
  42. package/ui/dist/assets/{index-C93CfR9P.js → index-BONbNNIi.js} +1 -1
  43. package/ui/dist/assets/index-CeSZ2L3-.js +32 -0
  44. package/ui/dist/index.html +2 -2
  45. package/vendor/progrok-0.1.1.tgz +0 -0
  46. package/ui/dist/assets/AgentWorkspace-BTuPjlDH.js +0 -3
  47. package/ui/dist/assets/NodeCanvas-jr9WXfNm.js +0 -7
  48. package/ui/dist/assets/index-CIhB_ia7.css +0 -1
  49. package/ui/dist/assets/index-uBEJn5jz.js +0 -32
  50. package/vendor/progrok-0.1.0.tgz +0 -0
@@ -45,6 +45,25 @@ function sourceImageUrl(image, mime) {
45
45
  const detected = mime || detectImageMimeFromB64(image) || "image/png";
46
46
  return `data:${detected};base64,${image}`;
47
47
  }
48
/**
 * Map an aspect ratio and a resolution to pixel dimensions for white canvas injection.
 *
 * The shorter side is pinned to the resolution's base size and the longer side is
 * scaled by the ratio, then rounded to the nearest integer.
 *
 * @param {string} aspectRatio - "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "1:1" or
 *   "auto". Any other value falls back to 16:9.
 * @param {string} resolution - "720p" gives a 720px short side; anything else gives 480px.
 * @returns {{ width: number, height: number }} Canvas size in pixels.
 */
function aspectToCanvas(aspectRatio, resolution) {
    const base = resolution === "720p" ? 720 : 480;
    // A Map is used instead of an object literal so that names inherited from
    // Object.prototype ("constructor", "__proto__", "toString", ...) are treated as
    // unknown ratios. With a plain object they resolve to truthy, non-iterable
    // values and the destructuring below throws instead of falling back.
    const ratios = new Map([
        ["16:9", [16, 9]], ["9:16", [9, 16]], ["4:3", [4, 3]], ["3:4", [3, 4]],
        ["3:2", [3, 2]], ["2:3", [2, 3]], ["1:1", [1, 1]], ["auto", [16, 9]],
    ]);
    const [w, h] = ratios.get(aspectRatio) ?? [16, 9];
    if (w >= h) {
        // Landscape or square: height is the short side.
        return { width: Math.round(base * w / h), height: base };
    }
    // Portrait: width is the short side.
    return { width: base, height: Math.round(base * h / w) };
}
60
/**
 * Return the base64 body of the embedded placeholder PNG (no external deps).
 *
 * The value is a fixed literal whose PNG header declares a 1x1, 8-bit RGBA image.
 * It is returned without a `data:` URL prefix; callers add their own.
 *
 * NOTE(review): the image is labelled as a white pixel and is presumably only a
 * stand-in that the model does not treat as a real frame. Neither point is
 * established by this function; decode the literal with a PNG reader to confirm
 * the pixel data before relying on it.
 *
 * @returns {string} Base64-encoded PNG bytes.
 */
function generateWhiteCanvasB64() {
    return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/58BAwAHBQKhPX8EPAAAAABJRU5ErkJggg==";
}
48
67
  const FAILED_CODE_MAP = {
49
68
  invalid_argument: { code: "GROK_VIDEO_REQUEST_FAILED", status: 400 },
50
69
  permission_denied: { code: "GROK_VIDEO_REQUEST_FAILED", status: 403 },
@@ -383,11 +402,20 @@ export async function generateVideoViaGrok(prompt, ctx, options = {}) {
383
402
  const payload = buildVideoGenerationPayload(plan, { model, sourceImageUrl: srcUrl, referenceImageUrls: refUrls });
384
403
  let xaiVideoRequestId;
385
404
  let effectiveModel = model;
405
+ // grokv1.5 doesn't support T2V — inject a white canvas as source image to use I2V path
406
+ let effectivePayload = payload;
407
+ if (model === "grok-imagine-video-1.5-preview" && !srcUrl && refUrls.length === 0) {
408
+ const { width, height } = aspectToCanvas(plan.aspectRatio, plan.resolution);
409
+ const whiteCanvas = generateWhiteCanvasB64();
410
+ const canvasSrcUrl = `data:image/png;base64,${whiteCanvas}`;
411
+ effectivePayload = buildVideoGenerationPayload({ ...plan, prompt: `${plan.prompt}. This is not a start frame — generate freely as a new video.` }, { model, sourceImageUrl: canvasSrcUrl, referenceImageUrls: [] });
412
+ logEvent("grok", "video:1.5-t2v-canvas", { requestId: options.requestId, width, height });
413
+ }
386
414
  try {
387
- xaiVideoRequestId = await startVideoRequest(ctx, payload, options);
415
+ xaiVideoRequestId = await startVideoRequest(ctx, effectivePayload, options);
388
416
  }
389
417
  catch (e) {
390
- // Fallback: if 1.5-preview fails, retry with base model
418
+ // Fallback: if 1.5-preview still fails, retry with base model
391
419
  if (model !== "grok-imagine-video" && e?.status === 400) {
392
420
  effectiveModel = "grok-imagine-video";
393
421
  const fallbackPayload = buildVideoGenerationPayload(plan, { model: effectiveModel, sourceImageUrl: srcUrl, referenceImageUrls: refUrls });
@@ -124,6 +124,26 @@ function sourceImageUrl(image: string, mime?: string | null): string {
124
124
  return `data:${detected};base64,${image}`;
125
125
  }
126
126
 
127
/**
 * Map an aspect ratio and a resolution to pixel dimensions for white canvas injection.
 *
 * The shorter side is pinned to the resolution's base size and the longer side is
 * scaled by the ratio, then rounded to the nearest integer.
 *
 * @param aspectRatio - "16:9", "9:16", "4:3", "3:4", "3:2", "2:3", "1:1" or "auto".
 *   Any other value falls back to 16:9.
 * @param resolution - "720p" gives a 720px short side; anything else gives 480px.
 * @returns Canvas size in pixels.
 */
function aspectToCanvas(aspectRatio: string, resolution: string): { width: number; height: number } {
  const base = resolution === "720p" ? 720 : 480;
  const fallback: [number, number] = [16, 9];
  // A Map is used instead of a Record so that names inherited from Object.prototype
  // ("constructor", "__proto__", "toString", ...) are treated as unknown ratios.
  // With a plain object they resolve to truthy, non-iterable values and the
  // destructuring below throws at runtime instead of falling back.
  const ratios = new Map<string, [number, number]>([
    ["16:9", [16, 9]], ["9:16", [9, 16]], ["4:3", [4, 3]], ["3:4", [3, 4]],
    ["3:2", [3, 2]], ["2:3", [2, 3]], ["1:1", [1, 1]], ["auto", [16, 9]],
  ]);
  const [w, h] = ratios.get(aspectRatio) ?? fallback;
  if (w >= h) {
    // Landscape or square: height is the short side.
    return { width: Math.round(base * w / h), height: base };
  }
  // Portrait: width is the short side.
  return { width: base, height: Math.round(base * h / w) };
}
138
+
139
/**
 * Return the base64 body of the embedded placeholder PNG (no external deps).
 *
 * The value is a fixed literal whose PNG header declares a 1x1, 8-bit RGBA image.
 * It is returned without a `data:` URL prefix; callers add their own.
 *
 * NOTE(review): the image is labelled as a white pixel and is presumably only a
 * stand-in that the model does not treat as a real frame. Neither point is
 * established by this function; decode the literal with a PNG reader to confirm
 * the pixel data before relying on it.
 *
 * @returns Base64-encoded PNG bytes.
 */
function generateWhiteCanvasB64(): string {
  return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/58BAwAHBQKhPX8EPAAAAABJRU5ErkJggg==";
}
146
+
127
147
  const FAILED_CODE_MAP: Record<string, { code: string; status: number }> = {
128
148
  invalid_argument: { code: "GROK_VIDEO_REQUEST_FAILED", status: 400 },
129
149
  permission_denied: { code: "GROK_VIDEO_REQUEST_FAILED", status: 403 },
@@ -449,10 +469,24 @@ export async function generateVideoViaGrok(prompt: string, ctx: RouteRuntimeCont
449
469
  const payload = buildVideoGenerationPayload(plan, { model, sourceImageUrl: srcUrl, referenceImageUrls: refUrls });
450
470
  let xaiVideoRequestId: string;
451
471
  let effectiveModel = model;
472
+
473
+ // grokv1.5 doesn't support T2V — inject a white canvas as source image to use I2V path
474
+ let effectivePayload = payload;
475
+ if (model === "grok-imagine-video-1.5-preview" && !srcUrl && refUrls.length === 0) {
476
+ const { width, height } = aspectToCanvas(plan.aspectRatio, plan.resolution);
477
+ const whiteCanvas = generateWhiteCanvasB64();
478
+ const canvasSrcUrl = `data:image/png;base64,${whiteCanvas}`;
479
+ effectivePayload = buildVideoGenerationPayload(
480
+ { ...plan, prompt: `${plan.prompt}. This is not a start frame — generate freely as a new video.` },
481
+ { model, sourceImageUrl: canvasSrcUrl, referenceImageUrls: [] },
482
+ );
483
+ logEvent("grok", "video:1.5-t2v-canvas", { requestId: options.requestId, width, height });
484
+ }
485
+
452
486
  try {
453
- xaiVideoRequestId = await startVideoRequest(ctx, payload, options);
487
+ xaiVideoRequestId = await startVideoRequest(ctx, effectivePayload, options);
454
488
  } catch (e: any) {
455
- // Fallback: if 1.5-preview fails, retry with base model
489
+ // Fallback: if 1.5-preview still fails, retry with base model
456
490
  if (model !== "grok-imagine-video" && e?.status === 400) {
457
491
  effectiveModel = "grok-imagine-video";
458
492
  const fallbackPayload = buildVideoGenerationPayload(plan, { model: effectiveModel, sourceImageUrl: srcUrl, referenceImageUrls: refUrls });
@@ -34,6 +34,7 @@ export async function listHistoryRows(baseDir = config.storage.generatedDir) {
34
34
  url: `/generated/${rel.split("/").map(encodeURIComponent).join("/")}`,
35
35
  mediaType: meta?.mediaType || (/\.mp4$/i.test(name) ? "video" : "image"),
36
36
  video: meta?.video || null,
37
+ videoSeries: meta?.videoSeries || null,
37
38
  createdAt: meta?.createdAt || st?.mtimeMs || 0,
38
39
  prompt: meta?.prompt || null,
39
40
  userPrompt: meta?.userPrompt || meta?.prompt || null,
@@ -36,6 +36,7 @@ export async function listHistoryRows(baseDir = config.storage.generatedDir) {
36
36
  url: `/generated/${rel.split("/").map(encodeURIComponent).join("/")}`,
37
37
  mediaType: meta?.mediaType || (/\.mp4$/i.test(name) ? "video" : "image"),
38
38
  video: meta?.video || null,
39
+ videoSeries: meta?.videoSeries || null,
39
40
  createdAt: meta?.createdAt || st?.mtimeMs || 0,
40
41
  prompt: meta?.prompt || null,
41
42
  userPrompt: meta?.userPrompt || meta?.prompt || null,
@@ -0,0 +1,24 @@
1
+ import { readdir, readFile } from "fs/promises";
2
+ import { join } from "path";
3
/**
 * Collect the revised prompts of the most recent videos that belong to a series.
 *
 * Every `*.mp4.json` sidecar directly inside `generatedDir` is read and parsed. A
 * sidecar matches when its `videoSeries.topic` equals `topic` exactly and it has a
 * truthy `revisedPrompt`. Matches are ranked by `createdAt` (newest first), cut to
 * `limit`, and returned oldest-to-newest.
 *
 * @param {string} generatedDir - Directory to scan. If it cannot be listed the
 *   result is an empty array.
 * @param {string} topic - Series topic to match. A blank, whitespace-only or
 *   non-string topic yields an empty array.
 * @param {number} [limit=4] - Maximum number of prompts to return. Zero, negative
 *   and non-numeric values yield an empty array.
 * @returns {Promise<string[]>} Revised prompts in chronological order.
 */
export async function getVideoSeriesChain(generatedDir, topic, limit = 4) {
    // Guard the type first: calling .trim() on null/undefined would throw.
    if (typeof topic !== "string" || topic.trim() === "") {
        return [];
    }
    // slice(0, n) with a negative n counts from the end and would silently return
    // the wrong entries, so the limit is clamped to a non-negative integer.
    const maxCount = Math.max(0, Math.trunc(Number(limit)) || 0);
    if (maxCount === 0) {
        return [];
    }
    const entries = await readdir(generatedDir).catch(() => []);
    const matches = [];
    for (const entry of entries) {
        if (!entry.endsWith(".mp4.json")) {
            continue;
        }
        try {
            const meta = JSON.parse(await readFile(join(generatedDir, entry), "utf-8"));
            if (meta.videoSeries?.topic === topic && meta.revisedPrompt) {
                // A missing or non-numeric timestamp sorts as 0 so the comparator
                // below never produces NaN.
                const stamp = Number(meta.createdAt);
                matches.push({
                    revisedPrompt: meta.revisedPrompt,
                    createdAt: Number.isFinite(stamp) ? stamp : 0,
                });
            }
        }
        catch {
            // Deliberate best effort: an unreadable or malformed sidecar is skipped
            // so that one bad file cannot hide the rest of the series.
        }
    }
    matches.sort((a, b) => b.createdAt - a.createdAt);
    return matches.slice(0, maxCount).reverse().map((m) => m.revisedPrompt);
}
@@ -0,0 +1,29 @@
1
+ import { readdir, readFile } from "fs/promises";
2
+ import { join } from "path";
3
+
4
/** Fields of a video sidecar (`*.mp4.json`) that the series lookup reads. */
interface VideoSeriesMeta {
  revisedPrompt?: string;
  createdAt?: number;
  videoSeries?: { topic: string; chainIndex?: number };
}

/**
 * Collect the revised prompts of the most recent videos that belong to a series.
 *
 * Every `*.mp4.json` sidecar directly inside `generatedDir` is read and parsed. A
 * sidecar matches when its `videoSeries.topic` equals `topic` exactly and it has a
 * truthy `revisedPrompt`. Matches are ranked by `createdAt` (newest first), cut to
 * `limit`, and returned oldest-to-newest.
 *
 * @param generatedDir - Directory to scan. If it cannot be listed the result is an
 *   empty array.
 * @param topic - Series topic to match. A blank, whitespace-only or non-string
 *   topic yields an empty array.
 * @param limit - Maximum number of prompts to return (default 4). Zero, negative
 *   and non-numeric values yield an empty array.
 * @returns Revised prompts in chronological order.
 */
export async function getVideoSeriesChain(generatedDir: string, topic: string, limit = 4): Promise<string[]> {
  // Guard the type first: untyped callers may pass null/undefined, and calling
  // .trim() on those would throw.
  if (typeof topic !== "string" || topic.trim() === "") return [];
  // slice(0, n) with a negative n counts from the end and would silently return
  // the wrong entries, so the limit is clamped to a non-negative integer.
  const maxCount = Math.max(0, Math.trunc(Number(limit)) || 0);
  if (maxCount === 0) return [];

  const entries = await readdir(generatedDir).catch(() => [] as string[]);
  const matches: Array<{ revisedPrompt: string; createdAt: number }> = [];
  for (const entry of entries) {
    if (!entry.endsWith(".mp4.json")) continue;
    try {
      // The parsed JSON is unvalidated; the cast only documents the fields read here.
      const meta = JSON.parse(await readFile(join(generatedDir, entry), "utf-8")) as VideoSeriesMeta;
      if (meta.videoSeries?.topic === topic && meta.revisedPrompt) {
        // A missing or non-numeric timestamp sorts as 0 so the comparator below
        // never produces NaN.
        const stamp = Number(meta.createdAt);
        matches.push({
          revisedPrompt: meta.revisedPrompt,
          createdAt: Number.isFinite(stamp) ? stamp : 0,
        });
      }
    } catch {
      // Deliberate best effort: an unreadable or malformed sidecar is skipped so
      // that one bad file cannot hide the rest of the series.
    }
  }
  matches.sort((a, b) => b.createdAt - a.createdAt);
  return matches.slice(0, maxCount).reverse().map((m) => m.revisedPrompt);
}
@@ -1,6 +1,74 @@
1
1
  # progrok
2
2
 
3
- Use Grok models for free via OAuth proxy. No API key needed.
3
+ [![npm version](https://img.shields.io/npm/v/progrok.svg)](https://www.npmjs.com/package/progrok)
4
+ [![license: MIT](https://img.shields.io/badge/license-MIT-16a085.svg)](./LICENSE)
5
+ [![docs](https://img.shields.io/badge/docs-GitHub%20Pages-4cc9a6.svg)](https://lidge-jun.github.io/progrok/)
6
+ [![node](https://img.shields.io/badge/node-%3E%3D18-2d3748.svg)](./package.json)
7
+
8
+ Activate your xAI Grok OAuth session as a local API and tool surface.
9
+
10
+ `progrok` is an OAuth bridge for Grok. It signs in with your xAI account, stores
11
+ a refreshable local OAuth session, and activates that session through two
12
+ developer-facing surfaces:
13
+
14
+ 1. an OpenAI-compatible localhost proxy that forwards `/v1/*` requests to
15
+ `api.x.ai`, and
16
+ 2. direct CLI commands for Grok workflows that need source selection, JSON
17
+ output, async polling, local files, or machine-readable metadata.
18
+
19
+ The point is not only "no API key." The point is that Hermes Agent, OpenClaw,
20
+ and Grok Build-style coding workflows all rely on the same xAI OAuth credential
21
+ lineage: the xAI account session is the authority, and local tools need a way to
22
+ turn that session into a programmable endpoint. progrok is that activation tool.
23
+ Point the OpenAI SDK, curl scripts, or agent tools at `127.0.0.1:18645` and let
24
+ progrok inject the real xAI bearer token locally.
25
+
26
+ > Requires an active SuperGrok subscription. progrok does not bypass xAI account
27
+ > access, quotas, pricing, or product limits.
28
+
29
+ ## Links
30
+
31
+ - Live docs: [lidge-jun.github.io/progrok](https://lidge-jun.github.io/progrok/)
32
+ - OAuth bridge: [lidge-jun.github.io/progrok/docs/concepts/oauth-bridge](https://lidge-jun.github.io/progrok/docs/concepts/oauth-bridge)
33
+ - Quick start: [lidge-jun.github.io/progrok/docs/quickstart](https://lidge-jun.github.io/progrok/docs/quickstart)
34
+ - npm: [npmjs.com/package/progrok](https://www.npmjs.com/package/progrok)
35
+ - Repository: [github.com/lidge-jun/progrok](https://github.com/lidge-jun/progrok)
36
+
37
+ ## Why OAuth
38
+
39
+ xAI account access is session-based in the tools that made this workflow
40
+ useful. Hermes Agent and OpenClaw document the shared xAI OAuth client
41
+ identifier used by progrok, and Grok Build-style coding workflows benefit from
42
+ the same model: authenticate once with the xAI account, then expose Grok to
43
+ developer tooling through a local API surface.
44
+
45
+ That changes the shape of the problem:
46
+
47
+ - the user account and subscription decide what models and tools are available;
48
+ - the local machine holds the refreshable credential;
49
+ - existing SDKs and agents expect a base URL plus an API key;
50
+ - Grok media, search, and model discovery need more workflow glue than a raw
51
+ HTTP proxy provides.
52
+
53
+ progrok handles that glue. It activates the OAuth credential as a proxy for
54
+ OpenAI-compatible clients and as direct commands for search, images, video,
55
+ models, and capability discovery.
56
+
57
+ ## What "activation tool" means
58
+
59
+ After `progrok login`, the stored OAuth session powers every surface below:
60
+
61
+ | Surface | Command or URL | What gets activated |
62
+ | --- | --- | --- |
63
+ | OpenAI-compatible API | `http://127.0.0.1:18645/v1/*` | Chat, Responses, reasoning, structured output, server-side tools, files, batches, and other HTTP xAI API paths your account can access. |
64
+ | Current search | `progrok search` | Grok Responses with web search, X search, citations, JSON output, and optional reasoning effort. |
65
+ | Image workflows | `progrok image` | Imagine generation and editing with local reference files and output handling. |
66
+ | Video workflows | `progrok video` | Async video submission, polling, progress display, and download handling. |
67
+ | Coding models | `grok-build-0.1` through the proxy | Grok Build-style coding work from clients that can point at a local OpenAI-compatible endpoint. |
68
+ | Agent discovery | `progrok capabilities --json` | Machine-readable ports, commands, models, endpoints, and auth requirements. |
69
+
70
+ The placeholder `OPENAI_API_KEY` or `Authorization` value is only there to
71
+ satisfy client libraries. progrok replaces it before forwarding the request.
4
72
 
5
73
  ## Install
6
74
 
@@ -11,43 +79,253 @@ npm install -g progrok
11
79
  ## Quick Start
12
80
 
13
81
  ```bash
14
- # 1. Login with your xAI account (SuperGrok subscription required)
82
+ # 1. Activate your xAI OAuth session.
15
83
  progrok login
16
84
 
17
- # 2. Start the proxy server
85
+ # SSH or remote machine:
86
+ progrok login --device-code
87
+
88
+ # 2. Start the OpenAI-compatible local proxy.
18
89
  progrok proxy
19
90
 
20
- # 3. Use from any OpenAI-compatible client
91
+ # 3. Call Grok through localhost.
21
92
  curl http://127.0.0.1:18645/v1/chat/completions \
22
93
  -H "Authorization: Bearer anything" \
23
94
  -H "Content-Type: application/json" \
24
- -d '{"model": "grok-4.3", "messages": [{"role": "user", "content": "Hello"}]}'
95
+ -d '{"model":"grok-4.3","messages":[{"role":"user","content":"Hello"}]}'
96
+ ```
97
+
98
+ The proxy replaces the placeholder `Authorization` value with your stored xAI
99
+ OAuth bearer token before forwarding the request. The API key value in your
100
+ client can be any non-empty placeholder.
101
+
102
+ For direct tool activation, the proxy process is optional:
103
+
104
+ ```bash
105
+ progrok search --x --json "Grok Build release discussion"
106
+ progrok image "a precise product diagram of an OAuth bridge CLI" --output ./out
107
+ progrok video "a local proxy turning on Grok tools" --duration 5
108
+ progrok models --detail
109
+ progrok capabilities --json
110
+ ```
111
+
112
+ ## OpenAI SDK Example
113
+
114
+ ```python
115
+ from openai import OpenAI
116
+
117
+ client = OpenAI(
118
+ base_url="http://127.0.0.1:18645/v1",
119
+ api_key="anything",
120
+ )
121
+
122
+ result = client.chat.completions.create(
123
+ model="grok-4.3",
124
+ messages=[{"role": "user", "content": "Explain MCP in 5 bullets"}],
125
+ )
126
+
127
+ print(result.choices[0].message.content)
128
+ ```
129
+
130
+ Shell configuration for tools that respect OpenAI-compatible environment
131
+ variables:
132
+
133
+ ```bash
134
+ export OPENAI_BASE_URL=http://127.0.0.1:18645/v1
135
+ export OPENAI_API_KEY=anything
25
136
  ```
26
137
 
27
138
  ## Commands
28
139
 
29
- | Command | Description |
30
- |---------|-------------|
31
- | `progrok login` | OAuth login via browser |
32
- | `progrok login --device-code` | Login via device code (SSH/remote) |
33
- | `progrok proxy` | Start OpenAI-compatible proxy on port 18645 |
34
- | `progrok chat` | Open web chat UI in browser |
35
- | `progrok models` | List available Grok models |
36
- | `progrok status` | Show auth status |
37
- | `progrok logout` | Remove stored credentials |
140
+ | Command | Use it for |
141
+ | --- | --- |
142
+ | `progrok login` | Browser OAuth login with your xAI account. |
143
+ | `progrok login --device-code` | OAuth login for SSH, CI shells, or remote machines. |
144
+ | `progrok logout` | Remove stored local credentials. |
145
+ | `progrok status` | Check whether a local OAuth session exists. |
146
+ | `progrok proxy` | Start the local OpenAI-compatible proxy on `127.0.0.1:18645`. |
147
+ | `progrok chat` | Open the local browser chat UI on `127.0.0.1:18646`. |
148
+ | `progrok models --detail` | List model aliases, pricing, context windows, and media models. |
149
+ | `progrok search <query>` | Search web and X sources through Grok Responses tools. |
150
+ | `progrok search <query> --web` | Restrict search to web sources. |
151
+ | `progrok search <query> --x` | Restrict search to X sources. |
152
+ | `progrok search <query> --reasoning high` | Add reasoning effort to a search request. |
153
+ | `progrok image <prompt>` | Generate an Imagine image. |
154
+ | `progrok image <prompt> --ref ./input.png` | Edit or compose from a reference image. |
155
+ | `progrok video <prompt>` | Submit a text-to-video job and poll until completion. |
156
+ | `progrok video <prompt> --image ./input.png` | Submit an image-to-video job. |
157
+ | `progrok capabilities --json` | Print machine-readable command, model, and endpoint metadata. |
158
+ | `progrok skill` | Print an agent-oriented usage guide. |
159
+
160
+ ## Native Search
161
+
162
+ `progrok search` calls xAI's Responses API directly with `web_search` and
163
+ `x_search` tools. It does not require the proxy process to be running because it
164
+ loads the same OAuth session directly.
165
+
166
+ ```bash
167
+ progrok search "latest Astro release"
168
+ progrok search --web "Node.js 22 features"
169
+ progrok search --x "grok API launch"
170
+ progrok search --json "rust async traits"
171
+ progrok search --model grok-4.20-multi-agent-0309 --reasoning xhigh \
172
+ "compare current open-source browser automation tools"
173
+ ```
174
+
175
+ Reasoning effort values: `none`, `low`, `medium`, `high`, `xhigh`.
176
+
177
+ ## Image and Video
178
+
179
+ Image generation:
180
+
181
+ ```bash
182
+ progrok image "a crisp terminal UI product shot for a CLI called progrok"
183
+ progrok image "make this diagram cleaner" --ref ./diagram.png --output ./out
184
+ ```
185
+
186
+ Video generation:
187
+
188
+ ```bash
189
+ progrok video "a terminal command expanding into a network diagram"
190
+ progrok video "turn this interface into a smooth product demo" --image ./screen.png
191
+ progrok video "short launch animation" --model grok-imagine-video-1.5-preview
192
+ ```
193
+
194
+ Media commands call xAI endpoints directly with your OAuth session and poll async
195
+ jobs until completion.
196
+
197
+ ## Proxy Coverage
198
+
199
+ The proxy forwards every HTTP `/v1/*` path to `api.x.ai`, so it can activate the
200
+ xAI API surface available to your account:
201
+
202
+ - Chat Completions and Responses
203
+ - reasoning, citations, structured output, and tool calls
204
+ - image generation and editing
205
+ - video generation, editing, extension, and polling
206
+ - text-to-speech, speech-to-text, and realtime client-secret minting
207
+ - files, batches, tokenizer, models, and collection search
208
+
209
+ WebSocket endpoints are not proxied. For realtime voice streams, mint a client
210
+ secret through the HTTP proxy and connect directly to xAI's WebSocket endpoint.
211
+
212
+ ## Models
213
+
214
+ | Model | Best for | Context | Notes |
215
+ | --- | --- | --- | --- |
216
+ | `grok-4.3` | Default chat, tools, search, vision | 1M | Also available through common Grok aliases. |
217
+ | `grok-build-0.1` | Fast agentic coding | 256K | Good default for Grok Build-style coding tools through the OAuth proxy. |
218
+ | `grok-4.20-0309-reasoning` | Deep reasoning | 200K+ | Legacy reasoning model. |
219
+ | `grok-4.20-0309-non-reasoning` | Lower-latency text | 200K+ | Legacy non-reasoning model. |
220
+ | `grok-4.20-multi-agent-0309` | Deep research | 200K+ | Supports high and xhigh effort. |
221
+ | `grok-imagine-image` | Image generation and editing | - | Billed per image. |
222
+ | `grok-imagine-image-quality` | Higher-quality image output | - | Billed per image. |
223
+ | `grok-imagine-video` | Video generation and editing | - | Billed per second. |
224
+ | `grok-imagine-video-1.5-preview` | Video v1.5 preview | - | Improved image-to-video behavior. |
225
+
226
+ Run the live metadata command before relying on a model in automation:
227
+
228
+ ```bash
229
+ progrok models --detail
230
+ progrok capabilities --json
231
+ ```
38
232
 
39
233
  ## How It Works
40
234
 
41
- progrok authenticates with xAI via OAuth (the same flow as Grok web app), then runs a local proxy that injects your OAuth token into API requests. Any OpenAI-compatible client can connect to the proxy — no API key purchase required.
235
+ ```text
236
+ OpenAI client, coding agent, curl script, or local tool
237
+ -> http://127.0.0.1:18645/v1/*
238
+ -> progrok loads ~/.progrok/auth.json
239
+ -> progrok refreshes the token if needed
240
+ -> progrok injects the xAI OAuth bearer token
241
+ -> https://api.x.ai/v1/*
242
+ ```
243
+
244
+ Credentials are stored locally at `~/.progrok/auth.json` and refreshed before
245
+ expiry. Treat that file like any other account credential.
246
+
247
+ The direct command path is similar but skips the proxy server:
248
+
249
+ ```text
250
+ progrok search / image / video / models / capabilities
251
+ -> load the same local OAuth session
252
+ -> call the relevant xAI endpoint
253
+ -> add CLI-specific behavior such as polling, files, or JSON output
254
+ ```
255
+
256
+ ## Relationship to Hermes Agent, OpenClaw, and Grok Build
257
+
258
+ progrok's OAuth client attribution comes from Hermes Agent and OpenClaw under
259
+ their MIT licenses. Those projects demonstrated the important part: Grok can be
260
+ made useful to local developer tools through xAI OAuth rather than through a
261
+ manually provisioned API key.
262
+
263
+ progrok takes that pattern and packages it as a focused bridge:
264
+
265
+ - Hermes Agent and OpenClaw establish the shared OAuth client lineage.
266
+ - Grok Build-style workflows need a coding model reachable from agent tools.
267
+ - progrok provides the localhost OpenAI-compatible endpoint and direct commands
268
+ that let those tools use the same authenticated account session.
269
+
270
+ This is why the documentation describes progrok as an activation tool. Login is
271
+ the authorization step; the proxy and CLI commands are the activated surfaces.
42
272
 
43
- ## Supported Models
273
+ ## Security Notes
44
274
 
45
- `grok-4.3` (default) — General-purpose reasoning
46
- `grok-4.20-beta-latest-reasoning` — Deep reasoning
47
- `grok-4.20-beta-latest-non-reasoning` — Fast responses
48
- `grok-build-0.1` — Code-optimized
49
- - And more via `progrok models`
275
+ - The proxy binds to localhost by default.
276
+ - Do not expose the proxy port to a public network without adding your own access
277
+ controls.
278
+ - The placeholder API key sent by OpenAI-compatible clients is ignored by the
279
+ proxy and replaced with your xAI OAuth token.
280
+ - `progrok logout` removes the local credential file.
281
+ - Requests are forwarded to xAI. Sensitive prompt data should be handled under
282
+ the same policy you use for direct xAI API usage.
283
+ - The OAuth file enables account-backed access. Do not commit it, sync it to
284
+ untrusted machines, or share it between users.
285
+
286
+ ## Troubleshooting
287
+
288
+ ### `progrok status` says no session
289
+
290
+ Run `progrok login` again. On remote machines, use `progrok login --device-code`.
291
+
292
+ ### The proxy starts but clients fail
293
+
294
+ Check that the client points to:
295
+
296
+ ```bash
297
+ http://127.0.0.1:18645/v1
298
+ ```
299
+
300
+ Also check that the client sends a non-empty API key placeholder.
301
+
302
+ ### Port 18645 is already in use
303
+
304
+ Stop the existing process or start the proxy on another port if your version
305
+ supports a port flag. Then update `OPENAI_BASE_URL` accordingly.
306
+
307
+ ### A model returns 404 or 400
308
+
309
+ Run:
310
+
311
+ ```bash
312
+ progrok models --detail
313
+ ```
314
+
315
+ Model aliases and preview names can change. Use the live list before scripting a
316
+ long-running workflow.
317
+
318
+ ### Search, image, or video commands fail
319
+
320
+ These commands call xAI directly with OAuth and may be subject to product access,
321
+ rate limits, quota, and account capability. Start with:
322
+
323
+ ```bash
324
+ progrok status
325
+ progrok capabilities --json
326
+ ```
50
327
 
51
328
  ## License
52
329
 
53
- MIT — See [THIRD_PARTY_NOTICES.md](./THIRD_PARTY_NOTICES.md) for OAuth client attribution.
330
+ MIT. See [THIRD_PARTY_NOTICES.md](./THIRD_PARTY_NOTICES.md) for OAuth client
331
+ attribution.