@j-o-r/hello-dave 0.0.10 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/README.md.bak.1779452127 +240 -0
- package/TODO.md +30 -8
- package/agents/code_agent.js +6 -6
- package/agents/daisy_agent.js +10 -7
- package/agents/minimax.js +173 -0
- package/agents/stability.js +173 -0
- package/bin/codeDave +1 -1
- package/bin/dave.js +1 -1
- package/docs/music-toolsets.md +137 -0
- package/docs/plans/minimax-music-generation.md +80 -0
- package/docs/plans/unified-agent-architecture.md +146 -0
- package/docs/plans/websocket-streaming-plan.md.bak +317 -0
- package/docs/prompt/task_clarification_and_documentation.md +35 -0
- package/lib/API/minimax/ImageToolset.js +169 -0
- package/lib/API/minimax/MusicToolset.js +290 -0
- package/lib/API/minimax/VideoToolset.js +296 -0
- package/lib/API/minimax/image.generation.md +239 -0
- package/lib/API/minimax/image.js +219 -0
- package/lib/API/minimax/image.to.image.md +257 -0
- package/lib/API/minimax/index.js +16 -0
- package/lib/API/minimax/music.cover.preprocess.md +206 -0
- package/lib/API/minimax/music.generation.md +346 -0
- package/lib/API/minimax/music.js +257 -0
- package/lib/API/minimax/music.lyrics.generation.md +205 -0
- package/lib/API/minimax/video.download.md +133 -0
- package/lib/API/minimax/video.first.last.image.md +186 -0
- package/lib/API/minimax/video.from.image.md +206 -0
- package/lib/API/minimax/video.from.subject.md +164 -0
- package/lib/API/minimax/video.generation.md +192 -0
- package/lib/API/minimax/video.js +339 -0
- package/lib/API/minimax/video.query.md +128 -0
- package/lib/API/stability.ai/ImageToolset.js +357 -0
- package/lib/API/stability.ai/MusicToolset.js +302 -0
- package/lib/API/stability.ai/audio-3.md +205 -0
- package/lib/API/stability.ai/audio.js +679 -0
- package/lib/API/stability.ai/image.js +911 -0
- package/lib/API/stability.ai/image.md +271 -0
- package/lib/API/stability.ai/index.js +11 -0
- package/lib/API/stability.ai/openapi.json +17118 -0
- package/lib/API/x.ai/ImageToolset.js +165 -0
- package/lib/API/x.ai/image.editing.md +86 -0
- package/lib/API/x.ai/image.js +393 -0
- package/lib/API/x.ai/image.md +213 -0
- package/lib/API/x.ai/image.to.generation.md +494 -0
- package/lib/API/x.ai/image.to.video.md +23 -0
- package/lib/API/x.ai/index.js +7 -0
- package/lib/AgentManager.js +1 -1
- package/lib/CdnToolset.js +191 -0
- package/lib/ToolSet.js +19 -1
- package/lib/cdn.js +373 -0
- package/lib/fafs.js +3 -1
- package/lib/genericToolset.js +43 -166
- package/lib/index.js +9 -1
- package/package.json +2 -2
- package/types/API/minimax/ImageToolset.d.ts +3 -0
- package/types/API/minimax/MusicToolset.d.ts +3 -0
- package/types/API/minimax/VideoToolset.d.ts +3 -0
- package/types/API/minimax/image.d.ts +109 -0
- package/types/API/minimax/index.d.ts +15 -0
- package/types/API/minimax/music.d.ts +46 -0
- package/types/API/minimax/video.d.ts +165 -0
- package/types/API/stability.ai/ImageToolset.d.ts +3 -0
- package/types/API/stability.ai/MusicToolset.d.ts +3 -0
- package/types/API/stability.ai/audio.d.ts +193 -0
- package/types/API/stability.ai/image.d.ts +274 -0
- package/types/API/stability.ai/index.d.ts +11 -0
- package/types/API/x.ai/ImageToolset.d.ts +3 -0
- package/types/API/x.ai/image.d.ts +82 -0
- package/types/API/x.ai/index.d.ts +7 -0
- package/types/AgentManager.d.ts +1 -1
- package/types/CdnToolset.d.ts +20 -0
- package/types/ToolSet.d.ts +8 -0
- package/types/cdn.d.ts +141 -0
- package/types/index.d.ts +9 -2
- package/docs/multi-agent-clusters.md.bak +0 -229
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/x.ai/ImageToolset.js
|
|
3
|
+
* @module x.ai/ImageToolset
|
|
4
|
+
* @description Comprehensive ToolSet for the xAI Grok Imagine API.
|
|
5
|
+
*
|
|
6
|
+
* Exposes high-level tools for:
|
|
7
|
+
* - generate_image → Text-to-image generation
|
|
8
|
+
* - edit_image → Natural language image editing (supports up to 3 reference images)
|
|
9
|
+
* - generate_video → Image-to-video generation (fully automatic)
|
|
10
|
+
*
|
|
11
|
+
* Designed for AI agents and LLMs. Each tool includes rich descriptions and JSON schemas.
|
|
12
|
+
*
|
|
13
|
+
* @see ./image.js for the underlying implementation
|
|
14
|
+
* @see ./image.md for the full API specification
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import ToolSet from '../../ToolSet.js';
|
|
18
|
+
import * as xai from './image.js';
|
|
19
|
+
|
|
20
|
+
const tools = new ToolSet('auto');
|
|
21
|
+
|
|
22
|
+
/* ============================================================
|
|
23
|
+
GENERATE IMAGE
|
|
24
|
+
============================================================ */
|
|
25
|
+
|
|
26
|
+
// Tool: text-to-image generation.
// The handler forwards the whole params object as the options argument of
// xai.generateImage() and returns a JSON string describing the saved image.
tools.add(
  'generate_image',
  'Generate high-quality images from a text prompt using Grok Imagine. ' +
  'Supports configurable number of images and output format. ' +
  'Returns the local file path of the generated image(s).',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Detailed English prompt describing the desired image. ' +
          'Be specific about style, lighting, composition, and subject.'
      },
      n: {
        type: 'integer',
        minimum: 1,
        maximum: 10,
        default: 1,
        description: 'Number of images to generate in one request.'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png',
        description: 'Preferred output format.'
      }
    },
    required: ['prompt']
  },
  async (params) => {
    const result = await xai.generateImage(params.prompt, params);

    // Without a saved file there is nothing to hand back; fail loudly instead
    // of returning a "success" note with no path in it.
    if (!result.local_path) {
      throw new Error('generate_image: xAI returned no image data');
    }

    // `raw` is the unmodified API response; when several images were requested
    // its `data` array carries one entry per image, so surface every URL.
    const rawItems = Array.isArray(result.raw?.data) ? result.raw.data : [];
    const urls = rawItems.map((item) => item?.url).filter(Boolean);

    return JSON.stringify({
      local_path: result.local_path,
      url: result.url,
      // Omitted by JSON.stringify when only a single image is available.
      urls: urls.length > 1 ? urls : undefined,
      revised_prompt: result.revised_prompt,
      note: 'Image generated successfully with generate_image.'
    }, null, 2);
  }
);
|
|
66
|
+
|
|
67
|
+
/* ============================================================
|
|
68
|
+
EDIT IMAGE
|
|
69
|
+
============================================================ */
|
|
70
|
+
|
|
71
|
+
// Tool: natural-language image editing with one to three reference images.
// Accepts either `image_url` (single) or `image_urls` (array); the array wins
// when it is non-empty.
tools.add(
  'edit_image',
  'Edit one or more images using natural language instructions. ' +
  'Supports up to 3 reference images for compositing, style transfer, or multi-subject editing. ' +
  'Provide image URLs, local paths, or base64 data.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Natural language description of the edit you want to apply.'
      },
      image_url: {
        type: 'string',
        description: 'Single image URL or path to edit.'
      },
      image_urls: {
        type: 'array',
        items: { type: 'string' },
        maxItems: 3,
        description: 'Array of up to 3 image URLs or paths for multi-image editing.'
      },
      output_format: {
        type: 'string',
        enum: ['png', 'jpeg', 'webp'],
        default: 'png'
      }
    },
    required: ['prompt']
  },
  async (params) => {
    // An empty `image_urls` array must not shadow a valid `image_url`.
    const hasList = Array.isArray(params.image_urls) && params.image_urls.length > 0;
    const images = hasList
      ? params.image_urls
      : (params.image_url ? [params.image_url] : []);

    // An empty array is truthy, so it would pass a plain falsy check downstream
    // and reach the API as an empty image list. Reject it here with a clear message.
    if (images.length === 0) {
      throw new Error('edit_image requires image_url or image_urls');
    }

    const result = await xai.editImage(params.prompt, images, params);

    return JSON.stringify({
      local_path: result.local_path,
      url: result.url,
      revised_prompt: result.revised_prompt,
      note: 'Image edited successfully with edit_image.'
    }, null, 2);
  }
);
|
|
113
|
+
|
|
114
|
+
/* ============================================================
|
|
115
|
+
GENERATE VIDEO
|
|
116
|
+
============================================================ */
|
|
117
|
+
|
|
118
|
+
// Tool: image-to-video generation.
// The handler awaits xai.generateVideo() and reports where the video was stored.
tools.add(
  'generate_video',
  'Generate a video from a still image and text prompt using Grok Imagine Video. ' +
  'This tool is fully automatic — it submits the request and waits internally until the video is ready. ' +
  'Returns the local file path of the generated video.',
  {
    type: 'object',
    properties: {
      prompt: {
        type: 'string',
        description: 'Description of the desired motion, camera movement, or animation.'
      },
      image_url: {
        type: 'string',
        description: 'Source image URL or local path.'
      },
      duration: {
        type: 'number',
        minimum: 1,
        maximum: 15,
        default: 8,
        description: 'Video duration in seconds.'
      },
      aspect_ratio: {
        type: 'string',
        default: '16:9',
        description: 'Aspect ratio of the output video.'
      },
      resolution: {
        type: 'string',
        default: '720p',
        description: 'Output resolution.'
      }
    },
    required: ['prompt', 'image_url']
  },
  async (params) => {
    // The full params object doubles as the options argument.
    const video = await xai.generateVideo(params.prompt, params.image_url, params);

    const summary = {
      local_path: video.local_path,
      url: video.url,
      note: 'Video generated successfully with generate_video.'
    };
    return JSON.stringify(summary, null, 2);
  }
);
|
|
164
|
+
|
|
165
|
+
export default tools;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#### Model Capabilities
|
|
2
|
+
|
|
3
|
+
# Image Editing
|
|
4
|
+
|
|
5
|
+
Edit an existing image by providing a source image along with your prompt. The model understands the image content and applies your requested changes.
|
|
6
|
+
|
|
7
|
+
> [!WARNING]
|
|
8
|
+
>
|
|
9
|
+
> The OpenAI SDK's `images.edit()` method is not supported for image editing because it uses `multipart/form-data`, while the xAI API requires `application/json`. Use the xAI SDK, Vercel AI SDK, or direct HTTP requests instead.
|
|
10
|
+
|
|
11
|
+
With the xAI SDK, use the same `sample()` method; just add the `image_url` parameter:
|
|
12
|
+
|
|
13
|
+
```python customLanguage="pythonXAI"
|
|
14
|
+
import base64
|
|
15
|
+
import xai_sdk
|
|
16
|
+
|
|
17
|
+
client = xai_sdk.Client()
|
|
18
|
+
|
|
19
|
+
# Load image from file and encode as base64
|
|
20
|
+
with open("photo.png", "rb") as f:
|
|
21
|
+
image_data = base64.b64encode(f.read()).decode("utf-8")
|
|
22
|
+
|
|
23
|
+
response = client.image.sample(
|
|
24
|
+
prompt="Render this as a pencil sketch with detailed shading",
|
|
25
|
+
model="grok-imagine-image-quality",
|
|
26
|
+
image_url=f"data:image/png;base64,{image_data}",
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
print(response.url)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Using a public URL as the source image
|
|
34
|
+
curl -X POST https://api.x.ai/v1/images/edits \
|
|
35
|
+
-H "Content-Type: application/json" \
|
|
36
|
+
-H "Authorization: Bearer $XAI_API_KEY" \
|
|
37
|
+
-d '{
|
|
38
|
+
"model": "grok-imagine-image-quality",
|
|
39
|
+
"prompt": "Render this as a pencil sketch with detailed shading",
|
|
40
|
+
"image": {
|
|
41
|
+
"url": "https://docs.x.ai/assets/api-examples/images/style-realistic.png",
|
|
42
|
+
"type": "image_url"
|
|
43
|
+
}
|
|
44
|
+
}'
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
```javascript customLanguage="javascriptAISDK"
|
|
48
|
+
import { xai } from "@ai-sdk/xai";
|
|
49
|
+
import { generateImage } from "ai";
|
|
50
|
+
import fs from "fs";
|
|
51
|
+
|
|
52
|
+
// Load image and encode as base64
|
|
53
|
+
const imageBuffer = fs.readFileSync("photo.png");
|
|
54
|
+
const base64Image = imageBuffer.toString("base64");
|
|
55
|
+
|
|
56
|
+
const { image } = await generateImage({
|
|
57
|
+
model: xai.image("grok-imagine-image-quality"),
|
|
58
|
+
prompt: {
|
|
59
|
+
text: "Render this as a pencil sketch with detailed shading",
|
|
60
|
+
images: [`data:image/png;base64,${base64Image}`],
|
|
61
|
+
},
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
console.log(image.base64);
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
You can provide the source image as:
|
|
68
|
+
|
|
69
|
+
* A **public URL** pointing to an image
|
|
70
|
+
* A **base64-encoded data URI** (e.g., `data:image/jpeg;base64,...`)
|
|
71
|
+
|
|
72
|
+
## Multi-turn editing
|
|
73
|
+
|
|
74
|
+
Chain multiple edits together by using each output as the input for the next. This enables iterative refinement; start with a base image and progressively add details, adjust styles, or make corrections.
|
|
75
|
+
|
|
76
|
+
## Style transfer
|
|
77
|
+
|
|
78
|
+
The `grok-imagine-image-quality` model supports a wide range of visual styles, from ultra-realistic photography to anime, oil paintings, and pencil sketches. Transform existing images by describing the desired aesthetic in your prompt.
|
|
79
|
+
|
|
80
|
+
## Related
|
|
81
|
+
|
|
82
|
+
* [Image Generation](/developers/model-capabilities/images/generation) — Generate images from text prompts
|
|
83
|
+
* [Multi-Image Editing](/developers/model-capabilities/images/multi-image-editing) — Edit with multiple source images
|
|
84
|
+
* [API Reference](/developers/rest-api-reference) — Full endpoint documentation
|
|
85
|
+
* [Imagine API Landing Page](https://x.ai/api/imagine) — Showcase of the Imagine API in action
|
|
86
|
+
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file lib/API/x.ai/image.js
|
|
3
|
+
* @module x.ai/image
|
|
4
|
+
* @description Pure HTTP wrapper for the xAI Grok Imagine API.
|
|
5
|
+
*
|
|
6
|
+
* Provides a clean, minimal interface for:
|
|
7
|
+
* - Text-to-image generation
|
|
8
|
+
* - Natural language image editing (supports up to 3 reference images)
|
|
9
|
+
* - Image-to-video generation (fully automatic with internal polling)
|
|
10
|
+
*
|
|
11
|
+
* All functions handle authentication, request formatting, local file saving,
|
|
12
|
+
* and error handling. Video generation waits internally until the result is ready.
|
|
13
|
+
*
|
|
14
|
+
* @see ./image.md for the full API specification and examples
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* @constant {string} BASE_URL
|
|
19
|
+
* @description Base URL for the xAI API.
|
|
20
|
+
*/
|
|
21
|
+
const BASE_URL = 'https://api.x.ai/v1';
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @constant {string} TMP_DIR
|
|
25
|
+
* @description Local directory where generated images and videos are saved.
|
|
26
|
+
*/
|
|
27
|
+
const TMP_DIR = path.join(process.cwd(), '.cache', 'xai');
|
|
28
|
+
|
|
29
|
+
import { request as doRequest } from '@j-o-r/apiserver';
|
|
30
|
+
import fs from 'fs/promises';
|
|
31
|
+
import path from 'path';
|
|
32
|
+
|
|
33
|
+
/**
 * Builds authenticated JSON headers for xAI requests.
 *
 * The API key is read from the `XAIKEY` environment variable. `XAI_API_KEY`
 * is accepted as a fallback when `XAIKEY` is unset or empty.
 *
 * @returns {Object} Headers with `Authorization` (Bearer token) and `Content-Type`.
 * @throws {Error} If neither `XAIKEY` nor `XAI_API_KEY` is set.
 */
const getHeaders = () => {
  // `||` rather than `??` so an empty XAIKEY still falls through to the fallback.
  const apiKey = process.env.XAIKEY || process.env.XAI_API_KEY;
  if (!apiKey) {
    throw new Error('Missing XAIKEY! Please export XAIKEY=your_key');
  }
  return {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  };
};
|
|
48
|
+
|
|
49
|
+
/**
 * Creates the media cache directory (`TMP_DIR`), including any missing
 * parent directories.
 *
 * @async
 * @returns {Promise<void>} Resolves once `fs.mkdir` has completed.
 */
async function ensureTmpDir() {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(TMP_DIR, mkdirOptions);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Saves image or video data to a new file inside `TMP_DIR`.
 *
 * Accepted inputs:
 *  - `http://` / `https://` URL: downloaded with `fetch`
 *  - base64 data URI (`data:<mime>;base64,<payload>`): payload is decoded
 *  - raw base64 string: decoded
 *  - Buffer, typed array / DataView, ArrayBuffer or Blob: bytes written as-is
 *
 * @async
 * @param {string|Buffer|Blob|ArrayBuffer|ArrayBufferView} data - Media content to save.
 * @param {string} [filenamePrefix='xai-media'] - Prefix for the saved file.
 * @param {string} [ext='png'] - File extension (without the dot).
 * @returns {Promise<string>} Path of the saved file (`TMP_DIR` joined with the file name).
 * @throws {Error} If a download fails, or the input type / string format is unsupported.
 */
async function saveMediaToLocal(data, filenamePrefix = 'xai-media', ext = 'png') {
  await ensureTmpDir();

  // A timestamp alone collides when two files with the same prefix are saved
  // within one millisecond, so append a short random suffix.
  const uniqueSuffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
  const filename = `${filenamePrefix}-${Date.now()}-${uniqueSuffix}.${ext}`;
  const localPath = path.join(TMP_DIR, filename);

  let buffer;
  if (typeof data === 'string') {
    if (/^https?:\/\//i.test(data)) {
      const response = await fetch(data);
      if (!response.ok) throw new Error(`Failed to download: ${response.status}`);
      buffer = Buffer.from(await response.arrayBuffer());
    } else if (data.startsWith('data:')) {
      // Only base64-encoded data URIs are supported.
      const commaIndex = data.indexOf(',');
      const isBase64Uri = commaIndex !== -1 && data.slice(0, commaIndex).endsWith(';base64');
      if (!isBase64Uri) throw new Error('Unsupported string format');
      buffer = Buffer.from(data.slice(commaIndex + 1), 'base64');
    } else if (/^[A-Za-z0-9+/=]+$/.test(data)) {
      buffer = Buffer.from(data, 'base64');
    } else {
      throw new Error('Unsupported string format');
    }
  } else if (data instanceof Buffer) {
    buffer = data;
  } else if (data instanceof Blob) {
    buffer = Buffer.from(await data.arrayBuffer());
  } else if (data instanceof ArrayBuffer) {
    buffer = Buffer.from(data);
  } else if (ArrayBuffer.isView(data)) {
    // Respect the view's window into its backing ArrayBuffer.
    buffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
  } else {
    throw new Error('Unsupported media data type');
  }

  await fs.writeFile(localPath, buffer);
  return localPath;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Converts an image input into the `{ url, type: 'image_url' }` object used in
 * xAI request bodies.
 *
 *  - `http(s)://` URLs and `data:` URIs are passed through unchanged.
 *  - Any other string is treated as a local file path, read from disk and
 *    embedded as a base64 data URI. The MIME subtype comes from the file
 *    extension (`jpg` becomes `jpeg`; no extension falls back to `png`).
 *  - Buffers are embedded as `image/png`.
 *  - Blobs are embedded using their own `image/*` type when set, else `image/png`.
 *
 * @async
 * @param {string|Buffer|Blob} imageInput - Image source.
 * @returns {Promise<Object>} Object with `url` and `type`.
 * @throws {Error} If the input is missing or of an unsupported type; file read
 *   errors from `fs.readFile` propagate unchanged.
 */
async function prepareImageInput(imageInput) {
  if (!imageInput) throw new Error('Image input is required');

  const asImageUrl = (url) => ({ url, type: 'image_url' });
  const asDataUri = (bytes, subtype) =>
    asImageUrl(`data:image/${subtype};base64,${bytes.toString('base64')}`);

  if (typeof imageInput === 'string') {
    if (/^https?:\/\//i.test(imageInput) || imageInput.startsWith('data:')) {
      return asImageUrl(imageInput);
    }

    // Local file → base64 data URI
    const fileBytes = await fs.readFile(imageInput);
    const ext = path.extname(imageInput).toLowerCase().replace('.', '');
    let subtype = ext;
    if (ext === '') {
      // An empty subtype would produce the invalid MIME type "image/".
      subtype = 'png';
    } else if (ext === 'jpg') {
      subtype = 'jpeg';
    }
    return asDataUri(fileBytes, subtype);
  }

  if (imageInput instanceof Buffer) {
    return asDataUri(imageInput, 'png');
  }

  if (imageInput instanceof Blob) {
    const blobBytes = Buffer.from(await imageInput.arrayBuffer());
    // Prefer the Blob's declared image type over a blanket PNG label.
    const declared = /^image\/([\w.+-]+)$/.exec(imageInput.type);
    return asDataUri(blobBytes, declared ? declared[1] : 'png');
  }

  throw new Error('Unsupported image input type');
}
|
|
142
|
+
|
|
143
|
+
/* ============================================================
|
|
144
|
+
PUBLIC: IMAGE GENERATION
|
|
145
|
+
============================================================ */
|
|
146
|
+
|
|
147
|
+
/**
 * Generates one or more images from a text prompt using Grok Imagine.
 *
 * Every image in the API response is saved locally. The first image's fields
 * are also exposed at the top level of the result.
 *
 * @async
 * @function generateImage
 * @param {string} prompt - Detailed text prompt describing the desired image.
 * @param {Object} [options={}] - Optional generation parameters.
 * @param {string} [options.model='grok-imagine-image-quality'] - Model to use.
 * @param {number} [options.n=1] - Number of images to generate (1–10).
 * @param {string} [options.response_format='url'] - `'url'` or `'b64_json'`.
 * @returns {Promise<Object>} `{ local_path, url | base64, revised_prompt, images, raw }`
 *   where `images` holds one `{ local_path, url | base64, revised_prompt }` entry
 *   per saved image. Resolves to `{ raw }` alone when the response contained no
 *   usable image.
 * @throws {Error} On missing prompt or API errors.
 *
 * @example
 * const result = await generateImage("A futuristic city at night");
 * console.log(result.local_path);
 */
async function generateImage(prompt, options = {}) {
  if (!prompt || typeof prompt !== 'string') {
    throw new Error('generateImage() requires a prompt string');
  }

  const body = {
    model: options.model || 'grok-imagine-image-quality',
    prompt,
    n: options.n || 1,
    response_format: options.response_format || 'url'
    // Note: "size" is not supported by the xAI Grok Imagine API
  };

  const headers = getHeaders();
  const res = await doRequest(`${BASE_URL}/images/generations`, 'POST', headers, body);

  if (res.status !== 200) {
    throw new Error(`xAI API error ${res.status}: ${JSON.stringify(res.response)}`);
  }

  const data = res.response;
  const items = Array.isArray(data?.data) ? data.data : [];

  // Give each file its own prefix when several images come back, so parallel
  // saves cannot end up with the same name.
  const prefixFor = (index) => (items.length > 1 ? `xai-generated-${index + 1}` : 'xai-generated');

  // NOTE(review): files are always written with a ".png" extension regardless
  // of the format the API actually returned — confirm this is intended.
  const saved = await Promise.all(items.map(async (item, index) => {
    if (item?.url) {
      return {
        local_path: await saveMediaToLocal(item.url, prefixFor(index), 'png'),
        url: item.url,
        revised_prompt: item.revised_prompt
      };
    }
    if (item?.b64_json) {
      return {
        local_path: await saveMediaToLocal(item.b64_json, prefixFor(index), 'png'),
        base64: item.b64_json,
        revised_prompt: item.revised_prompt
      };
    }
    return null;
  }));

  const images = saved.filter(Boolean);
  if (images.length === 0) {
    return { raw: data };
  }

  // The first image stays at the top level so existing callers keep working.
  return { ...images[0], images, raw: data };
}
|
|
209
|
+
|
|
210
|
+
/* ============================================================
|
|
211
|
+
PUBLIC: IMAGE EDITING
|
|
212
|
+
============================================================ */
|
|
213
|
+
|
|
214
|
+
/**
 * Edits one or more images using a natural language prompt.
 * Supports up to 3 reference images for compositing, style transfer, or multi-subject scenes.
 *
 * Every image in the API response is saved locally. The first image's fields
 * are also exposed at the top level of the result.
 *
 * @async
 * @function editImage
 * @param {string} prompt - Description of the desired edit.
 * @param {string|Buffer|Blob|Array<string|Buffer|Blob>} imageInputs - One or more images (URL, path, data URI, Buffer, or Blob).
 * @param {Object} [options={}] - Optional parameters.
 * @param {string} [options.model='grok-imagine-image-quality']
 * @param {number} [options.n=1]
 * @param {string} [options.response_format='url']
 * @returns {Promise<Object>} `{ local_path, url | base64, revised_prompt, images, raw }`,
 *   or `{ raw }` alone when the response contained no usable image.
 * @throws {Error} If no image or more than 3 images are provided, or on API error.
 *
 * @example
 * // Single image edit
 * const result = await editImage("Make this a pencil sketch", "./photo.png");
 *
 * @example
 * // Multi-image editing (up to 3)
 * const result = await editImage(
 *   "Combine these two people into one scene",
 *   ["./person1.png", "./person2.png"]
 * );
 */
async function editImage(prompt, imageInputs, options = {}) {
  if (!prompt) throw new Error('editImage() requires a prompt');
  if (!imageInputs) throw new Error('editImage() requires at least one image');

  const images = Array.isArray(imageInputs) ? imageInputs : [imageInputs];

  // An empty array is truthy and would otherwise be sent as `image: []`.
  if (images.length === 0) {
    throw new Error('editImage() requires at least one image');
  }
  if (images.length > 3) {
    throw new Error('editImage() supports a maximum of 3 reference images');
  }

  const preparedImages = await Promise.all(images.map(prepareImageInput));

  const body = {
    model: options.model || 'grok-imagine-image-quality',
    prompt,
    // NOTE(review): several references are sent as an array under `image`;
    // confirm the field name against the xAI multi-image editing documentation.
    image: preparedImages.length === 1 ? preparedImages[0] : preparedImages,
    n: options.n || 1,
    response_format: options.response_format || 'url'
  };

  const headers = getHeaders();
  const res = await doRequest(`${BASE_URL}/images/edits`, 'POST', headers, body);

  if (res.status !== 200) {
    throw new Error(`xAI API error ${res.status}: ${JSON.stringify(res.response)}`);
  }

  const data = res.response;
  const items = Array.isArray(data?.data) ? data.data : [];

  // Give each file its own prefix when several images come back, so parallel
  // saves cannot end up with the same name.
  const prefixFor = (index) => (items.length > 1 ? `xai-edited-${index + 1}` : 'xai-edited');

  const saved = await Promise.all(items.map(async (item, index) => {
    if (item?.url) {
      return {
        local_path: await saveMediaToLocal(item.url, prefixFor(index), 'png'),
        url: item.url,
        revised_prompt: item.revised_prompt
      };
    }
    if (item?.b64_json) {
      return {
        local_path: await saveMediaToLocal(item.b64_json, prefixFor(index), 'png'),
        base64: item.b64_json,
        revised_prompt: item.revised_prompt
      };
    }
    return null;
  }));

  const results = saved.filter(Boolean);
  if (results.length === 0) {
    return { raw: data };
  }

  // The first image stays at the top level so existing callers keep working.
  return { ...results[0], images: results, raw: data };
}
|
|
292
|
+
|
|
293
|
+
/* ============================================================
|
|
294
|
+
INTERNAL: Poll video result
|
|
295
|
+
============================================================ */
|
|
296
|
+
|
|
297
|
+
/**
 * Polls `GET {BASE_URL}/videos/{requestId}` until the video is ready.
 *
 * A response counts as finished when its HTTP status is 200, its body has
 * `status: 'done'`, and a `video.url` is present. The video is then downloaded
 * via `saveMediaToLocal`. Body statuses `failed` and `expired` abort at once.
 * Anything else (including non-200 HTTP statuses) is retried.
 *
 * @async
 * @param {string} requestId - Identifier returned by the video generation request.
 * @param {number} [maxAttempts=60] - Maximum number of polling requests.
 * @param {number} [intervalMs=5000] - Delay between polling requests, in milliseconds.
 * @returns {Promise<Object>} `{ local_path, url, status: 'done', raw }`.
 * @throws {Error} If `requestId` is missing, generation failed/expired, or the
 *   attempts are exhausted.
 */
async function _pollVideoResult(requestId, maxAttempts = 60, intervalMs = 5000) {
  if (!requestId) throw new Error('Video request_id is required');

  const headers = getHeaders();
  // Remembered only to make the timeout error diagnosable.
  let lastObserved = 'no response';

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const res = await doRequest(`${BASE_URL}/videos/${requestId}`, 'GET', headers);

    if (res.status === 200) {
      const data = res.response;
      lastObserved = `status ${JSON.stringify(data?.status)}`;

      if (data?.status === 'done' && data.video?.url) {
        const localPath = await saveMediaToLocal(data.video.url, 'xai-video', 'mp4');
        return {
          local_path: localPath,
          url: data.video.url,
          status: 'done',
          raw: data
        };
      }

      if (data?.status === 'failed' || data?.status === 'expired') {
        throw new Error(`Video generation ${data.status}: ${JSON.stringify(data)}`);
      }
    } else {
      lastObserved = `HTTP ${res.status}`;
    }

    // No point waiting after the final attempt — fail straight away.
    if (attempt < maxAttempts - 1) {
      await new Promise(resolve => setTimeout(resolve, intervalMs));
    }
  }

  throw new Error(`Timeout polling video request ${requestId} after ${maxAttempts} attempts (last: ${lastObserved})`);
}
|
|
328
|
+
|
|
329
|
+
/* ============================================================
|
|
330
|
+
PUBLIC: IMAGE-TO-VIDEO (fully automatic)
|
|
331
|
+
============================================================ */
|
|
332
|
+
|
|
333
|
+
/**
 * Turns a still image plus a text prompt into a video.
 *
 * Submits the generation request, then hands the returned `request_id` to
 * `_pollVideoResult` and resolves with whatever that polling call yields.
 *
 * @async
 * @function generateVideo
 * @param {string} prompt - Description of the desired motion or animation.
 * @param {string|Buffer|Blob} imageInput - Source image, passed to `prepareImageInput`.
 * @param {Object} [options={}] - Optional video parameters.
 * @param {string} [options.model='grok-imagine-video'] - Model to use.
 * @param {number} [options.duration=8] - Video duration in seconds.
 * @param {string} [options.aspect_ratio='16:9'] - Aspect ratio.
 * @param {string} [options.resolution='720p'] - Output resolution.
 * @returns {Promise<Object>} The polling result for the submitted request.
 * @throws {Error} On missing inputs, a non-200 submission response, a missing
 *   `request_id`, or any error raised while polling.
 *
 * @example
 * const result = await generateVideo(
 *   "Make the water crash down and slowly pan out",
 *   "https://example.com/waterfall.png",
 *   { duration: 12 }
 * );
 * console.log(result.local_path);
 */
async function generateVideo(prompt, imageInput, options = {}) {
  if (!prompt) {
    throw new Error('generateVideo() requires a prompt');
  }
  if (!imageInput) {
    throw new Error('generateVideo() requires an image');
  }

  const image = await prepareImageInput(imageInput);

  // Falsy option values fall back to the defaults below.
  const payload = {
    model: options.model || 'grok-imagine-video',
    prompt,
    image,
    duration: options.duration || 8,
    aspect_ratio: options.aspect_ratio || '16:9',
    resolution: options.resolution || '720p'
  };

  const endpoint = `${BASE_URL}/videos/generations`;
  const submission = await doRequest(endpoint, 'POST', getHeaders(), payload);

  const submittedOk = submission.status === 200;
  if (!submittedOk) {
    throw new Error(`xAI API error ${submission.status}: ${JSON.stringify(submission.response)}`);
  }

  const { request_id: requestId } = submission.response;
  if (!requestId) {
    throw new Error('No request_id returned from video generation');
  }

  // Block until polling reports the finished video.
  const finished = await _pollVideoResult(requestId);
  return finished;
}
|
|
388
|
+
|
|
389
|
+
export {
|
|
390
|
+
generateImage,
|
|
391
|
+
editImage,
|
|
392
|
+
generateVideo
|
|
393
|
+
};
|