agent-pulse 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -450,6 +450,41 @@ const result = await agent.run("A futuristic city skyline in neon colors.");
450
450
  // "![Generated Image](data:image/png;base64,...)" or "![Generated Image](https://...)"
451
451
  ```
452
452
 
453
+ #### Image Editing (Reference Image)
454
+
455
+ Edit or remix an existing image by passing `reference_image` in config. This is **provider-agnostic** — the same code works with both Grok and Google providers.
456
+
457
+ ```typescript
458
+ import { Agent, grok } from 'agent-pulse';
459
+
460
+ const agent = new Agent({
461
+ name: 'editor',
462
+ provider: new grok('grok-imagine-image'),
463
+ config: {
464
+ aspect_ratio: '3:4',
465
+ response_format: 'b64_json',
466
+ reference_image: 'data:image/png;base64,iVBORw0KGgo...' // base64 data URI
467
+ }
468
+ });
469
+
470
+ const result = await agent.run("Change the background to a sunset beach.");
471
+ // result.content contains the edited image as a markdown data URI
472
+ ```
473
+
474
+ To swap providers, just change the provider line — `reference_image` works the same way:
475
+
476
+ ```typescript
477
+ // Grok
478
+ provider: new grok('grok-imagine-image', process.env.GROK_API_KEY)
479
+
480
+ // Google Gemini
481
+ provider: new google('gemini-2.5-flash-image', process.env.GOOGLE_API_KEY)
482
+ ```
483
+
484
+ > **How it works under the hood:**
485
+ > - **Grok**: Passes `image_url` through the standard `images.generate()` call to `/v1/images/generations` — the same JSON request handles both generation and editing (the OpenAI SDK's `images.edit()` is not used, since its multipart/form-data format isn't supported by x.ai).
486
+ > - **Google**: Injects the image as `inlineData` alongside the text prompt in the multimodal content.
487
+
453
488
  #### Google (Gemini)
454
489
  Gemini models can generate images as part of their response.
455
490
 
@@ -188,6 +188,21 @@ class GoogleProvider {
188
188
  }];
189
189
  }
190
190
  }
191
+ // Inject reference_image from config as inlineData (provider-agnostic image editing)
192
+ if (config?.reference_image) {
193
+ const dataUriMatch = String(config.reference_image).match(/^data:(image\/\w+);base64,(.+)$/);
194
+ if (dataUriMatch) {
195
+ const lastUserMsg = [...contents].reverse().find((c) => c.role === 'user');
196
+ if (lastUserMsg) {
197
+ lastUserMsg.parts.push({
198
+ inlineData: {
199
+ mimeType: dataUriMatch[1],
200
+ data: dataUriMatch[2]
201
+ }
202
+ });
203
+ }
204
+ }
205
+ }
191
206
  // 4. Call API (Streaming)
192
207
  const result = await this.client.models.generateContentStream({
193
208
  model: this.model,
@@ -23,44 +23,18 @@ class GrokProvider {
23
23
  const promptText = Array.isArray(prompt)
24
24
  ? prompt.filter(m => m.role === 'user').map(m => m.content).join('\n')
25
25
  : String(prompt);
26
- let images;
27
- if (config?.image_url) {
28
- // Image editing direct JSON request to /v1/images/edits
29
- // (OpenAI SDK's images.edit() uses multipart/form-data, but x.ai requires JSON)
30
- const body = {
31
- model: this.model,
32
- prompt: promptText,
33
- image_url: config.image_url,
34
- n: config?.n || 1,
35
- response_format: config?.response_format || 'b64_json',
36
- ...(config?.aspect_ratio && { aspect_ratio: config.aspect_ratio }),
37
- };
38
- const res = await fetch('https://api.x.ai/v1/images/edits', {
39
- method: 'POST',
40
- headers: {
41
- 'Authorization': `Bearer ${this.client.apiKey}`,
42
- 'Content-Type': 'application/json',
43
- },
44
- body: JSON.stringify(body),
45
- });
46
- if (!res.ok) {
47
- const errText = await res.text();
48
- throw new Error(`Request failed with status ${res.status}: ${errText}`);
49
- }
50
- const json = await res.json();
51
- images = json.data;
52
- }
53
- else {
54
- // Standard image generation
55
- const response = await this.client.images.generate({
56
- model: this.model,
57
- prompt: promptText,
58
- n: config?.n || 1,
59
- response_format: config?.response_format || 'b64_json',
60
- ...(config?.aspect_ratio && { aspect_ratio: config.aspect_ratio }),
61
- });
62
- images = response.data;
63
- }
26
+ // Both generation and editing use /v1/images/generations
27
+ // For editing, just add image_url to the same request
28
+ const imageUrl = config?.reference_image || config?.image_url;
29
+ const response = await this.client.images.generate({
30
+ model: this.model,
31
+ prompt: promptText,
32
+ n: config?.n || 1,
33
+ response_format: config?.response_format || 'b64_json',
34
+ ...(config?.aspect_ratio && { aspect_ratio: config.aspect_ratio }),
35
+ ...(imageUrl && { image_url: imageUrl }),
36
+ });
37
+ const images = response.data;
64
38
  const markdownParts = images.map((img, i) => {
65
39
  if (img.b64_json) {
66
40
  return `![Generated Image${images.length > 1 ? ` ${i + 1}` : ''}](data:image/png;base64,${img.b64_json})`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-pulse",
3
- "version": "1.4.2",
3
+ "version": "1.4.4",
4
4
  "description": "A lightweight, agentic AI framework for JavaScript/TypeScript",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -44,4 +44,4 @@
44
44
  "typescript": "^5.3.3",
45
45
  "vitest": "^1.3.1"
46
46
  }
47
- }
47
+ }