openclacky 1.2.8 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/lib/clacky/agent/llm_caller.rb +3 -0
- data/lib/clacky/agent/message_compressor_helper.rb +6 -5
- data/lib/clacky/agent/session_serializer.rb +4 -0
- data/lib/clacky/agent.rb +9 -0
- data/lib/clacky/agent_config.rb +111 -8
- data/lib/clacky/brand_config.rb +1 -0
- data/lib/clacky/cli.rb +49 -22
- data/lib/clacky/client.rb +6 -2
- data/lib/clacky/default_skills/channel-manager/SKILL.md +33 -110
- data/lib/clacky/default_skills/media-gen/SKILL.md +128 -0
- data/lib/clacky/idle_compression_timer.rb +38 -15
- data/lib/clacky/media/base.rb +68 -0
- data/lib/clacky/media/gemini.rb +36 -0
- data/lib/clacky/media/generator.rb +78 -0
- data/lib/clacky/media/openai_compat.rb +168 -0
- data/lib/clacky/providers.rb +89 -2
- data/lib/clacky/rich_ui_controller.rb +1549 -0
- data/lib/clacky/server/channel/adapters/weixin/adapter.rb +24 -2
- data/lib/clacky/server/channel/channel_manager.rb +89 -2
- data/lib/clacky/server/http_server.rb +334 -29
- data/lib/clacky/session_manager.rb +9 -8
- data/lib/clacky/telemetry.rb +26 -6
- data/lib/clacky/ui2/layout_manager.rb +11 -7
- data/lib/clacky/ui2/ui_controller.rb +2 -2
- data/lib/clacky/ui_interface.rb +1 -1
- data/lib/clacky/utils/model_pricing.rb +75 -53
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +393 -14
- data/lib/clacky/web/billing.js +1 -1
- data/lib/clacky/web/i18n.js +86 -4
- data/lib/clacky/web/index.html +23 -3
- data/lib/clacky/web/model-tester.js +58 -0
- data/lib/clacky/web/onboard.js +17 -30
- data/lib/clacky/web/sessions.js +443 -2
- data/lib/clacky/web/settings.js +372 -97
- data/lib/clacky/web/workspace.js +9 -1
- data/lib/clacky.rb +3 -0
- data/scripts/build/lib/network.sh +61 -30
- data/scripts/install.ps1 +16 -4
- data/scripts/install.sh +61 -30
- data/scripts/install_browser.sh +61 -30
- data/scripts/install_full.sh +61 -30
- data/scripts/install_rails_deps.sh +61 -30
- data/scripts/install_system_deps.sh +61 -30
- metadata +12 -3
- data/lib/clacky/default_skills/channel-manager/feishu_setup.rb +0 -574
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: media-gen
|
|
3
|
+
description: 'Generate images (and later videos / audio) inside the current task. Use this skill whenever the user asks to create, generate, or produce a picture / image / illustration / cover / poster / icon / artwork — including phrases like 生成图片, 画一张, 做封面, 来张配图, generate image, make a picture, draw, create artwork, design a cover. Also use when building documents (slides, PPT, posters, marketing pages, README hero shots) where an image is needed inline. Routes calls through the local Clacky HTTP server, which uses the user-configured `type=image` model — you do NOT need to know which provider; the server handles it.'
|
|
4
|
+
disable-model-invocation: false
|
|
5
|
+
user-invocable: true
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# media-gen
|
|
9
|
+
|
|
10
|
+
Generate images on demand by calling the local Clacky HTTP server, which dispatches to whichever image-generation model the user configured (`type=image` in their model settings).
|
|
11
|
+
|
|
12
|
+
## Endpoint
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/image
|
|
16
|
+
GET http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/types
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Step 1 — Verify a backend is configured
|
|
20
|
+
|
|
21
|
+
Before generating anything, confirm the user has a `type=image` model set up:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
curl -s http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/types
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
If the response shows `image.configured = false`, stop and tell the user:
|
|
28
|
+
|
|
29
|
+
> 还没有配置生图模型。请打开 Clacky 设置页 → 添加模型 → 类型选 `image`(推荐 `or-gemini-3-pro-image` 或 `or-gpt-image-1`)。配好后再让我生图。
|
|
30
|
+
|
|
31
|
+
Do NOT try to fall back to `terminal` + a hand-written `curl https://api.openai.com/...` — that bypasses the user's configured backend and won't be billed correctly.
|
|
32
|
+
|
|
33
|
+
## Step 2 — Generate the image
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
curl -s -X POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/image \
|
|
37
|
+
-H "Content-Type: application/json" \
|
|
38
|
+
-d '{
|
|
39
|
+
"prompt": "A clean, modern hero illustration for a tech startup landing page. Soft gradient background, abstract geometric shapes in blue and purple, minimal style, 4K quality.",
|
|
40
|
+
"aspect_ratio": "landscape"
|
|
41
|
+
}'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Request fields
|
|
45
|
+
|
|
46
|
+
| Field | Required | Values | Notes |
|
|
47
|
+
|----------------|----------|-------------------------------------|-------|
|
|
48
|
+
| `prompt` | yes | string | Be detailed and concrete. See prompt tips below. |
|
|
49
|
+
| `aspect_ratio` | no | `landscape` / `square` / `portrait` | Defaults to `landscape`. |
|
|
50
|
+
| `output_dir` | no | absolute path | Defaults to the current working directory. The image is saved under `<output_dir>/assets/generated/`. |
|
|
51
|
+
|
|
52
|
+
### Response shape (success)
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"success": true,
|
|
57
|
+
"image": "/abs/path/to/working_dir/assets/generated/img_20260525_011820_a1b2c3d4.png",
|
|
58
|
+
"model": "or-gemini-3-pro-image",
|
|
59
|
+
"provider": "openclacky",
|
|
60
|
+
"prompt": "A clean, modern hero illustration ...",
|
|
61
|
+
"aspect_ratio": "landscape",
|
|
62
|
+
"size": "1536x1024",
|
|
63
|
+
"usage": {
|
|
64
|
+
"prompt_tokens": 50,
|
|
65
|
+
"completion_tokens": 4500,
|
|
66
|
+
"cache_read_tokens": 0,
|
|
67
|
+
"cache_write_tokens": 0,
|
|
68
|
+
"total_tokens": 4550
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The `image` field is an absolute path on disk. To embed it in markdown, slides, or HTML, convert it to a path relative to the document you're writing.
|
|
74
|
+
|
|
75
|
+
`usage` may be absent when the configured backend doesn't return token counts. Treat it as optional.
|
|
76
|
+
|
|
77
|
+
### Response shape (failure)
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"success": false,
|
|
82
|
+
"image": null,
|
|
83
|
+
"error": "Upstream 401: Invalid API key",
|
|
84
|
+
"error_type": "api_error",
|
|
85
|
+
"model": "...",
|
|
86
|
+
"provider": "..."
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Common `error_type` values: `not_configured`, `auth_required`, `network_error`, `api_error`, `empty_response`. Tell the user the error plainly; if it's `auth_required` or `api_error 401/403`, point them at settings to fix the api_key.
|
|
91
|
+
|
|
92
|
+
## Step 3 — Show the image
|
|
93
|
+
|
|
94
|
+
`Read` does NOT show the image to the user — it only feeds it into your own context. To make the user actually see it, write a markdown tag in your reply:
|
|
95
|
+
|
|
96
|
+
```markdown
|
|
97
|
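+
![short description](file:///abs/path/to/working_dir/assets/generated/img_20260525_011820_a1b2c3d4.png)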
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Take the `image` field from the response and prefix `file://` (three slashes, since the path is absolute).
|
|
101
|
+
|
|
102
|
+
If you're also embedding it in a document (README, PPT, etc.), use a path relative to that document, e.g. `![short description](assets/generated/img_20260525_011820_a1b2c3d4.png)`.
|
|
103
|
+
|
|
104
|
+
## Prompt writing tips
|
|
105
|
+
|
|
106
|
+
A good image prompt has 4 layers, in this order:
|
|
107
|
+
|
|
108
|
+
1. **Subject** — what is in the image, concretely. ("a golden retriever puppy", "a stylized icon of a rocket")
|
|
109
|
+
2. **Style / medium** — photo / illustration / 3D render / watercolor / flat vector / line art
|
|
110
|
+
3. **Composition / lighting** — close-up / wide shot / overhead / soft natural light / dramatic backlight
|
|
111
|
+
4. **Mood / palette** — minimal / playful / corporate / pastel / high-contrast monochrome
|
|
112
|
+
|
|
113
|
+
For PPT / slide decks specifically:
|
|
114
|
+
- Hero / cover slides: `aspect_ratio: landscape`, prompt should emphasise "clean", "minimal", "negative space" so text overlays well
|
|
115
|
+
- Section dividers: `aspect_ratio: landscape`, abstract or pattern-style works better than literal subjects
|
|
116
|
+
- Inline figures: `aspect_ratio: square` or `portrait`, more literal subject is fine
|
|
117
|
+
|
|
118
|
+
When the user gives a vague request like "给我配张图", ask one clarifying question (subject? style?) before calling the API — each generated image costs real money.
|
|
119
|
+
|
|
120
|
+
## When NOT to use this skill
|
|
121
|
+
|
|
122
|
+
- The user asks to **edit** an existing image (this skill is text-to-image only today)
|
|
123
|
+
- The user wants a **diagram / chart** with specific data — use a charting library (matplotlib, mermaid, etc.) instead; image gen is for illustrations, not data viz
|
|
124
|
+
- The user asks for **screenshots** of real software — use the browser tool
|
|
125
|
+
|
|
126
|
+
## Future modalities
|
|
127
|
+
|
|
128
|
+
The same `/api/media/` namespace will gain `video` and `audio` endpoints. The pattern is identical: the user configures `type=video` / `type=audio` models in settings, this skill (or its successor) calls the matching endpoint.
|
|
@@ -32,6 +32,7 @@ module Clacky
|
|
|
32
32
|
@timer_thread = nil
|
|
33
33
|
@compress_thread = nil
|
|
34
34
|
@mutex = Mutex.new
|
|
35
|
+
@shutdown = false
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
# Start (or restart) the idle timer.
|
|
@@ -39,24 +40,35 @@ module Clacky
|
|
|
39
40
|
def start
|
|
40
41
|
cancel # reset any existing timer
|
|
41
42
|
|
|
42
|
-
@
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
compress_thread
|
|
51
|
-
|
|
52
|
-
|
|
43
|
+
@mutex.synchronize do
|
|
44
|
+
return false if @shutdown
|
|
45
|
+
|
|
46
|
+
@timer_thread = Thread.new do
|
|
47
|
+
Thread.current.name = "idle-compression-timer"
|
|
48
|
+
sleep IDLE_DELAY
|
|
49
|
+
next if shutdown?
|
|
50
|
+
|
|
51
|
+
# Register @compress_thread inside the mutex BEFORE the thread starts running,
|
|
52
|
+
# so cancel() can always find and interrupt it even if it fires immediately.
|
|
53
|
+
compress_thread = nil
|
|
54
|
+
@mutex.synchronize do
|
|
55
|
+
unless @shutdown
|
|
56
|
+
compress_thread = Thread.new do
|
|
57
|
+
Thread.current.name = "idle-compression-work"
|
|
58
|
+
run_compression
|
|
59
|
+
end
|
|
60
|
+
@compress_thread = compress_thread
|
|
61
|
+
end
|
|
53
62
|
end
|
|
54
|
-
@compress_thread = compress_thread
|
|
55
|
-
end
|
|
56
63
|
|
|
57
|
-
|
|
58
|
-
|
|
64
|
+
compress_thread&.join
|
|
65
|
+
@mutex.synchronize { @compress_thread = nil; @timer_thread = nil }
|
|
66
|
+
end
|
|
59
67
|
end
|
|
68
|
+
true
|
|
69
|
+
rescue ThreadError => e
|
|
70
|
+
log("Idle compression timer could not start: #{e.message}", level: :debug)
|
|
71
|
+
false
|
|
60
72
|
end
|
|
61
73
|
|
|
62
74
|
# Cancel the timer and any in-progress compression.
|
|
@@ -81,6 +93,13 @@ module Clacky
|
|
|
81
93
|
compress_thread_to_join&.join(5)
|
|
82
94
|
end
|
|
83
95
|
|
|
96
|
+
# Permanently stop this timer. Used during application shutdown so
|
|
97
|
+
# background agent-thread ensure blocks cannot create new timer threads.
|
|
98
|
+
# Permanently disable this timer, then tear down whatever is running.
#
# The flag is flipped while holding @mutex, and only afterwards is
# #cancel invoked (outside the lock), so the ordering is: mark as shut
# down first, stop existing work second.
#
# @return [Object] whatever #cancel returns
def shutdown
  @mutex.synchronize do
    @shutdown = true
  end

  cancel
end
|
|
102
|
+
|
|
84
103
|
# True if the timer or compression is currently active.
|
|
85
104
|
def active?
|
|
86
105
|
@mutex.synchronize { @timer_thread&.alive? || @compress_thread&.alive? }
|
|
@@ -94,6 +113,10 @@ module Clacky
|
|
|
94
113
|
@mutex.synchronize { @compress_thread&.alive? || false }
|
|
95
114
|
end
|
|
96
115
|
|
|
116
|
+
# Reports the current value of the shutdown flag, read under @mutex.
#
# @return [Boolean] the value stored in @shutdown
def shutdown?
  @mutex.synchronize do
    @shutdown
  end
end
|
|
119
|
+
|
|
97
120
|
private def run_compression
|
|
98
121
|
success = @agent.trigger_idle_compression
|
|
99
122
|
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "base64"
|
|
5
|
+
require "securerandom"
|
|
6
|
+
|
|
7
|
+
module Clacky
|
|
8
|
+
module Media
|
|
9
|
+
# Abstract base for media (image / video / audio) generation providers.
|
|
10
|
+
#
|
|
11
|
+
# Subclasses implement #generate_image (and later #generate_video,
|
|
12
|
+
# #generate_audio). The base class supplies the uniform success/error
|
|
13
|
+
# response shape and the on-disk persistence helper, mirroring the
|
|
14
|
+
# design used by Hermes' image_gen_provider so the surface stays
|
|
15
|
+
# learnable across modalities.
|
|
16
|
+
# Shared plumbing for media generation providers: stores the model entry,
# persists base64 image payloads to disk, and builds the uniform
# success / error response hashes that every provider returns.
class Base
  # @param model_entry [Hash] one entry from AgentConfig#models — must
  #   include "model", "base_url", "api_key" keys.
  def initialize(model_entry)
    @model_entry = model_entry
    @model = model_entry["model"]
    @base_url = model_entry["base_url"]
    @api_key = model_entry["api_key"]
  end

  # Abstract hook; concrete providers override it.
  #
  # @return [Hash] either success_response(...) or error_response(...)
  # @raise [NotImplementedError] always, in this base class
  def generate_image(prompt:, aspect_ratio: "landscape", output_dir: nil, **_kwargs)
    raise NotImplementedError, "#{self.class.name} must implement #generate_image"
  end

  # Persist a base64-encoded image under <output_dir>/assets/generated/.
  #
  # The target directory is resolved with File.expand_path so the returned
  # path is absolute even when output_dir is relative; a nil output_dir
  # resolves against the current working directory.
  #
  # @param b64_data [String] base64-encoded image bytes
  # @param output_dir [String, nil] directory that will contain assets/generated/
  # @param prefix [String] file name prefix
  # @param extension [String] file extension without the leading dot
  # @return [String] absolute path of the file that was written
  # @raise [SystemCallError] when the directory or file cannot be written
  private def save_b64_image(b64_data, output_dir:, prefix: "img", extension: "png")
    target_dir = File.expand_path(File.join("assets", "generated"), output_dir)
    FileUtils.mkdir_p(target_dir)

    # Timestamp for human-friendly ordering, random suffix to avoid
    # collisions between images generated within the same second.
    ts = Time.now.strftime("%Y%m%d_%H%M%S")
    short = SecureRandom.hex(4)
    path = File.join(target_dir, "#{prefix}_#{ts}_#{short}.#{extension}")

    File.binwrite(path, Base64.decode64(b64_data))
    path
  end

  # Build the success payload. Keys in +extra+ are merged last, so they
  # override the standard keys on collision.
  #
  # @return [Hash] string-keyed response with "success" => true
  private def success_response(image:, prompt:, aspect_ratio:, provider:, extra: {})
    {
      "success" => true,
      "image" => image,
      "model" => @model,
      "prompt" => prompt,
      "aspect_ratio" => aspect_ratio,
      "provider" => provider
    }.merge(extra)
  end

  # Build the failure payload; "image" is always nil.
  #
  # @return [Hash] string-keyed response with "success" => false
  private def error_response(error:, error_type: "provider_error", provider: "", prompt: "", aspect_ratio: "landscape")
    {
      "success" => false,
      "image" => nil,
      "error" => error,
      "error_type" => error_type,
      "model" => @model,
      "prompt" => prompt,
      "aspect_ratio" => aspect_ratio,
      "provider" => provider
    }
  end
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
require_relative "base"
|
|
6
|
+
|
|
7
|
+
module Clacky
|
|
8
|
+
module Media
|
|
9
|
+
# Native Google Gemini image generation adapter.
|
|
10
|
+
#
|
|
11
|
+
# Reserved for users who configure a direct Google AI Studio base_url
|
|
12
|
+
# (e.g. https://generativelanguage.googleapis.com) with a raw Google API
|
|
13
|
+
# key. The official endpoints are:
|
|
14
|
+
# POST /v1beta/models/<model>:generateContent — image-out via Gemini
|
|
15
|
+
# POST /v1beta/models/<model>:predict — Imagen
|
|
16
|
+
# with x-goog-api-key auth, contents[].parts[] request schema, and
|
|
17
|
+
# candidates[].content.parts[].inlineData response schema. Completely
|
|
18
|
+
# different from the OpenAI /v1/images/generations contract.
|
|
19
|
+
#
|
|
20
|
+
# Today every shipping path (openclacky gateway, OpenRouter) wraps Gemini
|
|
21
|
+
# behind an OpenAI-compatible facade, so OpenAICompat handles them and
|
|
22
|
+
# this class is intentionally a stub. We surface a clear error rather
|
|
23
|
+
# than silently 404 against Google's actual host.
|
|
24
|
+
# Placeholder adapter for direct Google AI Studio endpoints. It performs
# no network call: every request is answered with a "not_implemented"
# error payload that tells the user which gateways to use instead.
class Gemini < Base
  # @param prompt [String] echoed back in the error payload
  # @param aspect_ratio [String] echoed back in the error payload
  # @param output_dir [String, nil] accepted for signature parity; unused
  # @return [Hash] an error_response with error_type "not_implemented"
  def generate_image(prompt:, aspect_ratio: "landscape", output_dir: nil, **_kwargs)
    unsupported_notice = "Direct Google AI Studio (generativelanguage.googleapis.com) image generation is not yet supported. Use the openclacky or OpenRouter gateway instead — set base_url to https://api.openclacky.com or https://openrouter.ai/api/v1 and pick a Gemini image model (e.g. or-gemini-3-pro-image, google/gemini-3-pro-image-preview)."

    error_response(
      prompt: prompt,
      aspect_ratio: aspect_ratio,
      provider: "gemini-direct",
      error_type: "not_implemented",
      error: unsupported_notice
    )
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "openai_compat"
|
|
4
|
+
require_relative "gemini"
|
|
5
|
+
|
|
6
|
+
module Clacky
|
|
7
|
+
module Media
|
|
8
|
+
# Top-level dispatcher: takes an AgentConfig and a request, picks the
|
|
9
|
+
# right provider class based on the configured image model's base_url,
|
|
10
|
+
# and delegates.
|
|
11
|
+
#
|
|
12
|
+
# Adding a new modality (video / audio) means:
|
|
13
|
+
# 1. add a generate_<modality> method here that resolves the correct
|
|
14
|
+
# type=<modality> entry and class
|
|
15
|
+
# 2. add a provider class under lib/clacky/media/ implementing the call
|
|
16
|
+
# Dispatcher for media generation: looks up the configured type=image
# model entry, picks the provider class from its base_url, and delegates.
class Generator
  # Hosts that speak the native Google AI Studio API instead of an
  # OpenAI-compatible facade. Matched as a substring against the
  # configured base_url so any regional / staging variant is caught.
  GOOGLE_NATIVE_HOSTS = [
    "generativelanguage.googleapis.com",
    "aiplatform.googleapis.com"
  ].freeze

  # @param agent_config [Clacky::AgentConfig] must respond to
  #   #find_model_by_type
  def initialize(agent_config)
    @agent_config = agent_config
  end

  # @return [Hash, nil] the type=image model entry, or nil if not configured
  def image_model_entry
    @agent_config.find_model_by_type("image")
  end

  # Generate an image with whichever provider matches the configured model.
  #
  # @param prompt [String] text prompt, forwarded to the provider
  # @param aspect_ratio [String] forwarded to the provider
  # @param output_dir [String, nil] forwarded to the provider
  # @param kwargs [Hash] extra provider-specific options, forwarded as-is
  # @return [Hash] the provider's response, or a "not_configured" error
  #   hash when no type=image model exists
  def generate_image(prompt:, aspect_ratio: "landscape", output_dir: nil, **kwargs)
    entry = image_model_entry
    return not_configured_response(prompt, aspect_ratio) if entry.nil?

    build_provider_for(entry).generate_image(
      prompt: prompt,
      aspect_ratio: aspect_ratio,
      output_dir: output_dir,
      **kwargs
    )
  end

  # Error payload returned when no image model is configured. Carries
  # "aspect_ratio" as well, so its key set matches the error hashes built
  # by the provider classes.
  private def not_configured_response(prompt, aspect_ratio)
    {
      "success" => false,
      "image" => nil,
      "error" => "No image model configured. Add a model with type=image in settings.",
      "error_type" => "not_configured",
      "provider" => "",
      "model" => "",
      "prompt" => prompt,
      "aspect_ratio" => aspect_ratio
    }
  end

  # Pick the adapter class for a media model entry.
  #
  # Routing rules:
  #   • base_url contains one of GOOGLE_NATIVE_HOSTS → Gemini
  #   • everything else (including a missing base_url) → OpenAICompat
  private def build_provider_for(entry)
    url = entry["base_url"].to_s
    if GOOGLE_NATIVE_HOSTS.any? { |host| url.include?(host) }
      Gemini.new(entry)
    else
      OpenAICompat.new(entry)
    end
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
require_relative "base"
|
|
6
|
+
|
|
7
|
+
module Clacky
|
|
8
|
+
module Media
|
|
9
|
+
# OpenAI-compatible image generation provider.
|
|
10
|
+
#
|
|
11
|
+
# Talks to POST <base_url>/images/generations with the standard OpenAI
|
|
12
|
+
# request shape. Handles three providers under one class because they
|
|
13
|
+
# all expose the same endpoint: OpenAI, OpenRouter, and the openclacky
|
|
14
|
+
# platform gateway. Provider-specific quirks (model id naming, billing)
|
|
15
|
+
# live in PRESETS, not here.
|
|
16
|
+
# OpenAI-compatible image generation provider: POSTs to
# <base_url>/images/generations and converts the reply into the uniform
# success / error hash built by the base class.
class OpenAICompat < Base
  # Pixel size sent as the OpenAI `size` parameter for each aspect name.
  ASPECT_TO_SIZE = {
    "landscape" => "1536x1024",
    "square"    => "1024x1024",
    "portrait"  => "1024x1536"
  }.freeze

  # Used when the caller passes an aspect_ratio not in ASPECT_TO_SIZE.
  DEFAULT_ASPECT = "landscape"

  # Generate an image and return a response hash. Failures at every stage
  # (validation, transport, upstream status, parsing, saving to disk) are
  # reported as an error_response rather than raised.
  #
  # @param prompt [String] text prompt; must be non-blank
  # @param aspect_ratio [String] a key of ASPECT_TO_SIZE; unknown values
  #   fall back to DEFAULT_ASPECT
  # @param output_dir [String, nil] where b64 images are saved; defaults
  #   to the current working directory
  # @param n [Integer] forwarded to the upstream as `n`; only the first
  #   returned image is used
  # @return [Hash] either success_response(...) or error_response(...)
  def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
    provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"
    aspect = ASPECT_TO_SIZE.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
    size = ASPECT_TO_SIZE[aspect]

    # The blank-prompt error deliberately does not echo the prompt back,
    # so it is built before the shared context below.
    if prompt.to_s.strip.empty?
      return error_response(
        error: "Prompt is required and must be a non-empty string",
        error_type: "invalid_argument",
        provider: provider_id,
        aspect_ratio: aspect
      )
    end

    # Fields repeated in every remaining error payload.
    ctx = { provider: provider_id, prompt: prompt, aspect_ratio: aspect }

    if @api_key.to_s.empty?
      return error_response(
        error: "api_key not configured for image model '#{@model}'",
        error_type: "auth_required",
        **ctx
      )
    end

    begin
      response = post_generation(build_payload(prompt, aspect, size, n))
    rescue Faraday::Error => e
      return error_response(error: "HTTP request failed: #{e.message}", error_type: "network_error", **ctx)
    end

    unless response.success?
      return error_response(
        error: "Upstream #{response.status}: #{truncate(response.body, 500)}",
        error_type: "api_error",
        **ctx
      )
    end

    body = parse_json(response.body)
    unless body.is_a?(Hash)
      return error_response(error: "Invalid JSON response from upstream", error_type: "invalid_response", **ctx)
    end

    first = (body["data"] || []).first
    if first.nil?
      return error_response(error: "Upstream returned no image data", error_type: "empty_response", **ctx)
    end

    begin
      image_ref = extract_image_ref(first, output_dir)
    rescue SystemCallError => e
      # Disk problems (permissions, missing volume, full disk) are reported
      # in the same hash shape as every other failure instead of escaping
      # as a raw Errno exception.
      return error_response(error: "Failed to save generated image: #{e.message}", error_type: "io_error", **ctx)
    end

    if image_ref.nil?
      return error_response(error: "Response contained neither b64_json nor url", error_type: "empty_response", **ctx)
    end

    success_response(
      image: image_ref,
      prompt: prompt,
      aspect_ratio: aspect,
      provider: provider_id,
      extra: {
        "size" => size,
        "usage" => body["usage"],
        "cost_usd" => body["cost_usd"]
      }.compact
    )
  end

  # Build the JSON request body. Models matching gemini_family? get the
  # aspect embedded as a text hint in the prompt and no `size` field;
  # all other models get the plain prompt plus `size`.
  private def build_payload(prompt, aspect, size, n)
    payload = { model: @model, n: n }
    if gemini_family?(@model)
      payload[:prompt] = "#{prompt}\n\n[aspect: #{aspect}]"
    else
      payload[:prompt] = prompt
      payload[:size] = size
    end
    payload
  end

  # POST the payload to images/generations with bearer authentication.
  private def post_generation(payload)
    connection.post("images/generations") do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  end

  # Turn one entry of the upstream `data` array into an image reference:
  # a saved file path for b64_json, the URL itself for url, nil otherwise.
  private def extract_image_ref(item, output_dir)
    if item["b64_json"]
      save_b64_image(item["b64_json"], output_dir: output_dir || Dir.pwd, prefix: "img")
    elsif item["url"]
      item["url"]
    end
  end

  # A new Faraday connection per call: 240s read timeout, 10s open timeout.
  private def connection
    Faraday.new(url: normalized_base_url) do |f|
      f.options.timeout = 240
      f.options.open_timeout = 10
    end
  end

  private def gemini_family?(model_name)
    model_name.to_s.match?(/gemini|imagen/i)
  end

  # base_url with exactly one trailing slash appended (a single existing
  # trailing slash is removed first) so Faraday's relative-path join
  # behaves.
  private def normalized_base_url
    "#{@base_url.to_s.chomp("/")}/"
  end

  # @return [Object, nil] parsed JSON, or nil when the body is not valid
  #   JSON or is not a String at all (e.g. nil)
  private def parse_json(body)
    JSON.parse(body)
  rescue JSON::ParserError, TypeError
    nil
  end

  # Shorten +str+ to at most +max+ characters, appending "..." when cut.
  private def truncate(str, max)
    s = str.to_s
    s.length > max ? "#{s[0, max]}..." : s
  end
end
|
|
167
|
+
end
|
|
168
|
+
end
|