openclacky 1.2.16 → 1.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -0
- data/lib/clacky/agent/session_serializer.rb +5 -2
- data/lib/clacky/agent/skill_manager.rb +1 -1
- data/lib/clacky/agent.rb +6 -11
- data/lib/clacky/default_skills/channel-manager/SKILL.md +4 -2
- data/lib/clacky/default_skills/media-gen/SKILL.md +1 -0
- data/lib/clacky/default_skills/skill-creator/SKILL.md +1 -0
- data/lib/clacky/media/base.rb +32 -0
- data/lib/clacky/media/dashscope.rb +243 -0
- data/lib/clacky/media/generator.rb +18 -0
- data/lib/clacky/server/channel/channel_manager.rb +115 -31
- data/lib/clacky/server/http_server.rb +25 -2
- data/lib/clacky/skill.rb +3 -1
- data/lib/clacky/telemetry.rb +20 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +305 -0
- data/lib/clacky/web/billing.js +144 -0
- data/lib/clacky/web/i18n.js +144 -1
- data/lib/clacky/web/index.html +11 -0
- data/lib/clacky/web/marked.min.js +55 -45
- data/lib/clacky/web/sessions.js +6 -1
- data/lib/clacky/web/share.js +843 -0
- data/lib/clacky/web/skills.js +5 -5
- data/lib/clacky/web/vendor/qrcode/qrcode.min.js +8 -0
- data/lib/clacky/web/ws-dispatcher.js +1 -0
- data/scripts/install.ps1 +20 -19
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fcf1cc94591160df5daf797ece584049cf5c9881bc1e3899e84d8fdee61d330f
|
|
4
|
+
data.tar.gz: cde0fb1ea11582f9e4934a68635b44af3bda3cf886619280d9c0afce6a36eb5e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2395d1e2b130021001ebad6aafa7eb11d5a884201852030e203e237b8e91b6ef9c67b7d50b691661ccae66b6baccd941e8e9c6bf92ef2addd490166c57b06ab2
|
|
7
|
+
data.tar.gz: 32cad874d49b8df4892081a5e5ade95115fdb58c9c52fd88cd95d130aba0ae65068adf0966ab259cdf383bb8293fb6d4f43c5f0a9177844ac295355c1dcac94e
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.18] - 2026-06-13
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Alibaba DashScope (Qwen-Image) as a new image generation backend
|
|
12
|
+
- "Always show" toggle for the media-gen and skill-creator default skills, keeping them visible in all sessions
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- Brand skill files not accessible outside their initial session context
|
|
16
|
+
- `/model` command
|
|
17
|
+
|
|
18
|
+
### More
|
|
19
|
+
- Brand skills page now auto-refreshes on enter
|
|
20
|
+
|
|
21
|
+
## [1.2.17] - 2026-06-12
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
- Session sharing to Web UI — share any session via a shareable link with billing integration
|
|
25
|
+
- Share telemetry tracking
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
- Markdown rendering in certain edge cases
|
|
29
|
+
- Image blocks not detected in replay round counting, potentially causing history truncation
|
|
30
|
+
- History images served as base64 causing replay lag, now proxied through server
|
|
31
|
+
- WSL kernel repair getting stuck in infinite loop on pending state
|
|
32
|
+
- WeChat QR login fallback showing false stale-session errors
|
|
33
|
+
|
|
34
|
+
### More
|
|
35
|
+
- Background color styling update
|
|
36
|
+
|
|
8
37
|
## [1.2.16] - 2026-06-10
|
|
9
38
|
|
|
10
39
|
### Added
|
|
@@ -201,8 +201,11 @@ module Clacky
|
|
|
201
201
|
if msg[:content].is_a?(String)
|
|
202
202
|
!msg[:content].start_with?("[SYSTEM]")
|
|
203
203
|
elsif msg[:content].is_a?(Array)
|
|
204
|
-
# Must contain at least one text or image block (not a tool_result array)
|
|
205
|
-
|
|
204
|
+
# Must contain at least one text or image block (not a tool_result array).
|
|
205
|
+
# "image_url" covers image-only messages (user sent a picture with no
|
|
206
|
+
# accompanying text); without it such messages start no round and get
|
|
207
|
+
# dropped on replay, making the image vanish on session reopen.
|
|
208
|
+
msg[:content].any? { |b| b.is_a?(Hash) && %w[text image image_url].include?(b[:type].to_s) }
|
|
206
209
|
else
|
|
207
210
|
false
|
|
208
211
|
end
|
|
@@ -346,7 +346,7 @@ module Clacky
|
|
|
346
346
|
|
|
347
347
|
# For encrypted brand skills with supporting scripts: decrypt to a tmpdir so the
|
|
348
348
|
# LLM receives the real paths it can execute. The tmpdir is registered on the agent
|
|
349
|
-
# and
|
|
349
|
+
# and lives for the agent's lifetime (the session).
|
|
350
350
|
script_dir = nil
|
|
351
351
|
if skill.encrypted? && skill.has_supporting_files?
|
|
352
352
|
script_dir = Dir.mktmpdir("clacky-skill-#{skill.identifier}-")
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -42,7 +42,7 @@ module Clacky
|
|
|
42
42
|
|
|
43
43
|
attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
|
|
44
44
|
:cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
|
|
45
|
-
:status, :error, :updated_at, :source,
|
|
45
|
+
:status, :error, :updated_at, :source, :config,
|
|
46
46
|
:latest_latency, # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
|
|
47
47
|
:reasoning_effort
|
|
48
48
|
attr_accessor :pinned
|
|
@@ -102,7 +102,7 @@ module Clacky
|
|
|
102
102
|
@ui = ui # UIController for direct UI interaction
|
|
103
103
|
@debug_logs = [] # Debug logs for troubleshooting
|
|
104
104
|
@pending_injections = [] # Pending inline skill injections to flush after observe()
|
|
105
|
-
@pending_script_tmpdirs = [] # Decrypted-script tmpdirs
|
|
105
|
+
@pending_script_tmpdirs = [] # Decrypted-script tmpdirs that live for the agent's lifetime
|
|
106
106
|
@pending_error_rollback = false # Deferred rollback flag set by restore_session on error
|
|
107
107
|
@last_run_interrupted = false # Set when run() exits via AgentInterrupted; tells the next run() to keep the task-start snapshot (continuation of the same task across a relay, not a brand-new task)
|
|
108
108
|
|
|
@@ -677,11 +677,6 @@ module Clacky
|
|
|
677
677
|
Clacky::Logger.warn("[ph_debug] agent_run_ensure")
|
|
678
678
|
@ui&.show_progress(phase: "done")
|
|
679
679
|
|
|
680
|
-
# Shred any decrypted-script tmpdirs created during this run for encrypted brand skills.
|
|
681
|
-
# This covers the inline-injection path; the subagent path shreds immediately after
|
|
682
|
-
# subagent.run returns (see execute_skill_with_subagent).
|
|
683
|
-
shred_script_tmpdirs
|
|
684
|
-
|
|
685
680
|
# Fire-and-forget telemetry after every agent run.
|
|
686
681
|
# Tracks daily active users (distinct devices per day) and task volume.
|
|
687
682
|
Clacky::Telemetry.task!(result: result)
|
|
@@ -1055,7 +1050,7 @@ module Clacky
|
|
|
1055
1050
|
else
|
|
1056
1051
|
# Use tool's format_result method to get display-friendly string
|
|
1057
1052
|
formatted_result = tool.respond_to?(:format_result) ? tool.format_result(result) : result.to_s
|
|
1058
|
-
@ui&.show_tool_result(formatted_result)
|
|
1053
|
+
@ui&.show_tool_result(redact_tool_args(formatted_result))
|
|
1059
1054
|
end
|
|
1060
1055
|
|
|
1061
1056
|
results << build_success_result(call, result)
|
|
@@ -1073,7 +1068,7 @@ module Clacky
|
|
|
1073
1068
|
Clacky::Logger.error("tool_execution_error", tool: call[:name], error: e)
|
|
1074
1069
|
|
|
1075
1070
|
@hooks.trigger(:on_tool_error, call, e)
|
|
1076
|
-
@ui&.show_tool_error(e)
|
|
1071
|
+
@ui&.show_tool_error(redact_tool_args(e.message))
|
|
1077
1072
|
# Use build_denied_result with system_injected=true so LLM knows it can retry
|
|
1078
1073
|
results << build_denied_result(call, e.message, true)
|
|
1079
1074
|
end
|
|
@@ -1176,8 +1171,8 @@ module Clacky
|
|
|
1176
1171
|
end
|
|
1177
1172
|
|
|
1178
1173
|
# Register a tmpdir that contains decrypted brand skill scripts.
|
|
1179
|
-
# SkillManager calls this after decrypt_all_scripts
|
|
1180
|
-
#
|
|
1174
|
+
# SkillManager calls this after decrypt_all_scripts. The tmpdir lives for
|
|
1175
|
+
# the agent's lifetime (a session), not just a single agent.run.
|
|
1181
1176
|
# @param dir [String] Absolute path to the tmpdir
|
|
1182
1177
|
def register_script_tmpdir(dir)
|
|
1183
1178
|
@pending_script_tmpdirs << dir
|
|
@@ -253,13 +253,15 @@ browser(action="navigate", url="<qr_page_url>")
|
|
|
253
253
|
>
|
|
254
254
|
> `http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/weixin-qr.html?url=<URL-encoded qrcode_url>`
|
|
255
255
|
>
|
|
256
|
-
> Scan the QR code with WeChat
|
|
256
|
+
> Scan the QR code with WeChat and confirm in the app. I'm already watching for your scan — no need to reply.
|
|
257
|
+
|
|
258
|
+
**Do NOT wait for the user to reply "done".** Immediately proceed to Step 3 and start polling — exactly as in the browser-succeeds path. The polling script must already be running while the user scans, so it can observe the `scaned → confirmed` transition; otherwise a real scan can be misread as a stale session.
|
|
257
259
|
|
|
258
260
|
The page renders a proper scannable QR code image. Do NOT open the raw `qrcode_url` directly — that page shows "请使用微信扫码打开" with no actual QR image.
|
|
259
261
|
|
|
260
262
|
#### Step 3 — Wait for scan and save credentials
|
|
261
263
|
|
|
262
|
-
|
|
264
|
+
As soon as the QR page has been presented to the user — whether you opened it via the browser tool **or** gave the user the manual link — immediately run the polling script in the background. **In both cases, do NOT wait for the user to confirm or reply "done" before starting the poll** — the script must already be running while the user scans:
|
|
263
265
|
|
|
264
266
|
```bash
|
|
265
267
|
ruby "SKILL_DIR/weixin_setup.rb" --qrcode-id "$QRCODE_ID"
|
|
@@ -3,6 +3,7 @@ name: media-gen
|
|
|
3
3
|
description: 'Generate images (and later videos / audio) inside the current task. Use this skill whenever the user asks to create, generate, or produce a picture / image / illustration / cover / poster / icon / artwork — including phrases like 生成图片, 画一张, 做封面, 来张配图, generate image, make a picture, draw, create artwork, design a cover. Also use when building documents (slides, PPT, posters, marketing pages, README hero shots) where an image is needed inline. Routes calls through the local Clacky HTTP server, which uses the user-configured `type=image` model — you do NOT need to know which provider; the server handles it.'
|
|
4
4
|
disable-model-invocation: false
|
|
5
5
|
user-invocable: true
|
|
6
|
+
always-show: true
|
|
6
7
|
---
|
|
7
8
|
|
|
8
9
|
# media-gen
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: skill-creator
|
|
3
3
|
description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
|
|
4
|
+
always-show: true
|
|
4
5
|
---
|
|
5
6
|
|
|
6
7
|
# Skill Creator
|
data/lib/clacky/media/base.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require "fileutils"
|
|
4
4
|
require "base64"
|
|
5
5
|
require "securerandom"
|
|
6
|
+
require "faraday"
|
|
6
7
|
|
|
7
8
|
module Clacky
|
|
8
9
|
module Media
|
|
@@ -40,6 +41,37 @@ module Clacky
|
|
|
40
41
|
path
|
|
41
42
|
end
|
|
42
43
|
|
|
44
|
+
# Download a remote image URL and persist it under
|
|
45
|
+
# <output_dir>/assets/generated/, mirroring save_b64_image so providers
|
|
46
|
+
# that return URLs (e.g. DashScope, whose links expire after 24h) land
|
|
47
|
+
# local files at the same path shape as base64 providers.
|
|
48
|
+
# Returns the absolute path on disk, or nil if the download fails.
|
|
49
|
+
# @param url [String] remote image location, handed to #download_url
# @param output_dir [String] root directory; the file lands in <output_dir>/assets/generated/
# @param prefix [String] leading part of the generated file name
# @param extension [String] file extension, written without a leading dot
# @return [String, nil] path of the written file, or nil when nothing was downloaded
private def save_image_from_url(url, output_dir:, prefix: "img", extension: "png")
  bytes = download_url(url)
  # A failed (nil) or empty download must not leave an empty file behind.
  return nil if bytes.nil? || bytes.empty?

  generated_dir = File.join(output_dir, "assets", "generated")
  FileUtils.mkdir_p(generated_dir)

  # Timestamp plus a random suffix keeps names unique within the same second.
  stamp = Time.now.strftime("%Y%m%d_%H%M%S")
  suffix = SecureRandom.hex(4)
  File.join(generated_dir, "#{prefix}_#{stamp}_#{suffix}.#{extension}").tap do |destination|
    File.binwrite(destination, bytes)
  end
end
|
|
61
|
+
|
|
62
|
+
# Fetch raw bytes from a URL. Isolated so specs can stub it without a
|
|
63
|
+
# live HTTP call. Returns the response body String, or nil on failure.
|
|
64
|
+
# @param url [String] remote address to fetch with a plain GET
# @return [String, nil] the response body when the response reports success,
#   nil for a blank URL, an unsuccessful response, or any request/URL error
private def download_url(url)
  # A blank URL can never succeed; skip building a connection for it.
  return nil if url.to_s.strip.empty?

  conn = Faraday.new do |f|
    f.options.timeout = 120
    f.options.open_timeout = 10
  end
  resp = conn.get(url)
  resp.success? ? resp.body : nil
rescue Faraday::Error, URI::InvalidURIError
  # URI::InvalidURIError is not a Faraday::Error, so it is listed explicitly:
  # a malformed link must yield nil like every other download failure.
  nil
end
|
|
74
|
+
|
|
43
75
|
private def success_response(image:, prompt:, aspect_ratio:, provider:, extra: {})
|
|
44
76
|
{
|
|
45
77
|
"success" => true,
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
require "uri"
|
|
6
|
+
require_relative "base"
|
|
7
|
+
|
|
8
|
+
module Clacky
|
|
9
|
+
module Media
|
|
10
|
+
# Alibaba DashScope (Qwen-Image) image generation provider.
|
|
11
|
+
#
|
|
12
|
+
# DashScope is NOT an OpenAI-compatible image API. It has its own
|
|
13
|
+
# endpoint, request envelope and response schema:
|
|
14
|
+
#
|
|
15
|
+
# POST <host>/api/v1/services/aigc/multimodal-generation/generation
|
|
16
|
+
# Authorization: Bearer <key>
|
|
17
|
+
# { "model": "qwen-image-2.0-pro",
|
|
18
|
+
# "input": { "messages": [ { "role": "user",
|
|
19
|
+
# "content": [ { "text": "<prompt>" } ] } ] },
|
|
20
|
+
# "parameters": { "size": "2048*2048", "n": 1,
|
|
21
|
+
# "prompt_extend": true, "watermark": false } }
|
|
22
|
+
#
|
|
23
|
+
# => { "output": { "choices": [ { "message": { "content": [
|
|
24
|
+
# { "image": "https://...png?Expires=..." } ] } } ] },
|
|
25
|
+
# "usage": { "width": 2048, "height": 2048, "image_count": 1 } }
|
|
26
|
+
#
|
|
27
|
+
# The image link expires after 24h, so we download and persist it under
|
|
28
|
+
# <output_dir>/assets/generated/ (via Base#save_image_from_url), matching
|
|
29
|
+
# the on-disk shape of the base64 providers.
|
|
30
|
+
#
|
|
31
|
+
# Routing: Generator sends any base_url under *.aliyuncs.com here. We
|
|
32
|
+
# derive the real generation endpoint from the host so users can paste
|
|
33
|
+
# the compatible-mode base_url (…/compatible-mode/v1) they already use
|
|
34
|
+
# for Qwen text models and still get working image generation.
|
|
35
|
+
class DashScope < Base
|
|
36
|
+
# Path of the native multimodal generation endpoint; posted to relative to
# the API root the connection is built with.
GENERATION_PATH = "/api/v1/services/aigc/multimodal-generation/generation"

# aspect_ratio -> "<width>*<height>" (DashScope uses '*' not 'x').
# Sizes used for every model that is not qwen-image-max / qwen-image-plus
# (the per-model choice is made in #size_table). The 2.0 series is described
# as accepting arbitrary sizes within 512*512..2048*2048.
# NOTE(review): the landscape/portrait entries below have a 2688px side,
# which is outside that stated range — confirm against the DashScope docs.
ASPECT_TO_SIZE_V2 = {
  "landscape" => "2688*1536", # 16:9
  "square" => "2048*2048", # 1:1
  "portrait" => "1536*2688" # 9:16
}.freeze

# Sizes for the qwen-image-max / qwen-image-plus models, which are described
# as accepting only a fixed set of resolutions.
ASPECT_TO_SIZE_MAX_PLUS = {
  "landscape" => "1664*928", # 16:9
  "square" => "1328*1328", # 1:1
  "portrait" => "928*1664" # 9:16
}.freeze

# Aspect used when the caller passes none, or one missing from the size table.
DEFAULT_ASPECT = "landscape"
# Provider identifier reported in success and error responses.
PROVIDER_ID = "qwen"
|
|
57
|
+
|
|
58
|
+
# Generate one image through the DashScope native API and store it locally.
#
# @param prompt [String] text prompt; must not be blank
# @param aspect_ratio [String] key of the active size table; unknown values
#   fall back to DEFAULT_ASPECT
# @param output_dir [String, nil] root for the saved file (Dir.pwd when nil)
# @param n [Integer] forwarded as parameters.n; only the first image found
#   in the response is saved
# @return [Hash] whatever success_response / error_response build
def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
  sizes = size_table
  aspect = sizes.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
  size = sizes[aspect]

  # Every failure shares the same envelope; only the blank-prompt case
  # leaves the prompt out of it.
  failure = lambda do |message, type, with_prompt: true|
    details = { error: message, error_type: type, provider: PROVIDER_ID, aspect_ratio: aspect }
    details[:prompt] = prompt if with_prompt
    error_response(**details)
  end

  if prompt.to_s.strip.empty?
    return failure.call("Prompt is required and must be a non-empty string", "invalid_argument", with_prompt: false)
  end
  return failure.call("api_key not configured for image model '#{@model}'", "auth_required") if @api_key.to_s.empty?

  payload = {
    model: @model,
    input: { messages: [{ role: "user", content: [{ text: prompt }] }] },
    parameters: { size: size, n: n, prompt_extend: true, watermark: false }
  }

  response =
    begin
      connection.post(GENERATION_PATH) do |req|
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{@api_key}"
        req.body = JSON.generate(payload)
      end
    rescue Faraday::Error => e
      return failure.call("HTTP request failed: #{e.message}", "network_error")
    end

  body = parse_json(response.body)
  return failure.call("Invalid JSON response from upstream", "invalid_response") unless body.is_a?(Hash)

  # DashScope reports business failures via top-level code/message,
  # sometimes alongside a non-2xx status, sometimes 200 — so the code is
  # checked before the HTTP status.
  upstream_code = body["code"]
  if upstream_code && !upstream_code.to_s.empty?
    return failure.call("Upstream error #{upstream_code}: #{body["message"]}", "api_error")
  end
  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  image_url = extract_image_url(body)
  return failure.call("Upstream returned no image data", "empty_response") if image_url.nil?

  local_path = save_image_from_url(image_url, output_dir: output_dir || Dir.pwd, prefix: "img")
  if local_path.nil?
    return failure.call("Failed to download generated image from #{image_url}", "download_failed")
  end

  success_response(
    image: local_path,
    prompt: prompt,
    aspect_ratio: aspect,
    provider: PROVIDER_ID,
    extra: { "size" => size, "usage" => body["usage"], "request_id" => body["request_id"] }.compact
  )
end
|
|
180
|
+
|
|
181
|
+
# qwen-image-max / qwen-image-plus accept only the fixed resolution set;
|
|
182
|
+
# everything else (qwen-image-2.0 family, plain qwen-image) uses the 2.0
|
|
183
|
+
# recommended sizes.
|
|
184
|
+
# @return [Hash{String=>String}] aspect name => "<width>*<height>" table for @model
private def size_table
  # Case-insensitive substring match, so suffixed model ids select the fixed set too.
  fixed_set = @model.to_s.match?(/qwen-image-(max|plus)/i)
  fixed_set ? ASPECT_TO_SIZE_MAX_PLUS : ASPECT_TO_SIZE_V2
end
|
|
191
|
+
|
|
192
|
+
# output.choices[].message.content[].image -> first image URL
|
|
193
|
+
# Walk output.choices[].message.content[] and return the first image link.
#
# The payload comes from an external service, so every level is type-checked
# instead of trusting Hash#dig: a malformed level (e.g. "output" being a
# String, or a nil choice) is skipped rather than raising.
#
# @param body [Hash] parsed JSON response
# @return [String, nil] first non-empty "image" value, or nil when none exists
private def extract_image_url(body)
  field = ->(obj, key) { obj.is_a?(Hash) ? obj[key] : nil }

  choices = field.call(field.call(body, "output"), "choices")
  return nil unless choices.is_a?(Array)

  choices.each do |choice|
    content = field.call(field.call(choice, "message"), "content")
    next unless content.is_a?(Array)

    content.each do |block|
      img = field.call(block, "image")
      return img if img.is_a?(String) && !img.empty?
    end
  end
  nil
end
|
|
208
|
+
|
|
209
|
+
# Build the Faraday connection for the generation request, rooted at
# #endpoint_base. A new connection object is created on every call.
private def connection
  Faraday.new(url: endpoint_base) do |f|
    # Long overall timeout: presumably image generation can take minutes — TODO confirm.
    f.options.timeout = 240
    f.options.open_timeout = 10
  end
end
|
|
215
|
+
|
|
216
|
+
# Derive the API root (scheme + host) from the configured base_url,
|
|
217
|
+
# discarding any path the user pasted (e.g. /compatible-mode/v1). The
|
|
218
|
+
# generation path is then appended by #connection.post. Falls back to
|
|
219
|
+
# the mainland host if the configured URL can't be parsed.
|
|
220
|
+
# @return [String] "<scheme>://<host>[:<port>]" taken from @base_url, or the
#   mainland DashScope root when @base_url has no usable scheme/host
private def endpoint_base
  fallback = "https://dashscope.aliyuncs.com"

  # Strip first: stray whitespace from a pasted URL would otherwise make the
  # parse fail and silently reroute the request to the fallback host.
  uri = URI.parse(@base_url.to_s.strip)
  return fallback if uri.scheme.nil? || uri.host.to_s.empty?

  root = "#{uri.scheme}://#{uri.host}"
  # Keep an explicitly configured non-default port; dropping it would send
  # the request to a different listener than the one the user configured.
  custom_port = uri.port && uri.port != uri.default_port
  custom_port ? "#{root}:#{uri.port}" : root
rescue URI::InvalidURIError
  fallback
end
|
|
230
|
+
|
|
231
|
+
# Parse a response body as JSON without ever raising.
#
# @param body [String, nil] raw response body
# @return [Object, nil] the parsed value, or nil when the body is missing,
#   not string-like, or not valid JSON
private def parse_json(body)
  # JSON.parse(nil) raises TypeError rather than ParserError; treat a missing
  # body like any other unparseable response.
  return nil unless body.respond_to?(:to_str)

  JSON.parse(body)
rescue JSON::ParserError
  nil
end
|
|
236
|
+
|
|
237
|
+
# Shorten text for inclusion in error messages.
#
# @param str [#to_s] value to shorten
# @param max [Integer] number of characters kept before the ellipsis
# @return [String] the text unchanged when it fits, else its first +max+
#   characters followed by "..."
private def truncate(str, max)
  text = str.to_s
  return text if text.length <= max

  "#{text[0, max]}..."
end
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
end
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "openai_compat"
|
|
4
4
|
require_relative "gemini"
|
|
5
|
+
require_relative "dashscope"
|
|
5
6
|
|
|
6
7
|
module Clacky
|
|
7
8
|
module Media
|
|
@@ -22,6 +23,17 @@ module Clacky
|
|
|
22
23
|
"aiplatform.googleapis.com"
|
|
23
24
|
].freeze
|
|
24
25
|
|
|
26
|
+
# Hosts that speak Alibaba's native DashScope (Qwen-Image) API instead
# of an OpenAI-compatible facade. Matched as a substring so every
# regional variant (dashscope / dashscope-intl / dashscope-us, and the
# Singapore *.maas.aliyuncs.com workspace hosts) is caught. Third-party
# aggregators (SiliconFlow, OpenRouter, …) that re-expose qwen-image
# behind an OpenAI-compatible endpoint are NOT under aliyuncs.com, so
# they correctly keep going through OpenAICompat.
# NOTE(review): the substring test runs against the whole base_url string,
# not the parsed host, so "aliyuncs.com" appearing in a path or inside a
# longer host name also routes to DashScope — confirm that is acceptable.
DASHSCOPE_NATIVE_HOSTS = [
  "aliyuncs.com"
].freeze
|
|
36
|
+
|
|
25
37
|
# @param agent_config [Clacky::AgentConfig]
|
|
26
38
|
def initialize(agent_config)
|
|
27
39
|
@agent_config = agent_config
|
|
@@ -60,6 +72,10 @@ module Clacky
|
|
|
60
72
|
# Routing rules:
|
|
61
73
|
# • base_url points directly at a Google AI Studio host → Gemini
|
|
62
74
|
# (native /v1beta/models/<m>:generateContent schema).
|
|
75
|
+
# • base_url points at an Alibaba DashScope host (*.aliyuncs.com) →
|
|
76
|
+
# DashScope (native /api/v1/.../multimodal-generation schema for
|
|
77
|
+
# Qwen-Image). Third-party aggregators re-exposing qwen-image behind
|
|
78
|
+
# an OpenAI-compatible facade are NOT on aliyuncs.com and fall through.
|
|
63
79
|
# • everything else → OpenAICompat. This covers OpenAI itself, the
|
|
64
80
|
# openclacky gateway, OpenRouter, and any third-party proxy that
|
|
65
81
|
# re-exposes Gemini / Imagen / DALL-E behind /v1/images/generations.
|
|
@@ -69,6 +85,8 @@ module Clacky
|
|
|
69
85
|
url = entry["base_url"].to_s
|
|
70
86
|
if GOOGLE_NATIVE_HOSTS.any? { |host| url.include?(host) }
|
|
71
87
|
Gemini.new(entry)
|
|
88
|
+
elsif DASHSCOPE_NATIVE_HOSTS.any? { |host| url.include?(host) }
|
|
89
|
+
DashScope.new(entry)
|
|
72
90
|
else
|
|
73
91
|
OpenAICompat.new(entry)
|
|
74
92
|
end
|