openclacky 1.2.17 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/lib/clacky/agent/skill_manager.rb +1 -1
- data/lib/clacky/agent/time_machine.rb +256 -74
- data/lib/clacky/agent/tool_executor.rb +12 -0
- data/lib/clacky/agent.rb +21 -31
- data/lib/clacky/agent_config.rb +18 -0
- data/lib/clacky/cli.rb +55 -3
- data/lib/clacky/default_skills/media-gen/SKILL.md +173 -5
- data/lib/clacky/default_skills/skill-creator/SKILL.md +1 -0
- data/lib/clacky/media/base.rb +125 -0
- data/lib/clacky/media/dashscope.rb +243 -0
- data/lib/clacky/media/gemini.rb +10 -0
- data/lib/clacky/media/generator.rb +75 -0
- data/lib/clacky/media/openai_compat.rb +160 -0
- data/lib/clacky/message_history.rb +12 -7
- data/lib/clacky/providers.rb +28 -0
- data/lib/clacky/rich_ui_controller.rb +3 -1
- data/lib/clacky/server/backup_manager.rb +200 -0
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +10 -2
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +68 -15
- data/lib/clacky/server/channel/channel_manager.rb +180 -81
- data/lib/clacky/server/http_server.rb +348 -15
- data/lib/clacky/server/scheduler.rb +19 -0
- data/lib/clacky/server/session_registry.rb +8 -4
- data/lib/clacky/session_manager.rb +40 -2
- data/lib/clacky/skill.rb +3 -1
- data/lib/clacky/tools/trash_manager.rb +14 -0
- data/lib/clacky/ui2/components/command_suggestions.rb +1 -0
- data/lib/clacky/ui2/components/modal_component.rb +34 -7
- data/lib/clacky/ui2/ui_controller.rb +150 -19
- data/lib/clacky/utils/file_processor.rb +75 -4
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +2038 -1147
- data/lib/clacky/web/app.js +22 -1
- data/lib/clacky/web/backup.js +119 -0
- data/lib/clacky/web/billing.js +94 -7
- data/lib/clacky/web/channels.js +81 -11
- data/lib/clacky/web/design-sample.css +247 -0
- data/lib/clacky/web/design-sample.html +127 -0
- data/lib/clacky/web/favicon.svg +16 -0
- data/lib/clacky/web/i18n.js +159 -31
- data/lib/clacky/web/index.html +175 -55
- data/lib/clacky/web/logo_nav_dark.png +0 -0
- data/lib/clacky/web/onboard.js +114 -28
- data/lib/clacky/web/sessions.js +436 -192
- data/lib/clacky/web/settings.js +21 -1
- data/lib/clacky/web/skills.js +6 -6
- data/lib/clacky/web/tasks.js +129 -61
- data/lib/clacky/web/utils.js +72 -0
- data/lib/clacky/web/ws-dispatcher.js +6 -0
- data/lib/clacky.rb +1 -0
- metadata +8 -3
- data/lib/clacky/server/channel/group_message_buffer.rb +0 -53
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
require "json"
|
|
5
|
+
require "uri"
|
|
6
|
+
require_relative "base"
|
|
7
|
+
|
|
8
|
+
module Clacky
|
|
9
|
+
module Media
|
|
10
|
+
# Alibaba DashScope (Qwen-Image) image generation provider.
|
|
11
|
+
#
|
|
12
|
+
# DashScope is NOT an OpenAI-compatible image API. It has its own
|
|
13
|
+
# endpoint, request envelope and response schema:
|
|
14
|
+
#
|
|
15
|
+
# POST <host>/api/v1/services/aigc/multimodal-generation/generation
|
|
16
|
+
# Authorization: Bearer <key>
|
|
17
|
+
# { "model": "qwen-image-2.0-pro",
|
|
18
|
+
# "input": { "messages": [ { "role": "user",
|
|
19
|
+
# "content": [ { "text": "<prompt>" } ] } ] },
|
|
20
|
+
# "parameters": { "size": "2048*2048", "n": 1,
|
|
21
|
+
# "prompt_extend": true, "watermark": false } }
|
|
22
|
+
#
|
|
23
|
+
# => { "output": { "choices": [ { "message": { "content": [
|
|
24
|
+
# { "image": "https://...png?Expires=..." } ] } } ] },
|
|
25
|
+
# "usage": { "width": 2048, "height": 2048, "image_count": 1 } }
|
|
26
|
+
#
|
|
27
|
+
# The image link expires after 24h, so we download and persist it under
|
|
28
|
+
# <output_dir>/assets/generated/ (via Base#save_image_from_url), matching
|
|
29
|
+
# the on-disk shape of the base64 providers.
|
|
30
|
+
#
|
|
31
|
+
# Routing: Generator sends any base_url under *.aliyuncs.com here. We
|
|
32
|
+
# derive the real generation endpoint from the host so users can paste
|
|
33
|
+
# the compatible-mode base_url (…/compatible-mode/v1) they already use
|
|
34
|
+
# for Qwen text models and still get working image generation.
|
|
35
|
+
class DashScope < Base
|
|
36
|
+
GENERATION_PATH = "/api/v1/services/aigc/multimodal-generation/generation"
|
|
37
|
+
|
|
38
|
+
# aspect_ratio -> "<width>*<height>" (DashScope uses '*' not 'x').
|
|
39
|
+
# Recommended resolutions for the qwen-image-2.0 family; that series
|
|
40
|
+
# accepts arbitrary sizes within 512*512..2048*2048. The max/plus
|
|
41
|
+
# series only accept a fixed set of sizes, so they get their own table
|
|
42
|
+
# (ASPECT_TO_SIZE_MAX_PLUS); #size_table picks between the two.
|
|
43
|
+
ASPECT_TO_SIZE_V2 = {
|
|
44
|
+
"landscape" => "2688*1536", # 16:9
|
|
45
|
+
"square" => "2048*2048", # 1:1
|
|
46
|
+
"portrait" => "1536*2688" # 9:16
|
|
47
|
+
}.freeze
|
|
48
|
+
|
|
49
|
+
ASPECT_TO_SIZE_MAX_PLUS = {
|
|
50
|
+
"landscape" => "1664*928", # 16:9
|
|
51
|
+
"square" => "1328*1328", # 1:1
|
|
52
|
+
"portrait" => "928*1664" # 9:16
|
|
53
|
+
}.freeze
|
|
54
|
+
|
|
55
|
+
DEFAULT_ASPECT = "landscape"
|
|
56
|
+
PROVIDER_ID = "qwen"
|
|
57
|
+
|
|
58
|
+
# Generate an image from a text prompt through DashScope's native
# multimodal-generation endpoint, then download the returned URL to disk.
#
# @param prompt [String] text prompt; must be non-blank
# @param aspect_ratio [String] key of the active size table; unknown
#   values fall back to DEFAULT_ASPECT
# @param output_dir [String, nil] directory handed to save_image_from_url
#   (Dir.pwd when nil)
# @param n [Integer] forwarded as parameters.n; only the first image URL
#   found in the response is downloaded
# @return the value built by error_response / success_response
def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
  aspect = size_table.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
  size = size_table[aspect]

  # The blank-prompt error intentionally carries no prompt field.
  if prompt.to_s.strip.empty?
    return error_response(
      error: "Prompt is required and must be a non-empty string",
      error_type: "invalid_argument",
      provider: PROVIDER_ID,
      aspect_ratio: aspect
    )
  end

  # Every later failure shares the same context; only message and type vary.
  failure = lambda do |message, type|
    error_response(
      error: message,
      error_type: type,
      provider: PROVIDER_ID,
      prompt: prompt,
      aspect_ratio: aspect
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for image model '#{@model}'", "auth_required")
  end

  payload = {
    model: @model,
    input: {
      messages: [
        { role: "user", content: [{ text: prompt }] }
      ]
    },
    parameters: {
      size: size,
      n: n,
      prompt_extend: true,
      watermark: false
    }
  }

  begin
    response = connection.post(GENERATION_PATH) do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  # to_s guards against a nil body, which JSON.parse rejects with TypeError.
  body = parse_json(response.body.to_s)
  unless body.is_a?(Hash)
    # A non-JSON body on a non-2xx reply (e.g. an HTML gateway error page)
    # is an upstream failure: surface the status instead of hiding it
    # behind a generic "invalid JSON" message.
    unless response.success?
      return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
    end

    return failure.call("Invalid JSON response from upstream", "invalid_response")
  end

  # DashScope reports business failures via top-level code/message,
  # sometimes alongside a non-2xx status, sometimes 200.
  if body["code"] && !body["code"].to_s.empty?
    return failure.call("Upstream error #{body["code"]}: #{body["message"]}", "api_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  image_url = extract_image_url(body)
  return failure.call("Upstream returned no image data", "empty_response") if image_url.nil?

  local_path = save_image_from_url(image_url, output_dir: output_dir || Dir.pwd, prefix: "img")
  if local_path.nil?
    return failure.call("Failed to download generated image from #{image_url}", "download_failed")
  end

  success_response(
    image: local_path,
    prompt: prompt,
    aspect_ratio: aspect,
    provider: PROVIDER_ID,
    extra: {
      "size" => size,
      "usage" => body["usage"],
      "request_id" => body["request_id"]
    }.compact
  )
end
|
|
180
|
+
|
|
181
|
+
# qwen-image-max / qwen-image-plus accept only the fixed resolution set;
|
|
182
|
+
# everything else (qwen-image-2.0 family, plain qwen-image) uses the 2.0
|
|
183
|
+
# recommended sizes.
|
|
184
|
+
private def size_table
  # Model names containing qwen-image-max / qwen-image-plus (any case) get
  # the fixed-resolution table; every other model uses the 2.0 table.
  return ASPECT_TO_SIZE_MAX_PLUS if /qwen-image-(max|plus)/i.match?(@model.to_s)

  ASPECT_TO_SIZE_V2
end
|
|
191
|
+
|
|
192
|
+
# output.choices[].message.content[].image -> first image URL
|
|
193
|
+
private def extract_image_url(body)
  # Walks output.choices[].message.content[] and returns the first
  # non-empty "image" string, or nil when there is none. Each level is
  # type-checked so a malformed upstream payload (nil choice, string
  # message, array output, ...) yields nil instead of raising.
  output = body.is_a?(Hash) ? body["output"] : nil
  choices = output.is_a?(Hash) ? output["choices"] : nil
  return nil unless choices.is_a?(Array)

  choices.each do |choice|
    message = choice.is_a?(Hash) ? choice["message"] : nil
    content = message.is_a?(Hash) ? message["content"] : nil
    next unless content.is_a?(Array)

    content.each do |block|
      img = block.is_a?(Hash) ? block["image"] : nil
      return img if img.is_a?(String) && !img.empty?
    end
  end
  nil
end
|
|
208
|
+
|
|
209
|
+
private def connection
  # Builds a new Faraday client rooted at the derived API host on each
  # call: 10 s to connect, 240 s overall request timeout.
  Faraday.new(url: endpoint_base) do |client|
    client.options.open_timeout = 10
    client.options.timeout = 240
  end
end
|
|
215
|
+
|
|
216
|
+
# Derive the API root (scheme + host) from the configured base_url,
|
|
217
|
+
# discarding any path the user pasted (e.g. /compatible-mode/v1). The
|
|
218
|
+
# generation path is then appended by #connection.post. Falls back to
|
|
219
|
+
# the mainland host if the configured URL can't be parsed.
|
|
220
|
+
private def endpoint_base
  # Returns "<scheme>://<host>[:<port>]" derived from @base_url, dropping
  # any path. Surrounding whitespace (a common copy/paste artefact) is
  # stripped before parsing so it no longer forces the fallback, and an
  # explicit non-default port is preserved. Falls back to the mainland
  # host when the URL has no scheme/host or cannot be parsed.
  fallback = "https://dashscope.aliyuncs.com"
  uri = URI.parse(@base_url.to_s.strip)
  return fallback unless uri.scheme && uri.host

  root = "#{uri.scheme}://#{uri.host}"
  port = uri.port
  port && port != uri.default_port ? "#{root}:#{port}" : root
rescue URI::InvalidURIError
  fallback
end
|
|
230
|
+
|
|
231
|
+
private def parse_json(body)
  # Parses a JSON response body. Returns nil both for malformed JSON and
  # for a non-String body (e.g. nil on an empty response), which
  # JSON.parse rejects with TypeError rather than ParserError.
  JSON.parse(body)
rescue JSON::ParserError, TypeError
  nil
end
|
|
236
|
+
|
|
237
|
+
private def truncate(str, max)
  # Stringifies the input and, when it is longer than max characters,
  # keeps the first max and appends "...".
  text = str.to_s
  return text if text.length <= max

  "#{text[0, max]}..."
end
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
end
|
data/lib/clacky/media/gemini.rb
CHANGED
|
@@ -31,6 +31,16 @@ module Clacky
|
|
|
31
31
|
aspect_ratio: aspect_ratio
|
|
32
32
|
)
|
|
33
33
|
end
|
|
34
|
+
|
|
35
|
+
def generate_video(prompt:, aspect_ratio: "landscape", duration_seconds: nil, output_dir: nil, **_kwargs)
  # Always answers with a "not_implemented" error envelope; duration_seconds
  # and output_dir are accepted for signature parity but never used.
  unsupported = "Direct Google AI Studio video generation is not supported. Use the openclacky gateway (base_url https://api.openclacky.com) with a video model such as or-veo-3-1."

  video_error_response(
    error: unsupported,
    error_type: "not_implemented",
    provider: "gemini-direct",
    prompt: prompt,
    aspect_ratio: aspect_ratio
  )
end
|
|
34
44
|
end
|
|
35
45
|
end
|
|
36
46
|
end
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "openai_compat"
|
|
4
4
|
require_relative "gemini"
|
|
5
|
+
require_relative "dashscope"
|
|
5
6
|
|
|
6
7
|
module Clacky
|
|
7
8
|
module Media
|
|
@@ -22,6 +23,17 @@ module Clacky
|
|
|
22
23
|
"aiplatform.googleapis.com"
|
|
23
24
|
].freeze
|
|
24
25
|
|
|
26
|
+
# Hosts that speak Alibaba's native DashScope (Qwen-Image) API instead
|
|
27
|
+
# of an OpenAI-compatible facade. Matched as a substring so every
|
|
28
|
+
# regional variant (dashscope / dashscope-intl / dashscope-us, and the
|
|
29
|
+
# Singapore *.maas.aliyuncs.com workspace hosts) is caught. Third-party
|
|
30
|
+
# aggregators (SiliconFlow, OpenRouter, …) that re-expose qwen-image
|
|
31
|
+
# behind an OpenAI-compatible endpoint are NOT under aliyuncs.com, so
|
|
32
|
+
# they correctly keep going through OpenAICompat.
|
|
33
|
+
DASHSCOPE_NATIVE_HOSTS = [
|
|
34
|
+
"aliyuncs.com"
|
|
35
|
+
].freeze
|
|
36
|
+
|
|
25
37
|
# @param agent_config [Clacky::AgentConfig]
|
|
26
38
|
def initialize(agent_config)
|
|
27
39
|
@agent_config = agent_config
|
|
@@ -32,6 +44,16 @@ module Clacky
|
|
|
32
44
|
@agent_config.find_model_by_type("image")
|
|
33
45
|
end
|
|
34
46
|
|
|
47
|
+
# @return [Hash, nil] the type=video model entry, or nil if not configured
|
|
48
|
+
def video_model_entry
  # Delegates to the agent config's lookup for the "video" model type.
  config = @agent_config
  config.find_model_by_type("video")
end
|
|
51
|
+
|
|
52
|
+
# @return [Hash, nil] the type=audio model entry, or nil if not configured
|
|
53
|
+
def audio_model_entry
  # Delegates to the agent config's lookup for the "audio" model type.
  config = @agent_config
  config.find_model_by_type("audio")
end
|
|
56
|
+
|
|
35
57
|
def generate_image(prompt:, aspect_ratio: "landscape", output_dir: nil, **kwargs)
|
|
36
58
|
entry = image_model_entry
|
|
37
59
|
if entry.nil?
|
|
@@ -55,11 +77,62 @@ module Clacky
|
|
|
55
77
|
)
|
|
56
78
|
end
|
|
57
79
|
|
|
80
|
+
# Route a video request to the provider adapter of the configured
# type=video model, or return a "not_configured" error hash when no such
# model exists. Extra keyword arguments are forwarded to the adapter.
def generate_video(prompt:, aspect_ratio: "landscape", duration_seconds: nil, output_dir: nil, **kwargs)
  model_entry = video_model_entry

  if model_entry.nil?
    not_configured = {
      "success" => false,
      "video" => nil,
      "error" => "No video model configured. Add a model with type=video in settings.",
      "error_type" => "not_configured",
      "provider" => "",
      "model" => "",
      "prompt" => prompt
    }
    return not_configured
  end

  build_provider_for(model_entry).generate_video(
    prompt: prompt,
    aspect_ratio: aspect_ratio,
    duration_seconds: duration_seconds,
    output_dir: output_dir,
    **kwargs
  )
end
|
|
103
|
+
|
|
104
|
+
# Route a text-to-speech request to the provider adapter of the configured
# type=audio model, or return a "not_configured" error hash when no such
# model exists. Extra keyword arguments are forwarded to the adapter.
def generate_speech(input:, voice: nil, output_dir: nil, **kwargs)
  model_entry = audio_model_entry

  if model_entry.nil?
    not_configured = {
      "success" => false,
      "audio" => nil,
      "error" => "No audio model configured. Add a model with type=audio in settings.",
      "error_type" => "not_configured",
      "provider" => "",
      "model" => "",
      "input" => input
    }
    return not_configured
  end

  build_provider_for(model_entry).generate_speech(
    input: input,
    voice: voice,
    output_dir: output_dir,
    **kwargs
  )
end
|
|
126
|
+
|
|
58
127
|
# Pick the adapter class for a media model entry.
|
|
59
128
|
#
|
|
60
129
|
# Routing rules:
|
|
61
130
|
# • base_url points directly at a Google AI Studio host → Gemini
|
|
62
131
|
# (native /v1beta/models/<m>:generateContent schema).
|
|
132
|
+
# • base_url points at an Alibaba DashScope host (*.aliyuncs.com) →
|
|
133
|
+
# DashScope (native /api/v1/.../multimodal-generation schema for
|
|
134
|
+
# Qwen-Image). Third-party aggregators re-exposing qwen-image behind
|
|
135
|
+
# an OpenAI-compatible facade are NOT on aliyuncs.com and fall through.
|
|
63
136
|
# • everything else → OpenAICompat. This covers OpenAI itself, the
|
|
64
137
|
# openclacky gateway, OpenRouter, and any third-party proxy that
|
|
65
138
|
# re-exposes Gemini / Imagen / DALL-E behind /v1/images/generations.
|
|
@@ -69,6 +142,8 @@ module Clacky
|
|
|
69
142
|
url = entry["base_url"].to_s
|
|
70
143
|
if GOOGLE_NATIVE_HOSTS.any? { |host| url.include?(host) }
|
|
71
144
|
Gemini.new(entry)
|
|
145
|
+
elsif DASHSCOPE_NATIVE_HOSTS.any? { |host| url.include?(host) }
|
|
146
|
+
DashScope.new(entry)
|
|
72
147
|
else
|
|
73
148
|
OpenAICompat.new(entry)
|
|
74
149
|
end
|
|
@@ -22,6 +22,12 @@ module Clacky
|
|
|
22
22
|
|
|
23
23
|
DEFAULT_ASPECT = "landscape"
|
|
24
24
|
|
|
25
|
+
# Video aspect ratios accepted by the gateway's /videos/generations
|
|
26
|
+
# endpoint. The human-friendly labels map straight through; the gateway
|
|
27
|
+
# normalises to Veo's "16:9" / "9:16" internally.
|
|
28
|
+
VIDEO_ASPECTS = %w[landscape portrait].freeze
|
|
29
|
+
DEFAULT_VIDEO_DURATION = 8
|
|
30
|
+
|
|
25
31
|
def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
|
|
26
32
|
provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"
|
|
27
33
|
aspect = ASPECT_TO_SIZE.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
|
|
@@ -135,6 +141,143 @@ module Clacky
|
|
|
135
141
|
)
|
|
136
142
|
end
|
|
137
143
|
|
|
144
|
+
# Generate a video through the gateway's videos/generations endpoint and
# save the base64 payload it returns.
#
# @param prompt [String] text prompt; must be non-blank
# @param aspect_ratio [String] one of VIDEO_ASPECTS; anything else falls
#   back to DEFAULT_ASPECT
# @param duration_seconds [Integer, nil] coerced with to_i; values <= 0
#   become DEFAULT_VIDEO_DURATION
# @param output_dir [String, nil] directory handed to save_b64_video
#   (Dir.pwd when nil)
# @param image [Hash, nil] optional image hash, forwarded only when it
#   carries a b64_json entry (String or Symbol key)
# @return the value built by video_error_response / video_success_response
def generate_video(prompt:, aspect_ratio: DEFAULT_ASPECT, duration_seconds: nil, output_dir: nil, image: nil, **_kwargs)
  provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"
  aspect = VIDEO_ASPECTS.include?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
  duration = duration_seconds.to_i
  duration = DEFAULT_VIDEO_DURATION if duration <= 0

  # The blank-prompt error intentionally carries no prompt field.
  if prompt.to_s.strip.empty?
    return video_error_response(
      error: "Prompt is required and must be a non-empty string",
      error_type: "invalid_argument", provider: provider_id, aspect_ratio: aspect
    )
  end

  # Every later failure shares the same context; only message and type vary.
  failure = lambda do |message, type|
    video_error_response(
      error: message,
      error_type: type, provider: provider_id, prompt: prompt, aspect_ratio: aspect
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for video model '#{@model}'", "auth_required")
  end

  payload = { model: @model, prompt: prompt, aspect_ratio: aspect, duration_seconds: duration }
  # Accept both string- and symbol-keyed hashes; JSON.generate emits the
  # same "b64_json" key for either.
  payload[:image] = image if image.is_a?(Hash) && (image["b64_json"] || image[:b64_json])

  begin
    response = video_connection.post("videos/generations") do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  body = parse_json(response.body)
  return failure.call("Invalid JSON response from upstream", "invalid_response") unless body.is_a?(Hash)

  # Type-check the envelope: a "data" value that is not an array of hashes
  # is treated as "no video" instead of raising on first / [].
  data = body["data"]
  first = data.is_a?(Array) ? data.first : nil
  if !first.is_a?(Hash) || first["b64_json"].to_s.empty?
    return failure.call("Upstream returned no video data", "empty_response")
  end

  path = save_b64_video(first["b64_json"], output_dir: output_dir || Dir.pwd, prefix: "vid")
  video_success_response(
    video: path, prompt: prompt, aspect_ratio: aspect, provider: provider_id,
    extra: {
      "duration_seconds" => duration,
      "usage" => body["usage"],
      "cost_usd" => body["cost_usd"]
    }.compact
  )
end
|
|
210
|
+
|
|
211
|
+
# Synthesize speech through the gateway's audio/speech endpoint and save
# the base64 payload it returns.
#
# @param input [String] text to speak; must be non-blank
# @param voice [String, nil] optional voice name; sent only when non-blank
# @param output_dir [String, nil] directory handed to save_b64_audio
#   (Dir.pwd when nil)
# @return the value built by audio_error_response / audio_success_response
def generate_speech(input:, voice: nil, output_dir: nil, **_kwargs)
  provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"

  # The blank-input error intentionally carries no input field.
  if input.to_s.strip.empty?
    return audio_error_response(
      error: "input is required and must be a non-empty string",
      error_type: "invalid_argument", provider: provider_id, voice: voice.to_s
    )
  end

  # Every later failure shares the same context; only message and type vary.
  failure = lambda do |message, type|
    audio_error_response(
      error: message,
      error_type: type, provider: provider_id, input: input, voice: voice.to_s
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for audio model '#{@model}'", "auth_required")
  end

  payload = { model: @model, input: input }
  payload[:voice] = voice if voice && !voice.to_s.strip.empty?

  begin
    response = audio_connection.post("audio/speech") do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  body = parse_json(response.body)
  return failure.call("Invalid JSON response from upstream", "invalid_response") unless body.is_a?(Hash)

  # Type-check the envelope: a "data" value that is not an array of hashes
  # is treated as "no audio" instead of raising on first / [].
  data = body["data"]
  first = data.is_a?(Array) ? data.first : nil
  if !first.is_a?(Hash) || first["b64_json"].to_s.empty?
    return failure.call("Upstream returned no audio data", "empty_response")
  end

  # Pick the file extension from the reported MIME type; anything
  # unrecognised (including a missing mime_type) is saved as wav.
  ext = case first["mime_type"].to_s
        when "audio/mpeg", "audio/mp3" then "mp3"
        when "audio/ogg" then "ogg"
        else "wav"
        end

  path = save_b64_audio(first["b64_json"], output_dir: output_dir || Dir.pwd, prefix: "tts", extension: ext)
  audio_success_response(
    audio: path, input: input, voice: body["voice"] || voice.to_s, provider: provider_id,
    extra: {
      "mime_type" => first["mime_type"],
      "usage" => body["usage"],
      "cost_usd" => body["cost_usd"]
    }.compact
  )
end
|
|
280
|
+
|
|
138
281
|
private def connection
|
|
139
282
|
Faraday.new(url: normalized_base_url) do |f|
|
|
140
283
|
f.options.timeout = 240
|
|
@@ -142,6 +285,23 @@ module Clacky
|
|
|
142
285
|
end
|
|
143
286
|
end
|
|
144
287
|
|
|
288
|
+
# Video generation runs the gateway's submit+poll cycle inside one
|
|
289
|
+
# request, which can take several minutes; give it a much longer read
|
|
290
|
+
# timeout than the image path.
|
|
291
|
+
private def video_connection
  # Same base URL as the image client, with a 600 s request timeout and a
  # 10 s connect timeout.
  Faraday.new(url: normalized_base_url) do |client|
    client.options.open_timeout = 10
    client.options.timeout = 600
  end
end
|
|
297
|
+
|
|
298
|
+
private def audio_connection
  # Faraday client for speech requests: 120 s request timeout, 10 s
  # connect timeout.
  Faraday.new(url: normalized_base_url) do |client|
    client.options.open_timeout = 10
    client.options.timeout = 120
  end
end
|
|
304
|
+
|
|
145
305
|
private def gemini_family?(model_name)
  # True when the stringified model name contains "gemini" or "imagen",
  # ignoring case.
  /gemini|imagen/i.match?(model_name.to_s)
end
|
|
@@ -150,11 +150,6 @@ module Clacky
|
|
|
150
150
|
@messages.find { |m| m[:subagent_instructions] }
|
|
151
151
|
end
|
|
152
152
|
|
|
153
|
-
# Return all messages where task_id <= given id (Time Machine support).
|
|
154
|
-
def for_task(task_id)
|
|
155
|
-
@messages.select { |m| !m[:task_id] || m[:task_id] <= task_id }
|
|
156
|
-
end
|
|
157
|
-
|
|
158
153
|
# ─────────────────────────────────────────────
|
|
159
154
|
# Size helpers
|
|
160
155
|
# ─────────────────────────────────────────────
|
|
@@ -191,8 +186,18 @@ module Clacky
|
|
|
191
186
|
# can't fire when the previous turns came from a provider that keeps
|
|
192
187
|
# thinking inline (e.g. MiniMax: <think>...</think> in content), so
|
|
193
188
|
# this bypass lets us recover on the retry without a server restart.
|
|
194
|
-
|
|
195
|
-
|
|
189
|
+
# Convert to API-ready messages. When `task_chain` is given (a Set of
|
|
190
|
+
# task IDs forming the active task's ancestor chain), messages tagged with
|
|
191
|
+
# a task_id outside that chain are dropped first — this is the Time Machine
|
|
192
|
+
# path, ensuring undone/sibling-branch turns never reach the LLM. Messages
|
|
193
|
+
# without a task_id (system / injected context) are always kept.
|
|
194
|
+
def to_api(force_reasoning_content_pad: false, task_chain: nil)
  # With a task_chain, discard messages whose task_id is set but not part
  # of the chain; messages without a task_id always survive.
  visible = @messages
  if task_chain
    visible = visible.reject { |m| m[:task_id] && !task_chain.include?(m[:task_id]) }
  end

  stripped = visible.map { |m| strip_for_api(m) }
  paired = repair_tool_call_pairing(stripped)
  ensure_reasoning_content_consistency(paired, force: force_reasoning_content_pad)
end
|
data/lib/clacky/providers.rb
CHANGED
|
@@ -60,6 +60,34 @@ module Clacky
|
|
|
60
60
|
"or-gpt-image-2" => "GPT Image 2"
|
|
61
61
|
},
|
|
62
62
|
"default_image_model" => "or-gpt-image-2",
|
|
63
|
+
# Video generation models served by the openclacky gateway, which
|
|
64
|
+
# routes them to Vertex AI Veo (async predictLongRunning under the
|
|
65
|
+
# hood; the gateway hides the polling and returns the MP4 inline).
|
|
66
|
+
"video_models" => [
|
|
67
|
+
"or-veo-3-1",
|
|
68
|
+
"or-veo-3-1-fast",
|
|
69
|
+
"or-veo-3",
|
|
70
|
+
"or-veo-3-fast"
|
|
71
|
+
],
|
|
72
|
+
"video_model_aliases" => {
|
|
73
|
+
"or-veo-3-1" => "Veo 3.1",
|
|
74
|
+
"or-veo-3-1-fast" => "Veo 3.1 Fast",
|
|
75
|
+
"or-veo-3" => "Veo 3",
|
|
76
|
+
"or-veo-3-fast" => "Veo 3 Fast"
|
|
77
|
+
},
|
|
78
|
+
"default_video_model" => "or-veo-3-1",
|
|
79
|
+
# Text-to-speech models served by the openclacky gateway, which
|
|
80
|
+
# routes them to Vertex AI Gemini 2.5 (responseModalities=["AUDIO"]).
|
|
81
|
+
# The gateway returns WAV inline as base64.
|
|
82
|
+
"audio_models" => [
|
|
83
|
+
"or-tts-gemini-2-5-flash",
|
|
84
|
+
"or-tts-gemini-2-5-pro"
|
|
85
|
+
],
|
|
86
|
+
"audio_model_aliases" => {
|
|
87
|
+
"or-tts-gemini-2-5-flash" => "Gemini 2.5 Flash TTS",
|
|
88
|
+
"or-tts-gemini-2-5-pro" => "Gemini 2.5 Pro TTS"
|
|
89
|
+
},
|
|
90
|
+
"default_audio_model" => "or-tts-gemini-2-5-flash",
|
|
63
91
|
# Default OCR sidecar — used when the primary model is text-only.
|
|
64
92
|
# Candidates are derived from the provider's vision-capable models;
|
|
65
93
|
# this just picks the cheap+fast default to surface in "auto" mode.
|
|
@@ -559,7 +559,9 @@ module Clacky
|
|
|
559
559
|
@running = false
|
|
560
560
|
end
|
|
561
561
|
|
|
562
|
-
|
|
562
|
+
# Clears the screen on exit by default — the Rich UI repaints fullscreen
|
|
563
|
+
# and leaves no useful scrollback to preserve.
|
|
564
|
+
def stop(clear_screen: true)
|
|
563
565
|
@running = false
|
|
564
566
|
@shell.stop
|
|
565
567
|
RubyRich::Terminal.clear if clear_screen
|