openclacky 1.2.17 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -0
  3. data/lib/clacky/agent/skill_manager.rb +1 -1
  4. data/lib/clacky/agent/time_machine.rb +256 -74
  5. data/lib/clacky/agent/tool_executor.rb +12 -0
  6. data/lib/clacky/agent.rb +21 -31
  7. data/lib/clacky/agent_config.rb +18 -0
  8. data/lib/clacky/cli.rb +55 -3
  9. data/lib/clacky/default_skills/media-gen/SKILL.md +173 -5
  10. data/lib/clacky/default_skills/skill-creator/SKILL.md +1 -0
  11. data/lib/clacky/media/base.rb +125 -0
  12. data/lib/clacky/media/dashscope.rb +243 -0
  13. data/lib/clacky/media/gemini.rb +10 -0
  14. data/lib/clacky/media/generator.rb +75 -0
  15. data/lib/clacky/media/openai_compat.rb +160 -0
  16. data/lib/clacky/message_history.rb +12 -7
  17. data/lib/clacky/providers.rb +28 -0
  18. data/lib/clacky/rich_ui_controller.rb +3 -1
  19. data/lib/clacky/server/backup_manager.rb +200 -0
  20. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +10 -2
  21. data/lib/clacky/server/channel/adapters/feishu/bot.rb +68 -15
  22. data/lib/clacky/server/channel/channel_manager.rb +180 -81
  23. data/lib/clacky/server/http_server.rb +348 -15
  24. data/lib/clacky/server/scheduler.rb +19 -0
  25. data/lib/clacky/server/session_registry.rb +8 -4
  26. data/lib/clacky/session_manager.rb +40 -2
  27. data/lib/clacky/skill.rb +3 -1
  28. data/lib/clacky/tools/trash_manager.rb +14 -0
  29. data/lib/clacky/ui2/components/command_suggestions.rb +1 -0
  30. data/lib/clacky/ui2/components/modal_component.rb +34 -7
  31. data/lib/clacky/ui2/ui_controller.rb +150 -19
  32. data/lib/clacky/utils/file_processor.rb +75 -4
  33. data/lib/clacky/version.rb +1 -1
  34. data/lib/clacky/web/app.css +2038 -1147
  35. data/lib/clacky/web/app.js +22 -1
  36. data/lib/clacky/web/backup.js +119 -0
  37. data/lib/clacky/web/billing.js +94 -7
  38. data/lib/clacky/web/channels.js +81 -11
  39. data/lib/clacky/web/design-sample.css +247 -0
  40. data/lib/clacky/web/design-sample.html +127 -0
  41. data/lib/clacky/web/favicon.svg +16 -0
  42. data/lib/clacky/web/i18n.js +159 -31
  43. data/lib/clacky/web/index.html +175 -55
  44. data/lib/clacky/web/logo_nav_dark.png +0 -0
  45. data/lib/clacky/web/onboard.js +114 -28
  46. data/lib/clacky/web/sessions.js +436 -192
  47. data/lib/clacky/web/settings.js +21 -1
  48. data/lib/clacky/web/skills.js +6 -6
  49. data/lib/clacky/web/tasks.js +129 -61
  50. data/lib/clacky/web/utils.js +72 -0
  51. data/lib/clacky/web/ws-dispatcher.js +6 -0
  52. data/lib/clacky.rb +1 -0
  53. metadata +8 -3
  54. data/lib/clacky/server/channel/group_message_buffer.rb +0 -53
@@ -0,0 +1,243 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "base"
7
+
8
+ module Clacky
9
+ module Media
10
+ # Alibaba DashScope (Qwen-Image) image generation provider.
11
+ #
12
+ # DashScope is NOT an OpenAI-compatible image API. It has its own
13
+ # endpoint, request envelope and response schema:
14
+ #
15
+ # POST <host>/api/v1/services/aigc/multimodal-generation/generation
16
+ # Authorization: Bearer <key>
17
+ # { "model": "qwen-image-2.0-pro",
18
+ # "input": { "messages": [ { "role": "user",
19
+ # "content": [ { "text": "<prompt>" } ] } ] },
20
+ # "parameters": { "size": "2048*2048", "n": 1,
21
+ # "prompt_extend": true, "watermark": false } }
22
+ #
23
+ # => { "output": { "choices": [ { "message": { "content": [
24
+ # { "image": "https://...png?Expires=..." } ] } } ] },
25
+ # "usage": { "width": 2048, "height": 2048, "image_count": 1 } }
26
+ #
27
+ # The image link expires after 24h, so we download and persist it under
28
+ # <output_dir>/assets/generated/ (via Base#save_image_from_url), matching
29
+ # the on-disk shape of the base64 providers.
30
+ #
31
+ # Routing: Generator sends any base_url under *.aliyuncs.com here. We
32
+ # derive the real generation endpoint from the host so users can paste
33
+ # the compatible-mode base_url (…/compatible-mode/v1) they already use
34
+ # for Qwen text models and still get working image generation.
35
+ class DashScope < Base
36
+ GENERATION_PATH = "/api/v1/services/aigc/multimodal-generation/generation"
37
+
38
+ # aspect_ratio -> "<width>*<height>" (DashScope uses '*' not 'x').
39
+ # Two tables are needed because the model families disagree on sizes:
40
+ # the 2.0 series accepts arbitrary sizes within 512*512..2048*2048,
41
+ # while the max/plus series only accept a fixed set. #size_table picks
42
+ # the table that matches the configured model.
43
+ ASPECT_TO_SIZE_V2 = {
44
+ "landscape" => "2688*1536", # 16:9
45
+ "square" => "2048*2048", # 1:1
46
+ "portrait" => "1536*2688" # 9:16
47
+ }.freeze
48
+
49
+ ASPECT_TO_SIZE_MAX_PLUS = {
50
+ "landscape" => "1664*928", # 16:9
51
+ "square" => "1328*1328", # 1:1
52
+ "portrait" => "928*1664" # 9:16
53
+ }.freeze
54
+
55
+ DEFAULT_ASPECT = "landscape"
56
+ PROVIDER_ID = "qwen"
57
+
58
# Generate an image through DashScope's native multimodal-generation
# endpoint and store it locally.
#
# Steps: validate the prompt and api_key, POST the request envelope to
# GENERATION_PATH, classify upstream failures, then download the returned
# image URL with save_image_from_url.
#
# @param prompt [String] text prompt; blank prompts are rejected
# @param aspect_ratio [String] key of the active size table; unknown
#   values fall back to DEFAULT_ASPECT
# @param output_dir [String, nil] passed to save_image_from_url; defaults
#   to Dir.pwd
# @param n [Integer] forwarded as parameters.n; only the first image URL
#   found in the response is saved
# @return the value built by error_response or success_response
def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
  sizes = size_table
  aspect = sizes.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
  size = sizes[aspect]

  if prompt.to_s.strip.empty?
    return error_response(
      error: "Prompt is required and must be a non-empty string",
      error_type: "invalid_argument",
      provider: PROVIDER_ID,
      aspect_ratio: aspect
    )
  end

  # Every remaining failure carries the same provider/prompt/aspect context.
  failure = lambda do |message, type|
    error_response(
      error: message,
      error_type: type,
      provider: PROVIDER_ID,
      prompt: prompt,
      aspect_ratio: aspect
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for image model '#{@model}'", "auth_required")
  end

  payload = {
    model: @model,
    input: {
      messages: [
        { role: "user", content: [{ text: prompt }] }
      ]
    },
    parameters: {
      size: size,
      n: n,
      prompt_extend: true,
      watermark: false
    }
  }

  begin
    response = connection.post(GENERATION_PATH) do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  body = parse_json(response.body)
  unless body.is_a?(Hash)
    # A non-2xx reply whose body is not JSON (e.g. an HTML error page from
    # a proxy) is an HTTP failure; report the status instead of hiding it
    # behind a generic "invalid JSON" message.
    unless response.success?
      return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
    end

    return failure.call("Invalid JSON response from upstream", "invalid_response")
  end

  # DashScope reports business failures via top-level code/message,
  # sometimes alongside a non-2xx status, sometimes 200.
  if body["code"] && !body["code"].to_s.empty?
    return failure.call("Upstream error #{body["code"]}: #{body["message"]}", "api_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  image_url = extract_image_url(body)
  return failure.call("Upstream returned no image data", "empty_response") if image_url.nil?

  local_path = save_image_from_url(image_url, output_dir: output_dir || Dir.pwd, prefix: "img")
  if local_path.nil?
    return failure.call("Failed to download generated image from #{image_url}", "download_failed")
  end

  success_response(
    image: local_path,
    prompt: prompt,
    aspect_ratio: aspect,
    provider: PROVIDER_ID,
    extra: {
      "size" => size,
      "usage" => body["usage"],
      "request_id" => body["request_id"]
    }.compact
  )
end
180
+
181
# Pick the aspect -> size table for the configured model.
#
# Models whose name contains "qwen-image-max" or "qwen-image-plus"
# (case-insensitive) get ASPECT_TO_SIZE_MAX_PLUS; every other model name,
# including a missing one, gets ASPECT_TO_SIZE_V2.
private def size_table
  fixed_size_family = /qwen-image-(max|plus)/i.match?(@model.to_s)
  return ASPECT_TO_SIZE_MAX_PLUS if fixed_size_family

  ASPECT_TO_SIZE_V2
end
191
+
192
# Find the first image URL in a DashScope response body, following
# output.choices[].message.content[].image.
#
# Every level is type-checked, so a malformed element (nil choice, String
# message, non-Hash content block, ...) is skipped instead of raising.
#
# @param body [Object] parsed JSON response; anything but a Hash yields nil
# @return [String, nil] first non-empty "image" string, or nil if none
private def extract_image_url(body)
  return nil unless body.is_a?(Hash)

  output = body["output"]
  choices = output.is_a?(Hash) ? output["choices"] : nil
  return nil unless choices.is_a?(Array)

  choices.each do |choice|
    message = choice.is_a?(Hash) ? choice["message"] : nil
    content = message.is_a?(Hash) ? message["content"] : nil
    next unless content.is_a?(Array)

    content.each do |block|
      img = block.is_a?(Hash) ? block["image"] : nil
      return img if img.is_a?(String) && !img.empty?
    end
  end
  nil
end
208
+
209
+ private def connection
210
+ Faraday.new(url: endpoint_base) do |f|
211
+ f.options.timeout = 240
212
+ f.options.open_timeout = 10
213
+ end
214
+ end
215
+
216
# Derive the API root (scheme + host, plus the port when it is not the
# scheme's default) from the configured base_url, discarding any path the
# user pasted (e.g. /compatible-mode/v1). The generation path is appended
# by the caller when posting.
#
# Surrounding whitespace is stripped first, so a URL pasted with a
# trailing newline is still honoured instead of failing to parse.
#
# @return [String] API root; "https://dashscope.aliyuncs.com" when the
#   configured URL is blank, lacks a scheme/host, or cannot be parsed
private def endpoint_base
  uri = URI.parse(@base_url.to_s.strip)
  return "https://dashscope.aliyuncs.com" unless uri.scheme && uri.host && !uri.host.empty?

  root = "#{uri.scheme}://#{uri.host}"
  # Keep an explicit non-default port (e.g. a local proxy on :8080).
  custom_port = uri.port && uri.port != uri.default_port
  custom_port ? "#{root}:#{uri.port}" : root
rescue URI::InvalidURIError
  "https://dashscope.aliyuncs.com"
end
230
+
231
# Parse a response body as JSON without raising.
#
# @param body [String, nil] raw response body
# @return [Object, nil] parsed value, or nil when the body is not valid
#   JSON or is not a String at all (JSON.parse raises TypeError for nil)
private def parse_json(body)
  JSON.parse(body)
rescue JSON::ParserError, TypeError
  nil
end
235
+ end
236
+
237
# Shorten a value's string form to at most `max` characters, appending
# "..." when anything was cut off.
#
# @param str [Object] converted with to_s
# @param max [Integer] maximum number of characters kept
# @return [String]
private def truncate(str, max)
  text = str.to_s
  return text unless text.length > max

  "#{text[0, max]}..."
end
241
+ end
242
+ end
243
+ end
@@ -31,6 +31,16 @@ module Clacky
31
31
  aspect_ratio: aspect_ratio
32
32
  )
33
33
  end
34
+
35
# Video generation is not available on this direct Google AI Studio
# adapter; always returns a "not_implemented" video error that points
# the user at the openclacky gateway. duration_seconds, output_dir and
# any extra keywords are accepted but unused.
def generate_video(prompt:, aspect_ratio: "landscape", duration_seconds: nil, output_dir: nil, **_kwargs)
  message = "Direct Google AI Studio video generation is not supported. Use the openclacky gateway (base_url https://api.openclacky.com) with a video model such as or-veo-3-1."

  video_error_response(
    error: message,
    error_type: "not_implemented",
    provider: "gemini-direct",
    prompt: prompt,
    aspect_ratio: aspect_ratio
  )
end
34
44
  end
35
45
  end
36
46
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require_relative "openai_compat"
4
4
  require_relative "gemini"
5
+ require_relative "dashscope"
5
6
 
6
7
  module Clacky
7
8
  module Media
@@ -22,6 +23,17 @@ module Clacky
22
23
  "aiplatform.googleapis.com"
23
24
  ].freeze
24
25
 
26
+ # Hosts that speak Alibaba's native DashScope (Qwen-Image) API instead
27
+ # of an OpenAI-compatible facade. Matched as a substring so every
28
+ # regional variant (dashscope / dashscope-intl / dashscope-us, and the
29
+ # Singapore *.maas.aliyuncs.com workspace hosts) is caught. Third-party
30
+ # aggregators (SiliconFlow, OpenRouter, …) that re-expose qwen-image
31
+ # behind an OpenAI-compatible endpoint are NOT under aliyuncs.com, so
32
+ # they correctly keep going through OpenAICompat.
33
+ DASHSCOPE_NATIVE_HOSTS = [
34
+ "aliyuncs.com"
35
+ ].freeze
36
+
25
37
  # @param agent_config [Clacky::AgentConfig]
26
38
  def initialize(agent_config)
27
39
  @agent_config = agent_config
@@ -32,6 +44,16 @@ module Clacky
32
44
  @agent_config.find_model_by_type("image")
33
45
  end
34
46
 
47
# Look up the configured video model.
#
# @return [Hash, nil] the type=video model entry, or nil if not configured
def video_model_entry
  model_type = "video"
  @agent_config.find_model_by_type(model_type)
end
51
+
52
# Look up the configured audio (text-to-speech) model.
#
# @return [Hash, nil] the type=audio model entry, or nil if not configured
def audio_model_entry
  model_type = "audio"
  @agent_config.find_model_by_type(model_type)
end
56
+
35
57
  def generate_image(prompt:, aspect_ratio: "landscape", output_dir: nil, **kwargs)
36
58
  entry = image_model_entry
37
59
  if entry.nil?
@@ -55,11 +77,62 @@ module Clacky
55
77
  )
56
78
  end
57
79
 
80
# Generate a video with the configured type=video model.
#
# Delegates to the provider built for that model entry, forwarding all
# arguments (including extra keywords). When no video model is
# configured, returns a "not_configured" failure hash instead.
def generate_video(prompt:, aspect_ratio: "landscape", duration_seconds: nil, output_dir: nil, **kwargs)
  model_entry = video_model_entry

  unless model_entry.nil?
    return build_provider_for(model_entry).generate_video(
      prompt: prompt,
      aspect_ratio: aspect_ratio,
      duration_seconds: duration_seconds,
      output_dir: output_dir,
      **kwargs
    )
  end

  {
    "success" => false,
    "video" => nil,
    "error" => "No video model configured. Add a model with type=video in settings.",
    "error_type" => "not_configured",
    "provider" => "",
    "model" => "",
    "prompt" => prompt
  }
end
103
+
104
# Synthesize speech with the configured type=audio model.
#
# Delegates to the provider built for that model entry, forwarding all
# arguments (including extra keywords). When no audio model is
# configured, returns a "not_configured" failure hash instead.
def generate_speech(input:, voice: nil, output_dir: nil, **kwargs)
  model_entry = audio_model_entry

  unless model_entry.nil?
    return build_provider_for(model_entry).generate_speech(
      input: input,
      voice: voice,
      output_dir: output_dir,
      **kwargs
    )
  end

  {
    "success" => false,
    "audio" => nil,
    "error" => "No audio model configured. Add a model with type=audio in settings.",
    "error_type" => "not_configured",
    "provider" => "",
    "model" => "",
    "input" => input
  }
end
126
+
58
127
  # Pick the adapter class for a media model entry.
59
128
  #
60
129
  # Routing rules:
61
130
  # • base_url points directly at a Google AI Studio host → Gemini
62
131
  # (native /v1beta/models/<m>:generateContent schema).
132
+ # • base_url points at an Alibaba DashScope host (*.aliyuncs.com) →
133
+ # DashScope (native /api/v1/.../multimodal-generation schema for
134
+ # Qwen-Image). Third-party aggregators re-exposing qwen-image behind
135
+ # an OpenAI-compatible facade are NOT on aliyuncs.com and fall through.
63
136
  # • everything else → OpenAICompat. This covers OpenAI itself, the
64
137
  # openclacky gateway, OpenRouter, and any third-party proxy that
65
138
  # re-exposes Gemini / Imagen / DALL-E behind /v1/images/generations.
@@ -69,6 +142,8 @@ module Clacky
69
142
  url = entry["base_url"].to_s
70
143
  if GOOGLE_NATIVE_HOSTS.any? { |host| url.include?(host) }
71
144
  Gemini.new(entry)
145
+ elsif DASHSCOPE_NATIVE_HOSTS.any? { |host| url.include?(host) }
146
+ DashScope.new(entry)
72
147
  else
73
148
  OpenAICompat.new(entry)
74
149
  end
@@ -22,6 +22,12 @@ module Clacky
22
22
 
23
23
  DEFAULT_ASPECT = "landscape"
24
24
 
25
+ # Video aspect ratios accepted by the gateway's /videos/generations
26
+ # endpoint. The human-friendly labels map straight through; the gateway
27
+ # normalises to Veo's "16:9" / "9:16" internally.
28
+ VIDEO_ASPECTS = %w[landscape portrait].freeze
29
+ DEFAULT_VIDEO_DURATION = 8
30
+
25
31
  def generate_image(prompt:, aspect_ratio: DEFAULT_ASPECT, output_dir: nil, n: 1, **_kwargs)
26
32
  provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"
27
33
  aspect = ASPECT_TO_SIZE.key?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
@@ -135,6 +141,143 @@ module Clacky
135
141
  )
136
142
  end
137
143
 
144
# Generate a video via POST <base_url>/videos/generations and store the
# returned base64 payload locally.
#
# @param prompt [String] text prompt; blank prompts are rejected
# @param aspect_ratio [String] one of VIDEO_ASPECTS; anything else falls
#   back to DEFAULT_ASPECT
# @param duration_seconds [Integer, nil] converted with to_i; values <= 0
#   (including nil) become DEFAULT_VIDEO_DURATION
# @param output_dir [String, nil] passed to save_b64_video; defaults to
#   Dir.pwd
# @param image [Hash, nil] forwarded in the payload only when it is a Hash
#   with a "b64_json" entry
# @return the value built by video_error_response or video_success_response
def generate_video(prompt:, aspect_ratio: DEFAULT_ASPECT, duration_seconds: nil, output_dir: nil, image: nil, **_kwargs)
  provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"
  aspect = VIDEO_ASPECTS.include?(aspect_ratio) ? aspect_ratio : DEFAULT_ASPECT
  duration = duration_seconds.to_i
  duration = DEFAULT_VIDEO_DURATION if duration <= 0

  if prompt.to_s.strip.empty?
    return video_error_response(
      error: "Prompt is required and must be a non-empty string",
      error_type: "invalid_argument", provider: provider_id, aspect_ratio: aspect
    )
  end

  # Every remaining failure carries the same provider/prompt/aspect context.
  failure = lambda do |message, type|
    video_error_response(
      error: message,
      error_type: type, provider: provider_id, prompt: prompt, aspect_ratio: aspect
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for video model '#{@model}'", "auth_required")
  end

  payload = { model: @model, prompt: prompt, aspect_ratio: aspect, duration_seconds: duration }
  payload[:image] = image if image.is_a?(Hash) && image["b64_json"]

  begin
    response = video_connection.post("videos/generations") do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  body = parse_json(response.body)
  return failure.call("Invalid JSON response from upstream", "invalid_response") unless body.is_a?(Hash)

  # Tolerate a malformed "data" field (non-Array, or a first element that
  # is not a Hash): treat it as "no video" instead of raising.
  data = body["data"]
  first = data.is_a?(Array) ? data.first : nil
  if !first.is_a?(Hash) || first["b64_json"].to_s.empty?
    return failure.call("Upstream returned no video data", "empty_response")
  end

  path = save_b64_video(first["b64_json"], output_dir: output_dir || Dir.pwd, prefix: "vid")
  video_success_response(
    video: path, prompt: prompt, aspect_ratio: aspect, provider: provider_id,
    extra: {
      "duration_seconds" => duration,
      "usage" => body["usage"],
      "cost_usd" => body["cost_usd"]
    }.compact
  )
end
210
+
211
# Synthesize speech via POST <base_url>/audio/speech and store the
# returned base64 payload locally.
#
# The file extension follows the returned mime_type: "audio/mpeg" and
# "audio/mp3" -> mp3, "audio/ogg" -> ogg, anything else -> wav.
#
# @param input [String] text to speak; blank input is rejected
# @param voice [String, nil] sent in the payload only when non-blank
# @param output_dir [String, nil] passed to save_b64_audio; defaults to
#   Dir.pwd
# @return the value built by audio_error_response or audio_success_response
def generate_speech(input:, voice: nil, output_dir: nil, **_kwargs)
  provider_id = Clacky::Providers.find_by_base_url(@base_url) || "custom"

  if input.to_s.strip.empty?
    return audio_error_response(
      error: "input is required and must be a non-empty string",
      error_type: "invalid_argument", provider: provider_id, voice: voice.to_s
    )
  end

  # Every remaining failure carries the same provider/input/voice context.
  failure = lambda do |message, type|
    audio_error_response(
      error: message,
      error_type: type, provider: provider_id, input: input, voice: voice.to_s
    )
  end

  if @api_key.to_s.empty?
    return failure.call("api_key not configured for audio model '#{@model}'", "auth_required")
  end

  payload = { model: @model, input: input }
  payload[:voice] = voice if voice && !voice.to_s.strip.empty?

  begin
    response = audio_connection.post("audio/speech") do |req|
      req.headers["Content-Type"] = "application/json"
      req.headers["Authorization"] = "Bearer #{@api_key}"
      req.body = JSON.generate(payload)
    end
  rescue Faraday::Error => e
    return failure.call("HTTP request failed: #{e.message}", "network_error")
  end

  unless response.success?
    return failure.call("Upstream #{response.status}: #{truncate(response.body, 500)}", "api_error")
  end

  body = parse_json(response.body)
  return failure.call("Invalid JSON response from upstream", "invalid_response") unless body.is_a?(Hash)

  # Tolerate a malformed "data" field (non-Array, or a first element that
  # is not a Hash): treat it as "no audio" instead of raising.
  data = body["data"]
  first = data.is_a?(Array) ? data.first : nil
  if !first.is_a?(Hash) || first["b64_json"].to_s.empty?
    return failure.call("Upstream returned no audio data", "empty_response")
  end

  ext = case first["mime_type"].to_s
        when "audio/mpeg", "audio/mp3" then "mp3"
        when "audio/ogg" then "ogg"
        else "wav"
        end

  path = save_b64_audio(first["b64_json"], output_dir: output_dir || Dir.pwd, prefix: "tts", extension: ext)
  audio_success_response(
    audio: path, input: input, voice: body["voice"] || voice.to_s, provider: provider_id,
    extra: {
      "mime_type" => first["mime_type"],
      "usage" => body["usage"],
      "cost_usd" => body["cost_usd"]
    }.compact
  )
end
280
+
138
281
  private def connection
139
282
  Faraday.new(url: normalized_base_url) do |f|
140
283
  f.options.timeout = 240
@@ -142,6 +285,23 @@ module Clacky
142
285
  end
143
286
  end
144
287
 
288
# Faraday connection for video requests. The gateway performs its
# submit+poll cycle inside a single request, which can take several
# minutes, so the request timeout (600s) is much longer than on the image
# path; the open timeout stays at 10s.
private def video_connection
  Faraday.new(url: normalized_base_url) do |conn|
    opts = conn.options
    opts.timeout = 600
    opts.open_timeout = 10
  end
end
297
+
298
# Faraday connection for text-to-speech requests: 120-second request
# timeout, 10-second open timeout.
private def audio_connection
  Faraday.new(url: normalized_base_url) do |conn|
    opts = conn.options
    opts.timeout = 120
    opts.open_timeout = 10
  end
end
304
+
145
305
  private def gemini_family?(model_name)
146
306
  model_name.to_s.match?(/gemini|imagen/i)
147
307
  end
@@ -150,11 +150,6 @@ module Clacky
150
150
  @messages.find { |m| m[:subagent_instructions] }
151
151
  end
152
152
 
153
- # Return all messages where task_id <= given id (Time Machine support).
154
- def for_task(task_id)
155
- @messages.select { |m| !m[:task_id] || m[:task_id] <= task_id }
156
- end
157
-
158
153
  # ─────────────────────────────────────────────
159
154
  # Size helpers
160
155
  # ─────────────────────────────────────────────
@@ -191,8 +186,18 @@ module Clacky
191
186
  # can't fire when the previous turns came from a provider that keeps
192
187
  # thinking inline (e.g. MiniMax: <think>...</think> in content), so
193
188
  # this bypass lets us recover on the retry without a server restart.
194
- def to_api(force_reasoning_content_pad: false)
195
- msgs = @messages.map { |m| strip_for_api(m) }
189
# Build the API-ready message list.
#
# When `task_chain` is given (a collection of task IDs responding to
# include?, e.g. a Set holding the active task's ancestor chain), any
# message tagged with a task_id outside that chain is dropped first. This
# is the Time Machine path: undone or sibling-branch turns never reach the
# LLM. Messages without a task_id (system / injected context) always pass.
#
# The surviving messages are stripped for the API, have their tool-call
# pairing repaired, and are then made reasoning-content consistent.
def to_api(force_reasoning_content_pad: false, task_chain: nil)
  visible =
    if task_chain
      @messages.reject { |msg| msg[:task_id] && !task_chain.include?(msg[:task_id]) }
    else
      @messages
    end

  stripped = visible.map { |msg| strip_for_api(msg) }
  paired = repair_tool_call_pairing(stripped)
  ensure_reasoning_content_consistency(paired, force: force_reasoning_content_pad)
end
@@ -60,6 +60,34 @@ module Clacky
60
60
  "or-gpt-image-2" => "GPT Image 2"
61
61
  },
62
62
  "default_image_model" => "or-gpt-image-2",
63
+ # Video generation models served by the openclacky gateway, which
64
+ # routes them to Vertex AI Veo (async predictLongRunning under the
65
+ # hood; the gateway hides the polling and returns the MP4 inline).
66
+ "video_models" => [
67
+ "or-veo-3-1",
68
+ "or-veo-3-1-fast",
69
+ "or-veo-3",
70
+ "or-veo-3-fast"
71
+ ],
72
+ "video_model_aliases" => {
73
+ "or-veo-3-1" => "Veo 3.1",
74
+ "or-veo-3-1-fast" => "Veo 3.1 Fast",
75
+ "or-veo-3" => "Veo 3",
76
+ "or-veo-3-fast" => "Veo 3 Fast"
77
+ },
78
+ "default_video_model" => "or-veo-3-1",
79
+ # Text-to-speech models served by the openclacky gateway, which
80
+ # routes them to Vertex AI Gemini 2.5 (responseModalities=["AUDIO"]).
81
+ # The gateway returns WAV inline as base64.
82
+ "audio_models" => [
83
+ "or-tts-gemini-2-5-flash",
84
+ "or-tts-gemini-2-5-pro"
85
+ ],
86
+ "audio_model_aliases" => {
87
+ "or-tts-gemini-2-5-flash" => "Gemini 2.5 Flash TTS",
88
+ "or-tts-gemini-2-5-pro" => "Gemini 2.5 Pro TTS"
89
+ },
90
+ "default_audio_model" => "or-tts-gemini-2-5-flash",
63
91
  # Default OCR sidecar — used when the primary model is text-only.
64
92
  # Candidates are derived from the provider's vision-capable models;
65
93
  # this just picks the cheap+fast default to surface in "auto" mode.
@@ -559,7 +559,9 @@ module Clacky
559
559
  @running = false
560
560
  end
561
561
 
562
- def stop(clear_screen: false)
562
+ # Clears the screen on exit by default — the Rich UI repaints fullscreen
563
+ # and leaves no useful scrollback to preserve.
564
+ def stop(clear_screen: true)
563
565
  @running = false
564
566
  @shell.stop
565
567
  RubyRich::Terminal.clear if clear_screen