openclacky 1.2.17 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -0
  3. data/lib/clacky/agent/skill_manager.rb +1 -1
  4. data/lib/clacky/agent/time_machine.rb +256 -74
  5. data/lib/clacky/agent/tool_executor.rb +12 -0
  6. data/lib/clacky/agent.rb +21 -31
  7. data/lib/clacky/agent_config.rb +18 -0
  8. data/lib/clacky/cli.rb +55 -3
  9. data/lib/clacky/default_skills/media-gen/SKILL.md +173 -5
  10. data/lib/clacky/default_skills/skill-creator/SKILL.md +1 -0
  11. data/lib/clacky/media/base.rb +125 -0
  12. data/lib/clacky/media/dashscope.rb +243 -0
  13. data/lib/clacky/media/gemini.rb +10 -0
  14. data/lib/clacky/media/generator.rb +75 -0
  15. data/lib/clacky/media/openai_compat.rb +160 -0
  16. data/lib/clacky/message_history.rb +12 -7
  17. data/lib/clacky/providers.rb +28 -0
  18. data/lib/clacky/rich_ui_controller.rb +3 -1
  19. data/lib/clacky/server/backup_manager.rb +200 -0
  20. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +10 -2
  21. data/lib/clacky/server/channel/adapters/feishu/bot.rb +68 -15
  22. data/lib/clacky/server/channel/channel_manager.rb +180 -81
  23. data/lib/clacky/server/http_server.rb +348 -15
  24. data/lib/clacky/server/scheduler.rb +19 -0
  25. data/lib/clacky/server/session_registry.rb +8 -4
  26. data/lib/clacky/session_manager.rb +40 -2
  27. data/lib/clacky/skill.rb +3 -1
  28. data/lib/clacky/tools/trash_manager.rb +14 -0
  29. data/lib/clacky/ui2/components/command_suggestions.rb +1 -0
  30. data/lib/clacky/ui2/components/modal_component.rb +34 -7
  31. data/lib/clacky/ui2/ui_controller.rb +150 -19
  32. data/lib/clacky/utils/file_processor.rb +75 -4
  33. data/lib/clacky/version.rb +1 -1
  34. data/lib/clacky/web/app.css +2038 -1147
  35. data/lib/clacky/web/app.js +22 -1
  36. data/lib/clacky/web/backup.js +119 -0
  37. data/lib/clacky/web/billing.js +94 -7
  38. data/lib/clacky/web/channels.js +81 -11
  39. data/lib/clacky/web/design-sample.css +247 -0
  40. data/lib/clacky/web/design-sample.html +127 -0
  41. data/lib/clacky/web/favicon.svg +16 -0
  42. data/lib/clacky/web/i18n.js +159 -31
  43. data/lib/clacky/web/index.html +175 -55
  44. data/lib/clacky/web/logo_nav_dark.png +0 -0
  45. data/lib/clacky/web/onboard.js +114 -28
  46. data/lib/clacky/web/sessions.js +436 -192
  47. data/lib/clacky/web/settings.js +21 -1
  48. data/lib/clacky/web/skills.js +6 -6
  49. data/lib/clacky/web/tasks.js +129 -61
  50. data/lib/clacky/web/utils.js +72 -0
  51. data/lib/clacky/web/ws-dispatcher.js +6 -0
  52. data/lib/clacky.rb +1 -0
  53. metadata +8 -3
  54. data/lib/clacky/server/channel/group_message_buffer.rb +0 -53
data/lib/clacky/cli.rb CHANGED
@@ -290,6 +290,54 @@ module Clacky
290
290
  ui_controller.append_output("")
291
291
  end
292
292
 
293
+ # Handle the `/model` slash command — a quick model-card switcher.
294
+ #
295
+ # This is the lightweight counterpart to /config: it only lets the user
296
+ # pick an already-configured model and switches to it (no add/edit/delete).
297
+ # Switching goes through the unified Agent#switch_model_by_id path and
298
+ # also updates the global default so the choice sticks across launches,
299
+ # matching /config's :switch behavior.
300
+ private def handle_model_command(ui_controller, agent_config, agent, session_manager = nil)
301
+ config = agent_config
302
+
303
+ if config.models.empty?
304
+ ui_controller.show_error("No models configured. Run /config to add one.")
305
+ return
306
+ end
307
+
308
+ # Resolve a card's provider sub-models so the picker can offer them in
309
+ # the card's sub-model drawer.
310
+ submodels_for = lambda do |model|
311
+ base_url = model["base_url"]
312
+ provider_id = base_url && Clacky::Providers.find_by_base_url(base_url)
313
+ provider_id ? Clacky::Providers.models(provider_id) : []
314
+ end
315
+
316
+ result = ui_controller.show_model_switch_modal(config, submodels_for)
317
+ return if result.nil?
318
+
319
+ target_id = result[:model_id]
320
+ sub_model = result[:model_name]
321
+
322
+ agent.switch_model_by_id(target_id)
323
+ config.set_default_model_by_id(target_id)
324
+ config.save
325
+
326
+ # Pin (or clear) the per-session sub-model overlay for the chosen card.
327
+ agent.set_session_sub_model(sub_model)
328
+
329
+ # The overlay lives in the session file (not config.yml), so persist it
330
+ # now — otherwise it would be lost if the user quits before the next task.
331
+ session_manager&.save(agent.to_session_data)
332
+
333
+ ui_controller.config[:model] = config.model_name
334
+ ui_controller.update_sessionbar(
335
+ tasks: agent.total_tasks,
336
+ cost: agent.total_cost
337
+ )
338
+ ui_controller.show_success("Switched to model: #{config.model_name}")
339
+ end
340
+
293
341
  private def handle_time_machine_command(ui_controller, agent, session_manager)
294
342
  # Get task history from agent
295
343
  history = agent.get_task_history(limit: 10)
@@ -892,10 +940,11 @@ module Clacky
892
940
  ui_controller.append_output("")
893
941
  end
894
942
 
895
- # Stop UI and exit
943
+ # Stop UI and exit. Each UI decides whether to clear the screen on
944
+ # exit (UI2 keeps it so the resume hint survives; Rich clears).
896
945
  shutting_down = true
897
946
  idle_timer.shutdown
898
- ui_controller.stop(clear_screen: true)
947
+ ui_controller.stop
899
948
  exit(0)
900
949
  end
901
950
 
@@ -913,6 +962,9 @@ module Clacky
913
962
  when "/config"
914
963
  handle_config_command(ui_controller, agent_config, agent)
915
964
  next
965
+ when "/model"
966
+ handle_model_command(ui_controller, agent_config, agent, session_manager)
967
+ next
916
968
  when "/undo"
917
969
  handle_time_machine_command(ui_controller, agent, session_manager)
918
970
  next
@@ -948,7 +1000,7 @@ module Clacky
948
1000
  when "/exit", "/quit"
949
1001
  shutting_down = true
950
1002
  idle_timer.shutdown
951
- ui_controller.stop(clear_screen: true)
1003
+ ui_controller.stop
952
1004
  exit(0)
953
1005
  when "/help"
954
1006
  sleep 0.1
@@ -1,8 +1,9 @@
1
1
  ---
2
2
  name: media-gen
3
- description: 'Generate images (and later videos / audio) inside the current task. Use this skill whenever the user asks to create, generate, or produce a picture / image / illustration / cover / poster / icon / artwork including phrases like 生成图片, 画一张, 做封面, 来张配图, generate image, make a picture, draw, create artwork, design a cover. Also use when building documents (slides, PPT, posters, marketing pages, README hero shots) where an image is needed inline. Routes calls through the local Clacky HTTP server, which uses the user-configured `type=image` model — you do NOT need to know which provider; the server handles it.'
3
+ description: 'Generate images, videos, or audio (text-to-speech) in the current task. Use whenever the user asks to create/generate/produce a picture / image / illustration / cover / poster / icon / artwork, a video / clip / animation, or speech / voiceover / narration / TTS — e.g. 生成图片, 画一张, 做封面, 配图, generate image, make a picture, draw, design a cover, 生成视频, 做个视频, text-to-video, 朗读, 配音, 旁白, 文字转语音, generate speech, voiceover. Also use when a document (slides, poster, README hero) needs an inline image.'
4
4
  disable-model-invocation: false
5
5
  user-invocable: true
6
+ always-show: true
6
7
  ---
7
8
 
8
9
  # media-gen
@@ -26,13 +27,29 @@ curl -s http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/types
26
27
 
27
28
  If the response shows `image.configured = false`, stop and tell the user:
28
29
 
29
- > 还没有配置生图模型。请打开 Clacky 设置页 → 添加模型 → 类型选 `image`(推荐 `or-gemini-3-pro-image` 或 `or-gpt-image-1`)。配好后再让我生图。
30
+ > 还没有配置生图模型。请打开设置页 → 添加模型 → 类型选 `image`(走 openclacky 官方网关时推荐 `or-gemini-3-pro-image` 或 `or-gpt-image-2`)。配好后再让我生图。
30
31
 
31
32
  Do NOT try to fall back to `terminal` + a hand-written `curl https://api.openai.com/...` — that bypasses the user's configured backend and won't be billed correctly.
32
33
 
34
+ **You do NOT configure models — the user does, in the settings page.** Never
35
+ edit the user's `config.yml` to add or change a model, and never invent a model
36
+ name from memory (e.g. `or-gpt-5.4-image-2` does not exist). The real, current
37
+ model is whatever `/api/media/types` reports under `image.model`. If you think a
38
+ different model is needed, tell the user which one to set in the settings page —
39
+ don't touch the config file yourself.
40
+
33
41
  ## Step 2 — Generate the image
34
42
 
35
- ### ⚠️ Important: generation speed & concurrency
43
+ ### The model does NOT honor exact pixel sizes
44
+
45
+ There is no `size` / `width` / `height` field — the only shape control is
46
+ `aspect_ratio` (`landscape` / `square` / `portrait`), and even that is just a
47
+ rough hint (ask for `576x96` in the prompt and you may get `1408x768`). When the user needs an
48
+ **exact pixel size, a grid, an icon at NxN, or a spritesheet**, generate first at
49
+ whatever size the model gives, then resize / crop / tile to the exact pixels with
50
+ ImageMagick (`magick`). Verify with `magick identify` before reporting done.
51
+
52
+ ### Important: generation speed & concurrency
36
53
 
37
54
  - **Image generation can be slow — up to 2 minutes per image depending on the model.** Before calling the API, warn the user that it may take a minute or two. The curl request blocks until the image is ready; do NOT run it in the background.
38
55
  - **One at a time only.** Never generate multiple images concurrently (e.g. by running several `curl` commands simultaneously or in a script loop). Each call consumes significant server-side resources, and parallel requests will almost certainly cause timeouts. If the user wants several images, generate them **sequentially**, one after another.
@@ -46,6 +63,10 @@ curl -s -X POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/ima
46
63
  }'
47
64
  ```
48
65
 
66
+ - The terminal blocks multi-line commands — write the request into a `.sh` file and run it, don't paste a multi-line `curl`.
67
+ - If a call fails with `400 / INVALID_ARGUMENT`, drop the `aspect_ratio` field and retry once before reporting the error.
68
+ - If a call fails with `unknown image model` (400), the configured model name isn't recognized by its backend — tell the user to fix the model name in the settings page; do NOT guess another name and retry.
69
+
49
70
  ### Request fields
50
71
 
51
72
  | Field | Required | Values | Notes |
@@ -128,6 +149,153 @@ When the user gives a vague request like "给我配张图", ask one clarifying q
128
149
  - The user wants a **diagram / chart** with specific data — use a charting library (matplotlib, mermaid, etc.) instead; image gen is for illustrations, not data viz
129
150
  - The user asks for **screenshots** of real software — use the browser tool
130
151
 
131
- ## Future modalities
152
+ ## Generating video (Veo)
153
+
154
+ The same `/api/media/` namespace serves video generation. The user must
155
+ configure a `type=video` model in settings (recommended: `or-veo-3-1`).
156
+
157
+ ### Endpoint
158
+
159
+ ```
160
+ POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/video
161
+ ```
162
+
163
+ Check `GET /api/media/types` first — if `video.configured = false`, tell the
164
+ user to add a `type=video` model in settings before generating.
165
+
166
+ ### Video is slow and expensive
167
+
168
+ - **A single clip can take 1–3 minutes (sometimes longer).** Warn the user
169
+ before calling, and run the curl in the foreground — it blocks until the
170
+ MP4 is ready. Do NOT background it.
171
+ - **One at a time.** Never run multiple video generations concurrently.
172
+ - Each clip costs real money (billed per output-second). Confirm the prompt
173
+ with the user before generating.
174
+
175
+ ### Request
176
+
177
+ ```bash
178
+ curl -s -X POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/video \
179
+ -H "Content-Type: application/json" \
180
+ -d '{
181
+ "prompt": "A cinematic drone shot flying over a misty mountain range at sunrise, golden light, 4K.",
182
+ "aspect_ratio": "landscape",
183
+ "duration_seconds": 8
184
+ }'
185
+ ```
186
+
187
+ | Field | Required | Values | Notes |
188
+ |--------------------|----------|---------------------------------|-------|
189
+ | `prompt` | yes | string | Same prompt-craft tips as images apply. |
190
+ | `aspect_ratio` | no | `landscape` / `portrait` | Defaults to `landscape` (16:9). |
191
+ | `duration_seconds` | no | 4–8 | Defaults to 8. |
192
+ | `image` | no | `{ "b64_json": "...", "mime_type": "image/png" }` | Optional first frame for image-to-video. |
193
+ | `output_dir` | no | absolute path | MP4 saved under `<output_dir>/assets/generated/`. |
194
+
195
+ ### Response (success)
196
+
197
+ ```json
198
+ {
199
+ "success": true,
200
+ "video": "/abs/path/to/working_dir/assets/generated/vid_20260615_011820_a1b2c3d4.mp4",
201
+ "model": "or-veo-3-1",
202
+ "provider": "openclacky",
203
+ "prompt": "A cinematic drone shot ...",
204
+ "aspect_ratio": "landscape",
205
+ "duration_seconds": 8,
206
+ "cost_usd": 2.688
207
+ }
208
+ ```
209
+
210
+ The `video` field is an absolute path on disk. Show it to the user with a
211
+ markdown link or an HTML5 `<video>` tag pointing at the `file://` path; embed
212
+ it in documents with a relative path under `./assets/generated/`.
213
+
214
+ ### Response (failure)
215
+
216
+ Same shape and `error_type` values as image generation, but with `"video": null`.
217
+ `not_configured` means no `type=video` model is set up.
218
+
219
+ ## Generating speech (Gemini TTS)
220
+
221
+ The same `/api/media/` namespace serves text-to-speech. The user must
222
+ configure a `type=audio` model in settings (recommended:
223
+ `or-tts-gemini-2-5-flash`, the cheap+fast default).
224
+
225
+ ### Endpoint
226
+
227
+ ```
228
+ POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/audio/speech
229
+ ```
230
+
231
+ Check `GET /api/media/types` first — if `audio.configured = false`, tell the
232
+ user to add a `type=audio` model in settings before generating.
233
+
234
+ ### Request
235
+
236
+ ```bash
237
+ curl -s -X POST http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/media/audio/speech \
238
+ -H "Content-Type: application/json" \
239
+ -d '{
240
+ "input": "Hello and welcome to openclacky. Today we will explore...",
241
+ "voice": "Kore"
242
+ }'
243
+ ```
244
+
245
+ | Field | Required | Values | Notes |
246
+ |--------------|----------|---------------------------------|-------|
247
+ | `input` | yes | string | The text to speak. Plain prose works best; you can prefix with style cues like "Say cheerfully:" or "In a calm tone:". |
248
+ | `voice` | no | string voice name | Defaults to `Kore`. Common Gemini voices: `Kore`, `Puck`, `Charon`, `Fenrir`, `Aoede`. |
249
+ | `output_dir` | no | absolute path | WAV saved under `<output_dir>/assets/generated/`. |
250
+
251
+ Generation typically takes 2–10 seconds depending on length. The request
252
+ blocks until the WAV is ready.
253
+
254
+ ### Response (success)
255
+
256
+ ```json
257
+ {
258
+ "success": true,
259
+ "audio": "/abs/path/to/working_dir/assets/generated/tts_20260615_233522_4ff02705.wav",
260
+ "model": "or-tts-gemini-2-5-flash",
261
+ "provider": "openclacky",
262
+ "input": "Hello and welcome to openclacky...",
263
+ "voice": "Kore",
264
+ "mime_type": "audio/wav",
265
+ "usage": { "prompt_tokens": 13, "completion_tokens": 122, "total_tokens": 135 },
266
+ "cost_usd": 0.000259
267
+ }
268
+ ```
269
+
270
+ The `audio` field is an absolute path on disk. Output is mono 16-bit PCM at
271
+ 24 kHz wrapped in a standard WAV container — playable by any browser, OS
272
+ player, or `<audio>` tag without conversion.
273
+
274
+ To let the user hear it, write a markdown link in your reply:
275
+
276
+ ```markdown
277
+ [🔊 听一下](file:///abs/path/from/response.wav)
278
+ ```
279
+
280
+ For embedding in HTML documents, use:
281
+
282
+ ```html
283
+ <audio controls src="./assets/generated/xxx.wav"></audio>
284
+ ```
285
+
286
+ ### Response (failure)
287
+
288
+ Same shape and `error_type` values as image generation, but with `"audio": null`.
289
+ `not_configured` means no `type=audio` model is set up.
290
+
291
+ ### Cost & length tips
292
+
293
+ - Gemini TTS bills by tokens (input text + generated audio). A typical
294
+ one-paragraph narration costs well under $0.001.
295
+ - For long-form audio (>1 minute), split the script into paragraphs and
296
+ generate each separately, then concatenate locally — avoids upstream
297
+ truncation and gives you finer control over pacing.
298
+ - Voice consistency: Gemini TTS does not currently support voice cloning;
299
+ use the same `voice` name across calls in one project to keep the
300
+ narrator consistent.
132
301
 
133
- The same `/api/media/` namespace will gain `video` and `audio` endpoints. The pattern is identical: the user configures `type=video` / `type=audio` models in settings, this skill (or its successor) calls the matching endpoint.
@@ -1,6 +1,7 @@
1
1
  ---
2
2
  name: skill-creator
3
3
  description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy.
4
+ always-show: true
4
5
  ---
5
6
 
6
7
  # Skill Creator
@@ -3,6 +3,7 @@
3
3
  require "fileutils"
4
4
  require "base64"
5
5
  require "securerandom"
6
+ require "faraday"
6
7
 
7
8
  module Clacky
8
9
  module Media
@@ -28,6 +29,28 @@ module Clacky
28
29
  raise NotImplementedError, "#{self.class.name} must implement #generate_image"
29
30
  end
30
31
 
32
+ # @return [Hash] either video_success_response(...) or
33
+ # video_error_response(...)
34
+ def generate_video(prompt:, aspect_ratio: "landscape", duration_seconds: nil, output_dir: nil, **_kwargs)
35
+ video_error_response(
36
+ error: "Video generation is not supported by #{self.class.name.split("::").last}. Use the openclacky gateway with a video model such as or-veo-3-1.",
37
+ error_type: "not_implemented",
38
+ provider: "",
39
+ prompt: prompt,
40
+ aspect_ratio: aspect_ratio
41
+ )
42
+ end
43
+
44
+ # @return [Hash] either audio_success_response(...) or audio_error_response(...)
45
+ def generate_speech(input:, voice: nil, output_dir: nil, **_kwargs)
46
+ audio_error_response(
47
+ error: "Speech synthesis is not supported by #{self.class.name.split("::").last}. Use the openclacky gateway with a TTS model such as or-tts-gemini-2-5-flash.",
48
+ error_type: "not_implemented",
49
+ provider: "",
50
+ input: input
51
+ )
52
+ end
53
+
31
54
  # Persist a base64-encoded image under <output_dir>/assets/generated/.
32
55
  # Returns the absolute path on disk.
33
56
  private def save_b64_image(b64_data, output_dir:, prefix: "img", extension: "png")
@@ -40,6 +63,60 @@ module Clacky
40
63
  path
41
64
  end
42
65
 
66
+ # Persist a base64-encoded video under <output_dir>/assets/generated/.
67
+ # Returns the absolute path on disk. Mirrors #save_b64_image; the only
68
+ # differences are the default prefix (vid) and extension (mp4).
69
+ private def save_b64_video(b64_data, output_dir:, prefix: "vid", extension: "mp4")
70
+ target_dir = File.join(output_dir, "assets", "generated")
71
+ FileUtils.mkdir_p(target_dir)
72
+ ts = Time.now.strftime("%Y%m%d_%H%M%S")
73
+ short = SecureRandom.hex(4)
74
+ path = File.join(target_dir, "#{prefix}_#{ts}_#{short}.#{extension}")
75
+ File.binwrite(path, Base64.decode64(b64_data))
76
+ path
77
+ end
78
+
79
+ private def save_b64_audio(b64_data, output_dir:, prefix: "tts", extension: "wav")
80
+ target_dir = File.join(output_dir, "assets", "generated")
81
+ FileUtils.mkdir_p(target_dir)
82
+ ts = Time.now.strftime("%Y%m%d_%H%M%S")
83
+ short = SecureRandom.hex(4)
84
+ path = File.join(target_dir, "#{prefix}_#{ts}_#{short}.#{extension}")
85
+ File.binwrite(path, Base64.decode64(b64_data))
86
+ path
87
+ end
88
+
89
+ # Download a remote image URL and persist it under
90
+ # <output_dir>/assets/generated/, mirroring save_b64_image so providers
91
+ # that return URLs (e.g. DashScope, whose links expire after 24h) land
92
+ # local files at the same path shape as base64 providers.
93
+ # Returns the absolute path on disk, or nil if the download fails.
94
+ private def save_image_from_url(url, output_dir:, prefix: "img", extension: "png")
95
+ body = download_url(url)
96
+ return nil if body.nil? || body.empty?
97
+
98
+ target_dir = File.join(output_dir, "assets", "generated")
99
+ FileUtils.mkdir_p(target_dir)
100
+ ts = Time.now.strftime("%Y%m%d_%H%M%S")
101
+ short = SecureRandom.hex(4)
102
+ path = File.join(target_dir, "#{prefix}_#{ts}_#{short}.#{extension}")
103
+ File.binwrite(path, body)
104
+ path
105
+ end
106
+
107
+ # Fetch raw bytes from a URL. Isolated so specs can stub it without a
108
+ # live HTTP call. Returns the response body String, or nil on a non-2xx status or Faraday error.
109
+ private def download_url(url)
110
+ conn = Faraday.new do |f|
111
+ f.options.timeout = 120
112
+ f.options.open_timeout = 10
113
+ end
114
+ resp = conn.get(url)
115
+ resp.success? ? resp.body : nil
116
+ rescue Faraday::Error
117
+ nil
118
+ end
119
+
43
120
  private def success_response(image:, prompt:, aspect_ratio:, provider:, extra: {})
44
121
  {
45
122
  "success" => true,
@@ -63,6 +140,54 @@ module Clacky
63
140
  "provider" => provider
64
141
  }
65
142
  end
143
+
144
+ private def video_success_response(video:, prompt:, aspect_ratio:, provider:, extra: {})
145
+ {
146
+ "success" => true,
147
+ "video" => video,
148
+ "model" => @model,
149
+ "prompt" => prompt,
150
+ "aspect_ratio" => aspect_ratio,
151
+ "provider" => provider
152
+ }.merge(extra)
153
+ end
154
+
155
+ private def video_error_response(error:, error_type: "provider_error", provider: "", prompt: "", aspect_ratio: "landscape")
156
+ {
157
+ "success" => false,
158
+ "video" => nil,
159
+ "error" => error,
160
+ "error_type" => error_type,
161
+ "model" => @model,
162
+ "prompt" => prompt,
163
+ "aspect_ratio" => aspect_ratio,
164
+ "provider" => provider
165
+ }
166
+ end
167
+
168
+ private def audio_success_response(audio:, input:, voice:, provider:, extra: {})
169
+ {
170
+ "success" => true,
171
+ "audio" => audio,
172
+ "model" => @model,
173
+ "input" => input,
174
+ "voice" => voice,
175
+ "provider" => provider
176
+ }.merge(extra)
177
+ end
178
+
179
+ private def audio_error_response(error:, error_type: "provider_error", provider: "", input: "", voice: "")
180
+ {
181
+ "success" => false,
182
+ "audio" => nil,
183
+ "error" => error,
184
+ "error_type" => error_type,
185
+ "model" => @model,
186
+ "input" => input,
187
+ "voice" => voice,
188
+ "provider" => provider
189
+ }
190
+ end
66
191
  end
67
192
  end
68
193
  end