openclacky 1.2.12 → 1.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +5 -1
- data/.clacky/skills/gem-release/scripts/release.sh +4 -1
- data/CHANGELOG.md +39 -0
- data/lib/clacky/agent/llm_caller.rb +40 -25
- data/lib/clacky/agent/memory_updater.rb +12 -0
- data/lib/clacky/agent/session_serializer.rb +1 -0
- data/lib/clacky/agent/skill_auto_creator.rb +7 -4
- data/lib/clacky/agent/skill_evolution.rb +23 -5
- data/lib/clacky/agent/skill_manager.rb +86 -1
- data/lib/clacky/agent/skill_reflector.rb +18 -23
- data/lib/clacky/agent.rb +132 -15
- data/lib/clacky/agent_config.rb +183 -22
- data/lib/clacky/cli.rb +55 -0
- data/lib/clacky/client.rb +11 -1
- data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
- data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
- data/lib/clacky/default_skills/persist-memory/SKILL.md +4 -3
- data/lib/clacky/default_skills/search-skills/SKILL.md +61 -0
- data/lib/clacky/idle_compression_timer.rb +1 -1
- data/lib/clacky/message_format/open_ai.rb +7 -1
- data/lib/clacky/openai_stream_aggregator.rb +4 -1
- data/lib/clacky/providers.rb +77 -12
- data/lib/clacky/server/http_server.rb +296 -7
- data/lib/clacky/server/session_registry.rb +30 -8
- data/lib/clacky/server/web_ui_controller.rb +24 -1
- data/lib/clacky/session_manager.rb +120 -0
- data/lib/clacky/tools/web_search.rb +59 -8
- data/lib/clacky/ui2/layout_manager.rb +15 -5
- data/lib/clacky/ui2/progress_handle.rb +18 -8
- data/lib/clacky/ui2/ui_controller.rb +27 -0
- data/lib/clacky/ui_interface.rb +22 -0
- data/lib/clacky/utils/model_pricing.rb +96 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/vision/resolver.rb +157 -0
- data/lib/clacky/web/app.css +209 -4
- data/lib/clacky/web/app.js +6 -5
- data/lib/clacky/web/i18n.js +22 -6
- data/lib/clacky/web/index.html +2 -1
- data/lib/clacky/web/sessions.js +408 -80
- data/lib/clacky/web/settings.js +241 -60
- data/lib/clacky/web/skills.js +5 -14
- data/lib/clacky/web/utils.js +57 -0
- data/lib/clacky/web/ws-dispatcher.js +136 -0
- data/lib/clacky.rb +1 -0
- metadata +6 -2
|
@@ -120,17 +120,67 @@ module Clacky
|
|
|
120
120
|
|
|
121
121
|
# ── Bing ───────────────────────────────────────────────────────────────
|
|
122
122
|
|
|
123
|
+
BING_ENDPOINTS = [
|
|
124
|
+
["cn.bing.com", "zh-CN,zh;q=0.9,en;q=0.8"],
|
|
125
|
+
["www.bing.com", "en-US,en;q=0.9"]
|
|
126
|
+
].freeze
|
|
127
|
+
|
|
128
|
+
# Race both Bing endpoints in parallel and return the first relevant result.
|
|
129
|
+
# cn.bing.com works best from mainland China; www.bing.com works best from
|
|
130
|
+
# overseas. Racing avoids guessing the network egress and recovers from
|
|
131
|
+
# one endpoint temporarily returning anti-scrape filler. If both return
|
|
132
|
+
# irrelevant garbage, fall back to whichever came back non-empty.
|
|
123
133
|
# Query every BING_ENDPOINTS host concurrently and return the first result
# set that passes bing_results_relevant?. If no endpoint produces a relevant
# answer, the first non-empty result set received is returned instead; if
# every endpoint fails or comes back empty, the result is [].
#
# @param query [String] search terms, forwarded to bing_fetch
# @param max_results [Integer] forwarded to bing_fetch
# @return [Array] a result list as produced by bing_fetch (possibly empty)
private def search_bing(query, max_results)
  queue = Queue.new
  threads = BING_ENDPOINTS.map do |host, lang|
    Thread.new do
      queue.push(bing_fetch(host, lang, query, max_results))
    rescue StandardError
      # A failed endpoint must still report in, otherwise the consumer loop
      # below would block forever on queue.pop.
      queue.push([])
    end
  end

  winner = nil
  runner_up = nil
  threads.length.times do
    results = queue.pop
    if bing_results_relevant?(results, query)
      winner = results
      break
    end
    # Keep only the first non-empty fallback candidate.
    runner_up ||= results unless results.empty?
  end

  winner || runner_up || []
ensure
  # Runs on the exception path too, so an error raised while judging
  # relevance cannot leave fetch threads running in the background.
  threads&.each(&:kill)
end
|
|
159
|
+
|
|
160
|
+
# Fetch one Bing result page from +host+ and parse it.
#
# @param host [String] Bing hostname to query
# @param lang [String] value passed to http_get as accept_language
# @param query [String] search terms (CGI-escaped into the URL)
# @param max_results [Integer] used for the count= parameter and the parser
# @return [Array] parsed results, or [] when the response is not a success
private def bing_fetch(host, lang, query, max_results)
  origin = "https://#{host}/"
  search_url = URI("#{origin}search?q=#{CGI.escape(query)}&count=#{max_results}&form=QBLH")
  # The endpoint's own origin is sent as the referer.
  reply = http_get(search_url, accept_language: lang, follow_redirects: 2, referer: origin)

  if reply.is_a?(Net::HTTPSuccess)
    parse_bing_html(reply.body, max_results)
  else
    []
  end
end
|
|
133
168
|
|
|
169
|
+
# A real Bing answer mentions at least one query token in the titles or
|
|
170
|
+
# snippets. The anti-scrape fallback returns top-domain filler (Yandex,
|
|
171
|
+
# Bunnings, WikiLeaks, …) that shares nothing with the query.
|
|
172
|
+
# Decide whether a result list looks like a genuine answer to +query+.
#
# @param results [Array<Hash>] entries read via :title and :snippet
# @param query [String] the original search terms
# @return [Boolean] false for an empty list; true when the query has no
#   token of two or more letters/digits; otherwise true only if some result's
#   title or snippet contains at least one query token (case-insensitive)
private def bing_results_relevant?(results, query)
  return false if results.empty?

  words = query.downcase.scan(/[\p{L}\p{N}]+/).select { |w| w.length >= 2 }
  # Nothing to match against, so the results cannot be judged irrelevant.
  return true if words.empty?

  results.each do |hit|
    text = "#{hit[:title]} #{hit[:snippet]}".downcase
    return true if words.any? { |w| text.include?(w) }
  end
  false
end
|
|
183
|
+
|
|
134
184
|
private def parse_bing_html(html, max_results)
|
|
135
185
|
results = []
|
|
136
186
|
html = Clacky::Utils::Encoding.to_utf8(html)
|
|
@@ -199,7 +249,7 @@ module Clacky
|
|
|
199
249
|
|
|
200
250
|
# Shared browser-like GET request — no Accept-Encoding to avoid gzip/br
|
|
201
251
|
# detection tricks used by Bing. Supports redirect following.
|
|
202
|
-
private def http_get(url, accept_language: "en-US,en;q=0.9", follow_redirects: 0)
|
|
252
|
+
private def http_get(url, accept_language: "en-US,en;q=0.9", follow_redirects: 0, referer: nil)
|
|
203
253
|
request = Net::HTTP::Get.new(url)
|
|
204
254
|
request["User-Agent"] = USER_AGENTS.sample
|
|
205
255
|
request["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
|
|
@@ -208,8 +258,9 @@ module Clacky
|
|
|
208
258
|
# a JS-only skeleton (~39KB) instead of the real HTML results (~120KB)
|
|
209
259
|
request["Sec-Fetch-Dest"] = "document"
|
|
210
260
|
request["Sec-Fetch-Mode"] = "navigate"
|
|
211
|
-
request["Sec-Fetch-Site"] = "none"
|
|
261
|
+
request["Sec-Fetch-Site"] = referer ? "same-origin" : "none"
|
|
212
262
|
request["Upgrade-Insecure-Requests"] = "1"
|
|
263
|
+
request["Referer"] = referer if referer
|
|
213
264
|
|
|
214
265
|
response = Net::HTTP.start(url.hostname, url.port,
|
|
215
266
|
use_ssl: url.scheme == "https",
|
|
@@ -220,7 +271,7 @@ module Clacky
|
|
|
220
271
|
if follow_redirects > 0 && response.is_a?(Net::HTTPRedirection)
|
|
221
272
|
location = response["location"]
|
|
222
273
|
redirect_url = location.start_with?("http") ? URI(location) : URI("#{url.scheme}://#{url.hostname}#{location}")
|
|
223
|
-
return http_get(redirect_url, accept_language: accept_language, follow_redirects: follow_redirects - 1)
|
|
274
|
+
return http_get(redirect_url, accept_language: accept_language, follow_redirects: follow_redirects - 1, referer: referer)
|
|
224
275
|
end
|
|
225
276
|
|
|
226
277
|
response
|
|
@@ -119,18 +119,29 @@ module Clacky
|
|
|
119
119
|
|
|
120
120
|
@render_mutex.synchronize do
|
|
121
121
|
entry = @buffer.entry_by_id(id)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
122
|
+
if entry.nil?
|
|
123
|
+
Clacky::Logger.warn("[ph_debug] replace_entry_nil", id: id, content: content.to_s[0, 120])
|
|
124
|
+
return
|
|
125
|
+
end
|
|
126
|
+
if entry.committed
|
|
127
|
+
Clacky::Logger.warn("[ph_debug] replace_entry_committed", id: id, content: content.to_s[0, 120])
|
|
128
|
+
return
|
|
129
|
+
end
|
|
130
|
+
if (entry.committed_line_offset || 0) > 0
|
|
131
|
+
Clacky::Logger.warn("[ph_debug] replace_entry_partial", id: id, offset: entry.committed_line_offset, content: content.to_s[0, 120])
|
|
132
|
+
return
|
|
133
|
+
end
|
|
126
134
|
|
|
127
135
|
old_lines = entry.lines.dup
|
|
128
136
|
new_lines = wrap_content_to_lines(content)
|
|
129
137
|
if old_lines == new_lines
|
|
138
|
+
Clacky::Logger.warn("[ph_debug] replace_entry_same", id: id)
|
|
130
139
|
screen.flush
|
|
131
140
|
return
|
|
132
141
|
end
|
|
133
142
|
@buffer.replace(id, new_lines)
|
|
143
|
+
is_tail = @buffer.live_entries.last&.id == id
|
|
144
|
+
Clacky::Logger.warn("[ph_debug] replace_entry_paint", id: id, is_tail: is_tail, old_n: old_lines.length, new_n: new_lines.length, content: content.to_s[0, 120])
|
|
134
145
|
|
|
135
146
|
unless @fullscreen_mode
|
|
136
147
|
# repaint_entry_in_place relies on the entry being the tail of
|
|
@@ -147,7 +158,6 @@ module Clacky
|
|
|
147
158
|
# For non-tail replaces, fall back to a full rebuild of the
|
|
148
159
|
# output area from the buffer. Slower, but correct regardless
|
|
149
160
|
# of where the entry lives.
|
|
150
|
-
is_tail = @buffer.live_entries.last&.id == id
|
|
151
161
|
if is_tail
|
|
152
162
|
repaint_entry_in_place(entry, old_lines, new_lines)
|
|
153
163
|
else
|
|
@@ -127,6 +127,7 @@ module Clacky
|
|
|
127
127
|
@start_time = nil
|
|
128
128
|
@ticker = nil
|
|
129
129
|
@state = :fresh # :fresh → :running → :closed
|
|
130
|
+
@unregistered = false
|
|
130
131
|
@metadata = {}
|
|
131
132
|
@last_chunk_at = nil
|
|
132
133
|
@monitor = Monitor.new
|
|
@@ -172,29 +173,38 @@ module Clacky
|
|
|
172
173
|
end
|
|
173
174
|
|
|
174
175
|
# Stop the ticker, render one final frame, and unregister from the
|
|
175
|
-
# owner. Idempotent
|
|
176
|
+
# owner. Idempotent and crash-safe — if a previous finish was
|
|
177
|
+
# interrupted (e.g. Thread#raise(AgentInterrupted) hit between
|
|
178
|
+
# +stop_ticker+ and +unregister_progress+), a follow-up finish
|
|
179
|
+
# will still complete the unregister so the handle does not stay
|
|
180
|
+
# orphaned on the owner's progress stack.
|
|
176
181
|
#
|
|
177
182
|
# @param final_message [String, nil] Optional override for the last
|
|
178
183
|
# frame. If nil, the handle composes "<message>… (<elapsed>s)".
|
|
179
184
|
def finish(final_message: nil)
  Clacky::Logger.warn("[ph_debug] finish_entry", oid: object_id, state: @state, unreg: @unregistered, msg: @message, eid: @entry_id)

  snapshot = @monitor.synchronize do
    # Only a completed unregister makes later calls a no-op; a handle that
    # is :closed but not yet unregistered still falls through.
    return if @unregistered

    closing_now = (@state == :running)
    @state = :closed if closing_now
    { first_close: closing_now, message: final_message || @message, elapsed: elapsed_seconds }
  end

  stop_ticker

  # A nil final frame is passed to the owner when a quiet handle finishes
  # in under FAST_FINISH_THRESHOLD_SECONDS.
  fast_finish = @quiet_on_fast_finish && snapshot[:elapsed] < FAST_FINISH_THRESHOLD_SECONDS
  final_frame = fast_finish ? nil : compose_final_frame(snapshot[:message], snapshot[:elapsed])

  Clacky::Logger.warn("[ph_debug] finish_unregister", oid: object_id, eid: @entry_id, first_close: snapshot[:first_close], final_frame: final_frame.to_s[0, 200])
  @owner.unregister_progress(self, final_frame: final_frame)
  # Flag is set only after the owner call returns, so an interrupted
  # finish can be retried.
  @monitor.synchronize { @unregistered = true }
  Clacky::Logger.warn("[ph_debug] finish_done", oid: object_id)
end
|
|
199
209
|
alias_method :cancel, :finish
|
|
200
210
|
|
|
@@ -655,6 +655,7 @@ module Clacky
|
|
|
655
655
|
|
|
656
656
|
# Called by ProgressHandle#finish.
|
|
657
657
|
def unregister_progress(handle, final_frame:)
|
|
658
|
+
Clacky::Logger.warn("[ph_debug] unreg_entry", oid: handle.object_id, eid: handle.entry_id, top: @progress_stack.last == handle, stack_size: @progress_stack.size, ff: final_frame.to_s[0, 200])
|
|
658
659
|
@progress_mutex.synchronize do
|
|
659
660
|
# If this handle still holds its entry (it's currently top), we
|
|
660
661
|
# render one last frame there and release the id. If it was
|
|
@@ -662,10 +663,14 @@ module Clacky
|
|
|
662
663
|
# is already gone and the final_frame is simply dropped.
|
|
663
664
|
if handle.entry_id
|
|
664
665
|
if final_frame && !final_frame.to_s.strip.empty?
|
|
666
|
+
Clacky::Logger.warn("[ph_debug] unreg_update_entry", oid: handle.object_id, eid: handle.entry_id)
|
|
665
667
|
update_entry(handle.entry_id, @renderer.render_progress(final_frame))
|
|
666
668
|
else
|
|
669
|
+
Clacky::Logger.warn("[ph_debug] unreg_remove_entry", oid: handle.object_id, eid: handle.entry_id)
|
|
667
670
|
remove_entry(handle.entry_id)
|
|
668
671
|
end
|
|
672
|
+
else
|
|
673
|
+
Clacky::Logger.warn("[ph_debug] unreg_no_entry_id", oid: handle.object_id)
|
|
669
674
|
end
|
|
670
675
|
|
|
671
676
|
@progress_stack.delete(handle)
|
|
@@ -873,6 +878,28 @@ module Clacky
|
|
|
873
878
|
append_output(output)
|
|
874
879
|
end
|
|
875
880
|
|
|
881
|
+
# Open a phase: record it in @active_phases, make it the calling thread's
# current phase (Thread.current[:clacky_phase_id]) and print an opening
# banner. The phase that was current before is remembered, so that closing
# a nested phase hands control back to the enclosing one.
#
# @param kind [Object] phase category; used as banner text when no label is given
# @param label [String, nil] banner text
# @return [String] the generated phase id, to be passed to phase_end
def phase_start(kind:, label: nil)
  phase_id = SecureRandom.uuid
  @active_phases ||= {}
  @active_phases[phase_id] = {
    kind: kind,
    label: label,
    started_at: Time.now,
    parent_id: Thread.current[:clacky_phase_id]
  }
  Thread.current[:clacky_phase_id] = phase_id

  banner = "──────── ▼ #{label || kind} ────────"
  append_output(@renderer.render_system_message(banner, prefix_newline: true))
  phase_id
end

# Close a phase opened by phase_start and print the closing banner.
# Unknown or already-closed ids are ignored, so a double close neither
# prints a second banner nor disturbs the thread's current phase.
#
# @param phase_id [String] id returned by phase_start
# @param summary [String, nil] optional text appended to the closing banner
# @return [void]
def phase_end(phase_id, summary: nil)
  info = @active_phases&.delete(phase_id)
  return unless info

  # Only hand the thread-local back when this phase is still the current
  # one; fall back to nil if the enclosing phase has already been closed.
  if Thread.current[:clacky_phase_id] == phase_id
    parent_id = info[:parent_id]
    Thread.current[:clacky_phase_id] = @active_phases.key?(parent_id) ? parent_id : nil
  end

  label = info[:label] || info[:kind]
  tail = summary && !summary.to_s.strip.empty? ? " — #{summary.to_s.strip}" : ""
  banner = "──────── ▲ #{label} done#{tail} ────────"
  append_output(@renderer.render_system_message(banner, prefix_newline: false))
end
|
|
902
|
+
|
|
876
903
|
# Set workspace status to idle (called when agent stops working)
|
|
877
904
|
def set_idle_status
|
|
878
905
|
# Safety net: close any legacy progress slots that were opened via
|
data/lib/clacky/ui_interface.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
3
5
|
module Clacky
|
|
4
6
|
# UIInterface defines the standard interface between Agent/CLI and UI implementations.
|
|
5
7
|
# All UI controllers (UIController, JsonUIController) must implement these methods.
|
|
@@ -136,5 +138,25 @@ module Clacky
|
|
|
136
138
|
# === Path redaction (for encrypted brand skill tmpdirs) ===
|
|
137
139
|
# === Lifecycle ===
|
|
138
140
|
def stop(clear_screen: false); end
|
|
141
|
+
|
|
142
|
+
# === Phase grouping (optional, web UI uses this to fold subagent runs) ===
|
|
143
|
+
# Begin a logical phase. Events emitted between phase_start and phase_end
|
|
144
|
+
# carry the phase_id so the UI can group them visually.
|
|
145
|
+
# Returns the phase_id (caller is responsible for passing it to phase_end).
|
|
146
|
+
def phase_start(kind:, label: nil)
  # Default implementation: the arguments are unused and only a fresh id
  # is handed back.
  phase_id = SecureRandom.uuid
  phase_id
end
|
|
149
|
+
|
|
150
|
+
def phase_end(phase_id, summary: nil)
  # Default implementation does nothing.
end
|
|
151
|
+
|
|
152
|
+
# Run block within a phase. Always closes via ensure.
|
|
153
|
+
# @param kind [Object] forwarded to phase_start
# @param label [String, nil] forwarded to phase_start
# @yieldparam phase_id [Object] the id returned by phase_start
# @return [Object] the block's value
def with_phase(kind:, label: nil)
  phase_id = phase_start(kind: kind, label: label)
  # The phase is opened before the protected region: if phase_start itself
  # raises, there is nothing to close.
  begin
    yield(phase_id)
  ensure
    phase_end(phase_id)
  end
end
|
|
139
161
|
end
|
|
140
162
|
end
|
|
@@ -145,6 +145,47 @@ module Clacky
|
|
|
145
145
|
}
|
|
146
146
|
},
|
|
147
147
|
|
|
148
|
+
# Xiaomi MiMo — USD per 1M tokens, international (海外) list price.
|
|
149
|
+
# Source: https://platform.xiaomimimo.com/docs/zh-CN/price/pay-as-you-go
|
|
150
|
+
# Effective 2026-05-27 (V2.5 launch price cut). Cache write is "limited-
|
|
151
|
+
# time free" per Xiaomi's notice; per the project's "displayed ≤ actual"
|
|
152
|
+
# convention we bill writes at the input-miss rate so that when the
|
|
153
|
+
# promo ends users won't see a cost spike. Cache hits use the explicit
|
|
154
|
+
# cache-hit rate.
|
|
155
|
+
#
|
|
156
|
+
# As of 2026-06-01, mimo-v2-pro/omni are forwarded to the V2.5 series
|
|
157
|
+
# and billed at V2.5 rates; mimo-v2-pro mirrors mimo-v2.5-pro and
|
|
158
|
+
# mimo-v2-omni mirrors mimo-v2.5. Both will be retired 2026-06-30.
|
|
159
|
+
"mimo-v2.5-pro" => {
|
|
160
|
+
input: { default: 0.435, over_200k: 0.435 },
|
|
161
|
+
output: { default: 0.87, over_200k: 0.87 },
|
|
162
|
+
cache: { write: 0.435, read: 0.0036 }
|
|
163
|
+
},
|
|
164
|
+
|
|
165
|
+
"mimo-v2.5" => {
|
|
166
|
+
input: { default: 0.14, over_200k: 0.14 },
|
|
167
|
+
output: { default: 0.28, over_200k: 0.28 },
|
|
168
|
+
cache: { write: 0.14, read: 0.0028 }
|
|
169
|
+
},
|
|
170
|
+
|
|
171
|
+
"mimo-v2-pro" => {
|
|
172
|
+
input: { default: 0.435, over_200k: 0.435 },
|
|
173
|
+
output: { default: 0.87, over_200k: 0.87 },
|
|
174
|
+
cache: { write: 0.435, read: 0.0036 }
|
|
175
|
+
},
|
|
176
|
+
|
|
177
|
+
"mimo-v2-omni" => {
|
|
178
|
+
input: { default: 0.14, over_200k: 0.14 },
|
|
179
|
+
output: { default: 0.28, over_200k: 0.28 },
|
|
180
|
+
cache: { write: 0.14, read: 0.0028 }
|
|
181
|
+
},
|
|
182
|
+
|
|
183
|
+
"mimo-v2-flash" => {
|
|
184
|
+
input: { default: 0.10, over_200k: 0.10 },
|
|
185
|
+
output: { default: 0.30, over_200k: 0.30 },
|
|
186
|
+
cache: { write: 0.10, read: 0.01 }
|
|
187
|
+
},
|
|
188
|
+
|
|
148
189
|
# Kimi K2.5 / K2.6 multimodal models
|
|
149
190
|
# Source: https://platform.moonshot.cn (USD / 1M tokens)
|
|
150
191
|
# Kimi billing model (same shape as DeepSeek):
|
|
@@ -181,6 +222,38 @@ module Clacky
|
|
|
181
222
|
}
|
|
182
223
|
},
|
|
183
224
|
|
|
225
|
+
# Google Gemini 3 series (via Vertex AI). Tiered at 200K input tokens
|
|
226
|
+
# for Pro; Flash has flat pricing.
|
|
227
|
+
"gemini-3.1-pro" => {
|
|
228
|
+
input: {
|
|
229
|
+
default: 2.00,
|
|
230
|
+
over_200k: 4.00
|
|
231
|
+
},
|
|
232
|
+
output: {
|
|
233
|
+
default: 12.00,
|
|
234
|
+
over_200k: 18.00
|
|
235
|
+
},
|
|
236
|
+
cache: {
|
|
237
|
+
write: 2.00,
|
|
238
|
+
read: 0.50
|
|
239
|
+
}
|
|
240
|
+
},
|
|
241
|
+
|
|
242
|
+
"gemini-3-flash" => {
|
|
243
|
+
input: {
|
|
244
|
+
default: 0.50,
|
|
245
|
+
over_200k: 0.50
|
|
246
|
+
},
|
|
247
|
+
output: {
|
|
248
|
+
default: 3.00,
|
|
249
|
+
over_200k: 3.00
|
|
250
|
+
},
|
|
251
|
+
cache: {
|
|
252
|
+
write: 0.50,
|
|
253
|
+
read: 0.05
|
|
254
|
+
}
|
|
255
|
+
},
|
|
256
|
+
|
|
184
257
|
# OpenAI GPT-5.5 / GPT-5.4 — breakpoint at 272K input tokens
|
|
185
258
|
# Source: https://openai.com/api/pricing/ (USD / 1M tokens)
|
|
186
259
|
# Note: OpenAI's actual tiered-pricing threshold is 272K, not the
|
|
@@ -581,6 +654,22 @@ module Clacky
|
|
|
581
654
|
# non-thinking / thinking modes respectively. Bill at flash rates.
|
|
582
655
|
when /^deepseek-chat$/i, /^deepseek-reasoner$/i
|
|
583
656
|
"deepseek-v4-flash"
|
|
657
|
+
# Xiaomi MiMo — strict anchored match per registered model id in
|
|
658
|
+
# providers.rb (currently mimo-v2.5-pro / mimo-v2-pro / mimo-v2-omni).
|
|
659
|
+
# mimo-v2.5 / mimo-v2-flash are also priced ahead of provider-side
|
|
660
|
+
# registration. Per Xiaomi's 2026-06 schedule, mimo-v2-pro/omni are
|
|
661
|
+
# transparently routed to V2.5 — keys are listed independently so
|
|
662
|
+
# both old and new ids resolve to the right rate.
|
|
663
|
+
when /^mimo-v2\.?5-pro$/i
|
|
664
|
+
"mimo-v2.5-pro"
|
|
665
|
+
when /^mimo-v2\.?5$/i
|
|
666
|
+
"mimo-v2.5"
|
|
667
|
+
when /^mimo-v2-pro$/i
|
|
668
|
+
"mimo-v2-pro"
|
|
669
|
+
when /^mimo-v2-omni$/i
|
|
670
|
+
"mimo-v2-omni"
|
|
671
|
+
when /^mimo-v2-flash$/i
|
|
672
|
+
"mimo-v2-flash"
|
|
584
673
|
# Kimi K2.5 / K2.6 — strict match only. K2 text-only models
|
|
585
674
|
# (kimi-k2-0905-preview, kimi-k2-thinking, etc.) are not yet
|
|
586
675
|
# registered in providers.rb and will be added in a follow-up
|
|
@@ -636,6 +725,13 @@ module Clacky
|
|
|
636
725
|
when /^qwen3-vl-plus$/i
|
|
637
726
|
"qwen3-vl-plus"
|
|
638
727
|
|
|
728
|
+
# Google Gemini 3 series. Match the platform aliases (or-gemini-*)
|
|
729
|
+
# and the bare upstream ids returned by Vertex.
|
|
730
|
+
when /^or-gemini-3-1-pro$/i, /^gemini-3\.1-pro(-preview)?$/i
|
|
731
|
+
"gemini-3.1-pro"
|
|
732
|
+
when /^or-gemini-3-5-flash$/i, /^gemini-3\.5-flash$/i, /^gemini-3-flash(-preview)?$/i
|
|
733
|
+
"gemini-3-flash"
|
|
734
|
+
|
|
639
735
|
# OpenAI GPT-5.x models — match various dashed/dotted/compact forms
|
|
640
736
|
# (e.g. "gpt-5.5", "gpt-5-5", "gpt5.5", "gpt55")
|
|
641
737
|
when /^gpt-?5\.?5$/i, /^gpt-?5[\.-]?5$/i
|
data/lib/clacky/version.rb
CHANGED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "base64"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
require "json"
|
|
7
|
+
require_relative "../utils/file_processor"
|
|
8
|
+
|
|
9
|
+
module Clacky
  module Vision
    # OCR sidecar — turns image bytes into a text description by calling a
    # vision-capable model. Used when the user's primary model is text-only
    # (e.g. DeepSeek V4) so that uploaded images and tool screenshots still
    # reach the conversation as useful context.
    #
    # Requests go through Clacky::Client. The image travels as an
    # `image_url` content block carrying a base64 data URL. Successful
    # descriptions are cached on disk under CACHE_DIR, keyed by image bytes,
    # model and prompt.
    class Resolver
      DEFAULT_PROMPT = <<~PROMPT.strip
        Extract every legible text and describe the visual content of this image.
        Output as Markdown. Preserve table layout where possible (use Markdown tables).
        For UI screenshots, describe the layout, visible labels, and active state.
        Be thorough but concise — the user cannot see the image and must rely on
        your description.
      PROMPT

      MAX_TOKENS = 8192
      CACHE_DIR = File.join(Dir.home, ".clacky", "ocr_cache")
      # Stored in every cache file; entries with a different version are ignored.
      CACHE_VERSION = 1

      # Outcome of #describe. +status+ is one of :ok, :empty, :call_failed,
      # :bad_image; +text+ is set for :ok, +error+ for :call_failed.
      Result = Struct.new(:status, :text, :error, keyword_init: true) do
        def ok?; status == :ok; end
        def empty?; status == :empty; end
        def call_failed?; status == :call_failed; end
        def bad_image?; status == :bad_image; end
      end

      # @param model_entry [Hash] read via the string keys "model",
      #   "base_url", "api_key" and "anthropic_format"
      def initialize(model_entry)
        @model_entry = model_entry
        @model       = model_entry["model"]
        @base_url    = model_entry["base_url"]
        @api_key     = model_entry["api_key"]
        @anthropic   = !!model_entry["anthropic_format"]
      end

      # Describe an image, consulting the on-disk cache first.
      #
      # @param image [Hash, nil] one of bytes (+ optional mime_type),
      #   data_url or path; symbol and string keys are both accepted
      # @param prompt [String, nil] instruction for the model; DEFAULT_PROMPT
      #   is used when nil or blank
      # @return [Result] one of:
      #   status=:ok + text            — sidecar produced a description
      #   status=:empty                — sidecar returned 200 but no usable text (e.g. token budget exhausted by reasoning)
      #   status=:call_failed + error  — network/parse/auth error from the sidecar
      #   status=:bad_image            — image missing, unreadable or empty
      def describe(image, prompt: nil)
        prompt = prompt.to_s.strip
        prompt = DEFAULT_PROMPT if prompt.empty?

        bytes, mime = read_image(image)
        return Result.new(status: :bad_image) if bytes.nil? || bytes.empty?

        cached = cache_get(bytes, prompt)
        return Result.new(status: :ok, text: cached) if cached

        text = call_vlm(bytes, mime, prompt)
        return Result.new(status: :empty) if text.nil? || text.strip.empty?

        cache_put(bytes, prompt, text)
        Result.new(status: :ok, text: text)
      rescue => e
        Clacky::Logger.warn("[Vision::Resolver] failed: #{e.class}: #{e.message}") if defined?(Clacky::Logger)
        Result.new(status: :call_failed, error: "#{e.class}: #{e.message}")
      end

      # Resolve the image argument into raw bytes and a MIME type.
      #
      # @return [Array(String, String), Array(nil, nil)] [bytes, mime], or
      #   [nil, nil] when no readable image source is present
      private def read_image(image)
        return [nil, nil] if image.nil?

        if (bytes = image[:bytes] || image["bytes"])
          [bytes, image[:mime_type] || image["mime_type"] || "image/png"]
        elsif (data_url = image[:data_url] || image["data_url"])
          match = data_url.match(/\Adata:([^;]+);base64,(.*)\z/m)
          match ? [Base64.decode64(match[2]), match[1]] : [nil, nil]
        elsif (path = image[:path] || image["path"])
          # File.file? also rejects directories, which cannot be read as an image.
          return [nil, nil] unless File.file?(path)

          [File.binread(path), Utils::FileProcessor.detect_mime_type(path, nil) || "image/png"]
        else
          [nil, nil]
        end
      end

      # Send the prompt and the image (as a base64 data URL) to the model.
      #
      # @return [String] the response normalised by extract_text
      private def call_vlm(bytes, mime, prompt)
        data_url = "data:#{mime};base64,#{Base64.strict_encode64(bytes)}"
        message = {
          role: "user",
          content: [
            { type: "text", text: prompt },
            { type: "image_url", image_url: { url: data_url } }
          ]
        }

        client = Clacky::Client.new(
          @api_key,
          base_url: @base_url,
          model: @model,
          anthropic_format: @anthropic
        )
        response = client.send_messages([message], model: @model, max_tokens: MAX_TOKENS)
        extract_text(response)
      end

      # Client#send_messages returns the raw upstream string for OpenAI/Anthropic;
      # for Bedrock it returns the parsed text content. Normalise to String.
      private def extract_text(response)
        case response
        when String then response
        when Hash   then response[:content] || response["content"] || response.to_s
        else response.to_s
        end
      end

      # ── Cache ─────────────────────────────────────────────────────────────

      # File-name-safe key: image digest, sanitised model name and a short
      # prompt digest.
      private def cache_key(bytes, prompt)
        sha = Digest::SHA256.hexdigest(bytes)
        prompt_sha = Digest::SHA256.hexdigest(prompt)[0, 12]
        "#{sha}_#{@model.to_s.gsub(/[^A-Za-z0-9_.-]/, '_')}_#{prompt_sha}"
      end

      private def cache_path(key)
        File.join(CACHE_DIR, "#{key}.json")
      end

      # Look up a cached description.
      #
      # @return [String, nil] the cached text, or nil on a miss, a version
      #   mismatch, or any unreadable/corrupt cache file
      private def cache_get(bytes, prompt)
        path = cache_path(cache_key(bytes, prompt))
        return nil unless File.file?(path)

        data = JSON.parse(File.read(path))
        return nil unless data.is_a?(Hash) && data["v"] == CACHE_VERSION

        text = data["text"]
        text if text.is_a?(String) && !text.strip.empty?
      rescue StandardError
        # Cache is best-effort — a bad entry means "miss", never a failed request.
        nil
      end

      # Store a description. Write failures are ignored.
      private def cache_put(bytes, prompt, text)
        FileUtils.mkdir_p(CACHE_DIR)
        path = cache_path(cache_key(bytes, prompt))
        File.write(path, JSON.generate({
          "v"     => CACHE_VERSION,
          "model" => @model,
          "text"  => text,
          "ts"    => Time.now.to_i
        }))
      rescue => _
        # Cache is best-effort — never fail the request because we can't write.
        nil
      end
    end
  end
end
|