openclacky 1.2.13 → 1.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +4 -0
- data/CHANGELOG.md +28 -0
- data/lib/clacky/agent/session_serializer.rb +1 -0
- data/lib/clacky/agent.rb +123 -14
- data/lib/clacky/agent_config.rb +136 -10
- data/lib/clacky/client.rb +59 -46
- data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
- data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
- data/lib/clacky/providers.rb +37 -0
- data/lib/clacky/proxy_config.rb +65 -0
- data/lib/clacky/server/http_server.rb +202 -5
- data/lib/clacky/server/scheduler.rb +13 -10
- data/lib/clacky/ui2/progress_handle.rb +17 -13
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/vision/resolver.rb +157 -0
- data/lib/clacky/web/app.css +56 -6
- data/lib/clacky/web/i18n.js +24 -2
- data/lib/clacky/web/index.html +21 -0
- data/lib/clacky/web/notify.js +154 -0
- data/lib/clacky/web/notify.mp3 +0 -0
- data/lib/clacky/web/settings.js +88 -12
- data/lib/clacky/web/ws-dispatcher.js +8 -0
- data/lib/clacky.rb +4 -0
- metadata +7 -2
|
@@ -40,15 +40,13 @@ module Clacky
|
|
|
40
40
|
url = f[:data_url] || f["data_url"]
|
|
41
41
|
name = f[:name] || f["name"]
|
|
42
42
|
path = f[:path] || f["path"]
|
|
43
|
+
type = f[:type] || f["type"] || ""
|
|
43
44
|
|
|
44
45
|
if url
|
|
45
46
|
url
|
|
46
|
-
elsif path && File.exist?(path.to_s)
|
|
47
|
-
# Reconstruct data_url from the tmp file (still present on disk)
|
|
47
|
+
elsif type.to_s == "image" && path && File.exist?(path.to_s)
|
|
48
48
|
Utils::FileProcessor.image_path_to_data_url(path) rescue "expired:#{name}"
|
|
49
49
|
elsif name
|
|
50
|
-
# File badge for non-image disk files, or image whose tmp file is gone
|
|
51
|
-
type = f[:type] || f["type"] || ""
|
|
52
50
|
type.to_s == "image" ? "expired:#{name}" : "pdf:#{name}"
|
|
53
51
|
end
|
|
54
52
|
end
|
|
@@ -440,6 +438,10 @@ module Clacky
|
|
|
440
438
|
when ["POST", "/api/config/test"] then api_test_config(req, res)
|
|
441
439
|
when ["POST", "/api/config/media/test"] then api_test_media_config(req, res)
|
|
442
440
|
when ["GET", "/api/config/media"] then api_get_media_config(res)
|
|
441
|
+
when ["GET", "/api/config/ocr"] then api_get_ocr_config(res)
|
|
442
|
+
when ["PATCH", "/api/config/ocr"] then api_update_ocr_config(req, res)
|
|
443
|
+
when ["POST", "/api/config/ocr/test"] then api_test_ocr_config(req, res)
|
|
444
|
+
when ["POST", "/api/internal/ocr-image"] then api_internal_ocr_image(req, res)
|
|
443
445
|
when ["GET", "/api/providers"] then api_list_providers(res)
|
|
444
446
|
when ["GET", "/api/onboard/status"] then api_onboard_status(res)
|
|
445
447
|
when ["GET", "/api/browser/status"] then api_browser_status(res)
|
|
@@ -1103,6 +1105,179 @@ module Clacky
|
|
|
1103
1105
|
json_response(res, 422, { error: e.message })
|
|
1104
1106
|
end
|
|
1105
1107
|
|
|
1108
|
+
# GET /api/config/ocr
# Reports the OCR sidecar state to the Settings UI and, next to it, a preview
# of what "auto" mode would select for the provider of the current default
# model. Responds 200 with { ocr: {...}, default_provider: {...} }.
def api_get_ocr_config(res)
  state     = @agent_config.ocr_state
  ocr_entry = @agent_config.find_model_by_type("ocr")
  # The raw key never leaves the server; only a masked form is reported.
  masked_key = ocr_entry ? mask_api_key(ocr_entry["api_key"]) : nil

  ocr_payload = {
    source: state["source"],
    model: state["model"],
    base_url: state["base_url"],
    api_key_masked: masked_key,
    provider: state["provider"],
    available: state["available"],
    stale: state["stale"] || false,
    requested_model: state["requested_model"],
    configured: state["configured"],
    primary: state["primary"] || false
  }

  # Auto-mode preview: resolved from the default model's base_url / api_key.
  default_entry = @agent_config.find_model_by_type("default")
  provider_id = default_entry && Clacky::Providers.resolve_provider(
    base_url: default_entry["base_url"],
    api_key: default_entry["api_key"]
  )
  preview =
    if provider_id
      {
        provider: provider_id,
        model: Clacky::Providers.default_ocr_model(provider_id),
        available: Clacky::Providers.ocr_models(provider_id)
      }
    else
      { provider: provider_id, model: nil, available: [] }
    end

  json_response(res, 200, { ocr: ocr_payload, default_provider: preview })
end
|
|
1144
|
+
|
|
1145
|
+
# PATCH /api/config/ocr
# Body: { source: "off"|"auto"|"custom", model?, base_url?, api_key?,
#         anthropic_format? }
#
# Replaces the single "ocr" model entry according to +source+:
#   "off"    -> stores a disabled placeholder entry
#   "auto"   -> stores no entry, or a model-only override when +model+ is given
#   "custom" -> stores a full entry; model, base_url and api_key are required
#
# A masked api_key (one containing "****", as echoed back by the GET endpoint)
# means "keep the stored key". The stored key is therefore looked up BEFORE
# the old entry is removed, and the configuration is only mutated once the
# request has been fully validated, so a rejected request leaves the existing
# OCR configuration untouched.
#
# Responds 200 { ok: true, state: ... } on success, 422 { error: ... } on
# invalid input or any failure while saving.
def api_update_ocr_config(req, res)
  body = parse_json_body(req) || {}
  source = body["source"].to_s
  unless %w[off auto custom].include?(source)
    return json_response(res, 422, { error: "invalid source" })
  end

  # Must be read before the reject! below, otherwise there is nothing to find.
  previous = @agent_config.models.find { |m| m["type"] == "ocr" && m["api_key"] }

  new_entry =
    case source
    when "off"
      { "id" => SecureRandom.uuid, "type" => "ocr", "disabled" => true }
    when "auto"
      override = body["model"].to_s.strip
      # No override: auto mode is represented by the absence of an entry.
      override.empty? ? nil : { "id" => SecureRandom.uuid, "type" => "ocr", "model" => override }
    when "custom"
      model    = body["model"].to_s.strip
      base_url = body["base_url"].to_s.strip
      api_key  = body["api_key"].to_s
      if api_key.include?("****")
        api_key = previous ? previous["api_key"].to_s : ""
      end
      if model.empty? || base_url.empty? || api_key.empty?
        return json_response(res, 422, { error: "model, base_url, api_key are required" })
      end

      {
        "id" => SecureRandom.uuid,
        "model" => model,
        "base_url" => base_url,
        "api_key" => api_key,
        "anthropic_format" => body["anthropic_format"] || false,
        "type" => "ocr"
      }
    end

  @agent_config.models.reject! { |m| m["type"] == "ocr" }
  @agent_config.models << new_entry if new_entry

  @agent_config.save
  json_response(res, 200, { ok: true, state: @agent_config.ocr_state })
rescue => e
  json_response(res, 422, { error: e.message })
end
|
|
1201
|
+
|
|
1202
|
+
# POST /api/config/ocr/test
# Connectivity check for an OCR endpoint, delegated to the media preflight
# helper. Always answers HTTP 200; success or failure is carried in the
# payload ({ ok: false, message: ... } on missing fields or errors).
def api_test_ocr_config(req, res)
  params = parse_json_body(req) || {}

  # An empty or masked key ("****") means "use the key already stored".
  key = params["api_key"].to_s
  if key.empty? || key.include?("****")
    stored = @agent_config.find_model_by_type("ocr") || {}
    key = stored["api_key"].to_s
  end

  target_model = params["model"].to_s.strip
  endpoint     = params["base_url"].to_s.strip

  if [target_model, endpoint, key].any?(&:empty?)
    return json_response(res, 200, { ok: false, message: "model, base_url, api_key are required" })
  end

  json_response(res, 200, preflight_media_endpoint(base_url: endpoint, api_key: key, model: target_model))
rescue => err
  json_response(res, 200, { ok: false, message: err.message })
end
|
|
1224
|
+
|
|
1225
|
+
# POST /api/internal/ocr-image
# Internal endpoint used by parser scripts (e.g. pdf_parser_vlm.py) to
# transcribe a single image via the configured OCR sidecar. No dedicated
# auth is added here; it relies on the server's standard access check.
#
# Request:  multipart/form-data with field "image" (binary), optional "prompt"
#           OR JSON body { "data_url": "data:image/png;base64,...", "prompt": "..." }
# Response: { ok: true, text: "..." } or { ok: false, message: "..." }
#
# Status codes:
#   200 - OCR ran; ok:false here means the model returned no usable text
#   400 - no image supplied, or the image could not be decoded
#   503 - no OCR sidecar configured (or it is switched off)
#   500 - the sidecar call failed, or an unexpected error occurred
def api_internal_ocr_image(req, res)
  entry = @agent_config.find_model_by_type("ocr")
  # A "disabled" placeholder entry is what source "off" stores; treat it the
  # same as having no entry at all.
  if entry.nil? || entry["disabled"]
    return json_response(res, 503, { ok: false, message: "OCR sidecar not configured" })
  end

  prompt   = nil
  data_url = nil
  bytes    = nil
  mime     = "image/png"

  ctype = req.content_type.to_s
  if ctype.start_with?("multipart/form-data")
    parts = req.query
    if (img = parts["image"])
      bytes = img.respond_to?(:read) ? img.read : img.to_s
      mime = (img.respond_to?(:[]) ? img["content-type"].to_s : nil)
      mime = "image/png" if mime.nil? || mime.empty?
    end
    prompt = parts["prompt"].to_s if parts["prompt"]
  else
    body = parse_json_body(req) || {}
    data_url = body["data_url"].to_s
    prompt = body["prompt"].to_s if body["prompt"]
  end

  image =
    if bytes && !bytes.empty?
      { bytes: bytes, mime_type: mime }
    elsif data_url && !data_url.empty?
      { data_url: data_url }
    else
      return json_response(res, 400, { ok: false, message: "image or data_url required" })
    end

  # Resolver#describe returns a Result struct (status/text/error), not a
  # String, so branch on its status instead of calling String methods on it.
  result = Clacky::Vision::Resolver.new(entry).describe(image, prompt: prompt)
  case result.status
  when :ok
    json_response(res, 200, { ok: true, text: result.text })
  when :empty
    json_response(res, 200, { ok: false, message: "OCR returned empty result" })
  when :bad_image
    json_response(res, 400, { ok: false, message: "image could not be decoded" })
  else
    message = result.error.to_s
    message = "OCR call failed" if message.empty?
    json_response(res, 500, { ok: false, message: message })
  end
rescue => e
  json_response(res, 500, { ok: false, message: e.message })
end
|
|
1280
|
+
|
|
1106
1281
|
# POST /api/onboard/complete
|
|
1107
1282
|
# Called after key setup is done (soul_setup is optional/skipped).
|
|
1108
1283
|
# Creates the default session if none exists yet, returns it.
|
|
@@ -3742,7 +3917,8 @@ module Clacky
|
|
|
3742
3917
|
ok: true,
|
|
3743
3918
|
enable_compression: @agent_config.enable_compression,
|
|
3744
3919
|
enable_prompt_caching: @agent_config.enable_prompt_caching,
|
|
3745
|
-
memory_update_enabled: @agent_config.memory_update_enabled
|
|
3920
|
+
memory_update_enabled: @agent_config.memory_update_enabled,
|
|
3921
|
+
proxy_url: @agent_config.proxy_url.to_s
|
|
3746
3922
|
})
|
|
3747
3923
|
end
|
|
3748
3924
|
|
|
@@ -3760,6 +3936,22 @@ module Clacky
|
|
|
3760
3936
|
if body.key?("memory_update_enabled")
|
|
3761
3937
|
@agent_config.memory_update_enabled = !!body["memory_update_enabled"]
|
|
3762
3938
|
end
|
|
3939
|
+
if body.key?("proxy_url")
|
|
3940
|
+
raw = body["proxy_url"].to_s.strip
|
|
3941
|
+
if raw.empty?
|
|
3942
|
+
@agent_config.proxy_url = nil
|
|
3943
|
+
else
|
|
3944
|
+
begin
|
|
3945
|
+
uri = URI.parse(raw)
|
|
3946
|
+
unless uri.is_a?(URI::HTTP) && uri.host && !uri.host.empty?
|
|
3947
|
+
return json_response(res, 422, { error: "proxy_url must be a valid http(s) URL" })
|
|
3948
|
+
end
|
|
3949
|
+
rescue URI::InvalidURIError
|
|
3950
|
+
return json_response(res, 422, { error: "proxy_url is not a valid URL" })
|
|
3951
|
+
end
|
|
3952
|
+
@agent_config.proxy_url = raw
|
|
3953
|
+
end
|
|
3954
|
+
end
|
|
3763
3955
|
|
|
3764
3956
|
@agent_config.save
|
|
3765
3957
|
json_response(res, 200, { ok: true })
|
|
@@ -4778,6 +4970,11 @@ module Clacky
|
|
|
4778
4970
|
task.call
|
|
4779
4971
|
@registry.update(session_id, status: :idle, error: nil)
|
|
4780
4972
|
broadcast_session_update(session_id)
|
|
4973
|
+
# Transient global signal for the optional task-complete sound. Sent to
|
|
4974
|
+
# all clients (broadcast_all) so a browser viewing another session — or
|
|
4975
|
+
# with the tab/window in the background — can still chime. Not part of
|
|
4976
|
+
# session history: a chime is a live cue, never replayed on refresh.
|
|
4977
|
+
broadcast_all(type: "task_finished", session_id: session_id)
|
|
4781
4978
|
@session_manager.save(agent.to_session_data(status: :success))
|
|
4782
4979
|
# Start idle compression timer now that the agent is idle
|
|
4783
4980
|
idle_timer&.start
|
|
@@ -192,17 +192,20 @@ module Clacky
|
|
|
192
192
|
|
|
193
193
|
# Scheduler main loop: runs +tick+ once per iteration for as long as
# @running is truthy, then sleeps until the start of the next minute.
# An error raised inside an iteration is logged and followed by a short
# pause, so a single failing tick does not terminate the loop.
private def run_loop
  while @running
    begin
      tick(Time.now)

      # Sleep until the start of the next minute
      seconds_left = 60 - Time.now.sec
      sleep(seconds_left)
    rescue => err
      Clacky::Logger.error("scheduler_tick_error", error: err)
      sleep(5) # back off before retrying next tick
    end
  end
end
|
|
207
210
|
|
|
208
211
|
# Check all enabled schedules against the given time and fire matching ones.
|
|
@@ -127,6 +127,7 @@ module Clacky
|
|
|
127
127
|
@start_time = nil
|
|
128
128
|
@ticker = nil
|
|
129
129
|
@state = :fresh # :fresh → :running → :closed
|
|
130
|
+
@unregistered = false
|
|
130
131
|
@metadata = {}
|
|
131
132
|
@last_chunk_at = nil
|
|
132
133
|
@monitor = Monitor.new
|
|
@@ -172,34 +173,37 @@ module Clacky
|
|
|
172
173
|
end
|
|
173
174
|
|
|
174
175
|
# Stop the ticker, render one final frame, and unregister from the owner.
#
# Safe to call repeatedly: once the unregister step has completed, later
# calls return immediately. If an earlier call was interrupted before the
# unregister step finished (e.g. Thread#raise(AgentInterrupted) landing
# between +stop_ticker+ and +unregister_progress+), a follow-up call runs
# the remaining steps again so the handle does not stay orphaned on the
# owner's progress stack.
#
# @param final_message [String, nil] Optional override for the last
#   frame. If nil, the handle composes "<message>… (<elapsed>s)".
def finish(final_message: nil)
  Clacky::Logger.warn("[ph_debug] finish_entry", oid: object_id, state: @state, unreg: @unregistered, msg: @message, eid: @entry_id)

  first_close = nil
  closing_message = nil
  elapsed = nil
  already_unregistered = @monitor.synchronize do
    next true if @unregistered

    first_close = (@state == :running)
    @state = :closed if first_close
    closing_message = final_message || @message
    elapsed = elapsed_seconds
    false
  end
  return if already_unregistered

  stop_ticker

  # A nil final frame is passed through to the owner for quiet fast finishers.
  quiet = @quiet_on_fast_finish && elapsed < FAST_FINISH_THRESHOLD_SECONDS
  final_frame = quiet ? nil : compose_final_frame(closing_message, elapsed)

  Clacky::Logger.warn("[ph_debug] finish_unregister", oid: object_id, eid: @entry_id, first_close: first_close, final_frame: final_frame.to_s[0, 200])
  @owner.unregister_progress(self, final_frame: final_frame)
  # Only flagged after the owner call returns, so an interrupted finish can be retried.
  @monitor.synchronize { @unregistered = true }
  Clacky::Logger.warn("[ph_debug] finish_done", oid: object_id)
end
|
|
205
209
|
alias_method :cancel, :finish
|
data/lib/clacky/version.rb
CHANGED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "base64"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
require "json"
|
|
7
|
+
require_relative "../utils/file_processor"
|
|
8
|
+
|
|
9
|
+
module Clacky
  module Vision
    # OCR sidecar — turns image bytes into a text description by calling a
    # vision-capable model. Used when the user's primary model is text-only
    # (e.g. DeepSeek V4) so that uploaded images and tool screenshots still
    # reach the conversation as useful context.
    #
    # Requests go through Clacky::Client; the image travels as an `image_url`
    # content block carrying a base64 data URL.
    #
    # Successful descriptions are cached on disk, keyed by image bytes, model
    # and prompt. The cache is strictly best-effort: a missing, unreadable or
    # malformed cache file is treated as a cache miss, never as a failure.
    class Resolver
      DEFAULT_PROMPT = <<~PROMPT.strip.freeze
        Extract every legible text and describe the visual content of this image.
        Output as Markdown. Preserve table layout where possible (use Markdown tables).
        For UI screenshots, describe the layout, visible labels, and active state.
        Be thorough but concise — the user cannot see the image and must rely on
        your description.
      PROMPT

      MAX_TOKENS = 8192
      CACHE_DIR = File.join(Dir.home, ".clacky", "ocr_cache")
      CACHE_VERSION = 1

      # Outcome of a #describe call; see #describe for the possible statuses.
      Result = Struct.new(:status, :text, :error, keyword_init: true) do
        def ok?; status == :ok; end
        def empty?; status == :empty; end
        def call_failed?; status == :call_failed; end
        def bad_image?; status == :bad_image; end
      end

      # @param model_entry [Hash] model config with string keys "model",
      #   "base_url", "api_key" and optional "anthropic_format"
      # @param cache_dir [String] directory for the on-disk description cache
      def initialize(model_entry, cache_dir: CACHE_DIR)
        @model_entry = model_entry
        @model = model_entry["model"]
        @base_url = model_entry["base_url"]
        @api_key = model_entry["api_key"]
        @anthropic = !!model_entry["anthropic_format"]
        @cache_dir = cache_dir
      end

      # Describe an image, consulting the on-disk cache first.
      #
      # @param image [Hash] one of { bytes:, mime_type: }, { data_url: } or
      #   { path: } (symbol or string keys)
      # @param prompt [String, nil] instruction for the model; blank falls
      #   back to DEFAULT_PROMPT
      # @return [Result] one of:
      #   status=:ok + text            — sidecar produced a description
      #   status=:empty                — sidecar returned 200 but no usable text (e.g. token budget exhausted by reasoning)
      #   status=:call_failed + error  — network/parse/auth error from the sidecar
      #   status=:bad_image            — image bytes unreadable / empty
      def describe(image, prompt: nil)
        prompt = prompt.to_s.strip
        prompt = DEFAULT_PROMPT if prompt.empty?

        bytes, mime = read_image(image)
        return Result.new(status: :bad_image) if bytes.nil? || bytes.empty?

        cached = cache_get(bytes, prompt)
        return Result.new(status: :ok, text: cached) if cached

        text = call_vlm(bytes, mime, prompt)
        return Result.new(status: :empty) if text.nil? || text.strip.empty?

        cache_put(bytes, prompt, text)
        Result.new(status: :ok, text: text)
      rescue => e
        Clacky::Logger.warn("[Vision::Resolver] failed: #{e.class}: #{e.message}") if defined?(Clacky::Logger)
        Result.new(status: :call_failed, error: "#{e.class}: #{e.message}")
      end

      # Resolve the image argument to [bytes, mime_type]; [nil, nil] when the
      # argument has no recognised source or the source cannot be read.
      private def read_image(image)
        return [nil, nil] unless image.is_a?(Hash)

        bytes    = image[:bytes] || image["bytes"]
        data_url = image[:data_url] || image["data_url"]
        path     = image[:path] || image["path"]

        if bytes
          [bytes, image[:mime_type] || image["mime_type"] || "image/png"]
        elsif data_url
          m = data_url.to_s.match(/\Adata:([^;]+);base64,(.*)\z/m)
          return [nil, nil] unless m
          [Base64.decode64(m[2]), m[1]]
        elsif path
          return [nil, nil] unless File.exist?(path)
          [File.binread(path), Utils::FileProcessor.detect_mime_type(path, nil) || "image/png"]
        else
          [nil, nil]
        end
      end

      # Send prompt + image to the configured model and return its text.
      private def call_vlm(bytes, mime, prompt)
        data_url = "data:#{mime};base64,#{Base64.strict_encode64(bytes)}"
        message = {
          role: "user",
          content: [
            { type: "text", text: prompt },
            { type: "image_url", image_url: { url: data_url } }
          ]
        }

        client = Clacky::Client.new(
          @api_key,
          base_url: @base_url,
          model: @model,
          anthropic_format: @anthropic
        )
        response = client.send_messages([message], model: @model, max_tokens: MAX_TOKENS)
        extract_text(response)
      end

      # Normalise the client response to a String. A Hash response with no
      # content yields "" (reported as :empty by #describe) rather than the
      # Hash's inspect output, which would otherwise be returned — and
      # cached — as if it were a description.
      private def extract_text(response)
        case response
        when String then response
        when Hash then (response[:content] || response["content"]).to_s
        else response.to_s
        end
      end

      # ── Cache ─────────────────────────────────────────────────────────────

      # File-name-safe key derived from the image bytes, model name and prompt.
      private def cache_key(bytes, prompt)
        sha = Digest::SHA256.hexdigest(bytes)
        prompt_sha = Digest::SHA256.hexdigest(prompt)[0, 12]
        "#{sha}_#{@model.to_s.gsub(/[^A-Za-z0-9_.-]/, '_')}_#{prompt_sha}"
      end

      private def cache_path(key)
        File.join(@cache_dir, "#{key}.json")
      end

      # @return [String, nil] cached text, or nil on any kind of miss
      #   (absent, wrong version, malformed, unreadable, blank)
      private def cache_get(bytes, prompt)
        path = cache_path(cache_key(bytes, prompt))
        return nil unless File.exist?(path)

        data = JSON.parse(File.read(path))
        return nil unless data.is_a?(Hash) && data["v"] == CACHE_VERSION

        text = data["text"]
        text.is_a?(String) && !text.strip.empty? ? text : nil
      rescue StandardError
        # Cache is best-effort — a bad entry must not fail the request.
        nil
      end

      private def cache_put(bytes, prompt, text)
        FileUtils.mkdir_p(@cache_dir)
        path = cache_path(cache_key(bytes, prompt))
        File.write(path, JSON.generate({
          "v" => CACHE_VERSION,
          "model" => @model,
          "text" => text,
          "ts" => Time.now.to_i
        }))
      rescue StandardError
        # Cache is best-effort — never fail the request because we can't write.
        nil
      end
    end
  end
end
|
data/lib/clacky/web/app.css
CHANGED
|
@@ -325,6 +325,13 @@ body {
|
|
|
325
325
|
.theme-toggle-btn:active {
|
|
326
326
|
background: var(--color-bg-hover);
|
|
327
327
|
}
|
|
328
|
+
/* Sound-notification toggle shares .theme-toggle-btn; highlight when ON. */
|
|
329
|
+
#notify-toggle-header.notify-on {
|
|
330
|
+
color: var(--color-accent-primary, var(--color-text-primary));
|
|
331
|
+
}
|
|
332
|
+
#notify-toggle-header.notify-on:hover {
|
|
333
|
+
color: var(--color-accent-primary, var(--color-text-primary));
|
|
334
|
+
}
|
|
328
335
|
|
|
329
336
|
/* ── Content Row (Sidebar + Main) ───────────────────────────────────────── */
|
|
330
337
|
#app > aside,
|
|
@@ -2282,19 +2289,31 @@ body {
|
|
|
2282
2289
|
|
|
2283
2290
|
/* ── Diff block (rendered inline within edit/write tool-item) ─────────────── */
|
|
2284
2291
|
.tool-item-diff {
|
|
2285
|
-
margin: 0.
|
|
2286
|
-
padding: 0.
|
|
2292
|
+
margin: 0.375rem 0 0.375rem 1.25rem;
|
|
2293
|
+
padding: 0.5rem 0.625rem;
|
|
2287
2294
|
background: var(--color-bg-secondary);
|
|
2288
2295
|
border: 1px solid var(--color-border-secondary);
|
|
2289
|
-
border-radius:
|
|
2296
|
+
border-radius: 6px;
|
|
2290
2297
|
font-size: 0.6875rem;
|
|
2291
2298
|
font-family: monospace;
|
|
2292
|
-
line-height: 1.
|
|
2299
|
+
line-height: 1.55;
|
|
2293
2300
|
max-height: 20rem;
|
|
2294
|
-
overflow:
|
|
2301
|
+
overflow-x: hidden;
|
|
2302
|
+
overflow-y: auto;
|
|
2303
|
+
scrollbar-width: thin;
|
|
2304
|
+
scrollbar-color: var(--color-border-secondary) transparent;
|
|
2295
2305
|
}
|
|
2306
|
+
.tool-item-diff::-webkit-scrollbar { width: 6px; height: 6px; }
|
|
2307
|
+
.tool-item-diff::-webkit-scrollbar-track { background: transparent; }
|
|
2308
|
+
.tool-item-diff::-webkit-scrollbar-thumb {
|
|
2309
|
+
background: var(--color-border-secondary);
|
|
2310
|
+
border-radius: 3px;
|
|
2311
|
+
}
|
|
2312
|
+
.tool-item-diff::-webkit-scrollbar-thumb:hover { background: var(--color-text-tertiary); }
|
|
2296
2313
|
.diff-line {
|
|
2297
|
-
white-space: pre;
|
|
2314
|
+
white-space: pre-wrap;
|
|
2315
|
+
word-break: break-all;
|
|
2316
|
+
overflow-wrap: anywhere;
|
|
2298
2317
|
padding: 0 0.25rem;
|
|
2299
2318
|
border-radius: 2px;
|
|
2300
2319
|
}
|
|
@@ -3559,6 +3578,37 @@ body {
|
|
|
3559
3578
|
}
|
|
3560
3579
|
.btn-settings-action:hover { background: var(--color-bg-hover); border-color: var(--color-accent-primary); }
|
|
3561
3580
|
.btn-settings-action:disabled { opacity: 0.5; cursor: not-allowed; }
|
|
3581
|
+
|
|
3582
|
+
.settings-network {
|
|
3583
|
+
display: flex;
|
|
3584
|
+
flex-direction: column;
|
|
3585
|
+
gap: 0.875rem;
|
|
3586
|
+
padding: 0.875rem 1rem;
|
|
3587
|
+
background: var(--color-bg-secondary);
|
|
3588
|
+
border: 1px solid var(--color-border-primary);
|
|
3589
|
+
border-radius: 10px;
|
|
3590
|
+
}
|
|
3591
|
+
.settings-network-desc {
|
|
3592
|
+
font-size: 0.8125rem;
|
|
3593
|
+
color: var(--color-text-secondary);
|
|
3594
|
+
line-height: 1.5;
|
|
3595
|
+
margin: 0;
|
|
3596
|
+
}
|
|
3597
|
+
.settings-network-url {
|
|
3598
|
+
display: flex;
|
|
3599
|
+
flex-direction: column;
|
|
3600
|
+
gap: 0.375rem;
|
|
3601
|
+
}
|
|
3602
|
+
.settings-network-url-label {
|
|
3603
|
+
font-size: 0.75rem;
|
|
3604
|
+
color: var(--color-text-secondary);
|
|
3605
|
+
}
|
|
3606
|
+
.settings-network-url-row {
|
|
3607
|
+
display: flex;
|
|
3608
|
+
gap: 0.5rem;
|
|
3609
|
+
align-items: center;
|
|
3610
|
+
}
|
|
3611
|
+
.settings-network-url-row .field-input { flex: 1; }
|
|
3562
3612
|
.settings-loading, .settings-empty, .settings-error {
|
|
3563
3613
|
color: var(--color-text-secondary);
|
|
3564
3614
|
font-size: 0.8125rem;
|
data/lib/clacky/web/i18n.js
CHANGED
|
@@ -505,12 +505,13 @@ const I18n = (() => {
|
|
|
505
505
|
"settings.models.badge.default": "Default",
|
|
506
506
|
"settings.models.badge.lite": "Lite",
|
|
507
507
|
"settings.media.title": "Media Generation",
|
|
508
|
-
"settings.media.desc": "Optional. Image / video / audio
|
|
508
|
+
"settings.media.desc": "Optional. Image / video / audio / OCR sidecar models.",
|
|
509
509
|
"settings.media.loading": "Loading…",
|
|
510
510
|
"settings.media.error": "Failed to load: {{msg}}",
|
|
511
511
|
"settings.media.kind.image": "Image",
|
|
512
512
|
"settings.media.kind.video": "Video",
|
|
513
513
|
"settings.media.kind.audio": "Audio",
|
|
514
|
+
"settings.media.kind.ocr": "OCR",
|
|
514
515
|
"settings.media.source.off": "Off",
|
|
515
516
|
"settings.media.source.auto": "Auto",
|
|
516
517
|
"settings.media.source.custom": "Custom",
|
|
@@ -597,6 +598,14 @@ const I18n = (() => {
|
|
|
597
598
|
"settings.browser.btn": "🌐 Configure Browser",
|
|
598
599
|
"settings.browser.btn.reconfigure": "🌐 Reconfigure Browser",
|
|
599
600
|
"settings.browser.btn.starting": "Starting…",
|
|
601
|
+
"settings.network.title": "Network",
|
|
602
|
+
"settings.network.desc": "Clacky always ignores HTTP_PROXY / HTTPS_PROXY from your shell. Set an explicit proxy URL below to route Clacky's outbound traffic through a proxy.",
|
|
603
|
+
"settings.network.proxyUrl": "Proxy URL",
|
|
604
|
+
"settings.network.save": "Save",
|
|
605
|
+
"settings.network.saved": "Saved",
|
|
606
|
+
"settings.network.clear": "Clear",
|
|
607
|
+
"settings.network.cleared": "Cleared — direct connection",
|
|
608
|
+
"settings.network.invalidUrl": "Invalid URL — use http://host:port or http://user:pass@host:port",
|
|
600
609
|
"settings.brand.title": "Brand & License",
|
|
601
610
|
"settings.brand.label.brand": "Brand",
|
|
602
611
|
"settings.brand.label.status": "Status",
|
|
@@ -697,6 +706,8 @@ const I18n = (() => {
|
|
|
697
706
|
"brand.banner.freePromptBoth": "Welcome to {{name}} — {{free}} free skill{{freePlural}} ready to use, plus {{paid}} premium skill{{paidPlural}} unlockable with a serial number.",
|
|
698
707
|
|
|
699
708
|
"header.owner.tooltip": "Creator — click to open Creator Hub",
|
|
709
|
+
"notify.tooltip.on": "Sound on task complete: ON (click to mute)",
|
|
710
|
+
"notify.tooltip.off": "Sound on task complete: OFF (click to enable)",
|
|
700
711
|
|
|
701
712
|
// ── Session info bar / Model switcher benchmark ──
|
|
702
713
|
"sib.bench.btn": "Benchmark",
|
|
@@ -1245,12 +1256,13 @@ const I18n = (() => {
|
|
|
1245
1256
|
"settings.models.badge.default": "默认",
|
|
1246
1257
|
"settings.models.badge.lite": "轻量",
|
|
1247
1258
|
"settings.media.title": "媒体生成",
|
|
1248
|
-
"settings.media.desc": "可选。图片 / 视频 / 音频
|
|
1259
|
+
"settings.media.desc": "可选。图片 / 视频 / 音频 / 图片理解(OCR)副模型。",
|
|
1249
1260
|
"settings.media.loading": "加载中…",
|
|
1250
1261
|
"settings.media.error": "加载失败:{{msg}}",
|
|
1251
1262
|
"settings.media.kind.image": "图片",
|
|
1252
1263
|
"settings.media.kind.video": "视频",
|
|
1253
1264
|
"settings.media.kind.audio": "音频",
|
|
1265
|
+
"settings.media.kind.ocr": "OCR",
|
|
1254
1266
|
"settings.media.source.off": "关闭",
|
|
1255
1267
|
"settings.media.source.auto": "自动",
|
|
1256
1268
|
"settings.media.source.custom": "自定义",
|
|
@@ -1337,6 +1349,14 @@ const I18n = (() => {
|
|
|
1337
1349
|
"settings.browser.btn": "🌐 配置浏览器",
|
|
1338
1350
|
"settings.browser.btn.reconfigure": "🌐 重新配置浏览器",
|
|
1339
1351
|
"settings.browser.btn.starting": "启动中…",
|
|
1352
|
+
"settings.network.title": "网络",
|
|
1353
|
+
"settings.network.desc": "Clacky 始终忽略系统的 HTTP_PROXY / HTTPS_PROXY。如需让 Clacky 走代理,请在下方填入显式代理地址。",
|
|
1354
|
+
"settings.network.proxyUrl": "代理地址",
|
|
1355
|
+
"settings.network.save": "保存",
|
|
1356
|
+
"settings.network.saved": "已保存",
|
|
1357
|
+
"settings.network.clear": "清除",
|
|
1358
|
+
"settings.network.cleared": "已清除 — 直连",
|
|
1359
|
+
"settings.network.invalidUrl": "地址格式不正确,请使用 http://host:port 或 http://user:pass@host:port",
|
|
1340
1360
|
"settings.brand.title": "品牌 & 授权",
|
|
1341
1361
|
"settings.brand.label.brand": "品牌",
|
|
1342
1362
|
"settings.brand.label.status": "状态",
|
|
@@ -1437,6 +1457,8 @@ const I18n = (() => {
|
|
|
1437
1457
|
"brand.banner.freePromptBoth": "欢迎使用 {{name}} — 已自动安装 {{free}} 个免费技能,还有 {{paid}} 个增值技能可输入序列号解锁。",
|
|
1438
1458
|
|
|
1439
1459
|
"header.owner.tooltip": "创作者 — 点击进入创作者中心",
|
|
1460
|
+
"notify.tooltip.on": "任务完成提示音:已开启(点击关闭)",
|
|
1461
|
+
"notify.tooltip.off": "任务完成提示音:已关闭(点击开启)",
|
|
1440
1462
|
|
|
1441
1463
|
// ── 会话信息栏 / 模型切换器 测速 ──
|
|
1442
1464
|
"sib.bench.btn": "测速",
|