openclacky 1.2.13 → 1.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +4 -0
- data/CHANGELOG.md +28 -0
- data/lib/clacky/agent/session_serializer.rb +1 -0
- data/lib/clacky/agent.rb +123 -14
- data/lib/clacky/agent_config.rb +136 -10
- data/lib/clacky/client.rb +59 -46
- data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
- data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
- data/lib/clacky/providers.rb +37 -0
- data/lib/clacky/proxy_config.rb +65 -0
- data/lib/clacky/server/http_server.rb +202 -5
- data/lib/clacky/server/scheduler.rb +13 -10
- data/lib/clacky/ui2/progress_handle.rb +17 -13
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/vision/resolver.rb +157 -0
- data/lib/clacky/web/app.css +56 -6
- data/lib/clacky/web/i18n.js +24 -2
- data/lib/clacky/web/index.html +21 -0
- data/lib/clacky/web/notify.js +154 -0
- data/lib/clacky/web/notify.mp3 +0 -0
- data/lib/clacky/web/settings.js +88 -12
- data/lib/clacky/web/ws-dispatcher.js +8 -0
- data/lib/clacky.rb +4 -0
- metadata +7 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 84e7378b08b627bad34d327d1bd82cc7efbfe980a690d64e678242917be8125d
|
|
4
|
+
data.tar.gz: 87e4c1b8e99f2195c98c85124816503fd11436b3bdb38465bb7289fab1204fa3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 36cb343f4a81222b3a2861dcd80529f0d3216a341e19ea7ebfd1ea6dbebded9c0d31a212a645cffb7268e9faf844be9f257d739677a17d0387bf033970dc7675
|
|
7
|
+
data.tar.gz: 16d08fc33223c56024a27072de5a0f435ac969c1ac55fffa39738e0e8d9bfe77f1ebbe4c4cafb8793c2dac1367744daeb417b97db9a854ba7e04ffd4d2b17042
|
|
@@ -177,6 +177,10 @@ Ask the user whether to use `--update-latest` before running the script.
|
|
|
177
177
|
The script uses `set -euo pipefail` and stops on any failure. Common issues:
|
|
178
178
|
|
|
179
179
|
- **Tests fail** → fix tests before re-running
|
|
180
|
+
- **Web search smoke test fails (Bing)** → This often happens due to datacenter IP fingerprinting (anti-scrape blocking) returning irrelevant top-domain filler (like Mr.Bricolage). If you see "No ruby-related result from bing" during the smoke test:
|
|
181
|
+
1. Manually run `bundle exec rspec spec/integration/web_search_smoke_spec.rb --tag smoke` to verify
|
|
182
|
+
2. If it's the anti-scrape block, temporarily edit `spec/integration/web_search_smoke_spec.rb` to skip the relevance check on failure (e.g., using `skip "Bing returned anti-scrape garbage..."`)
|
|
183
|
+
3. Commit the change ("ci: skip bing smoke test relevance check on anti-scrape") and re-run the release script
|
|
180
184
|
- **CI fails** → script pushes then watches CI; fix and re-push if needed
|
|
181
185
|
- **gem push fails** → check RubyGems credentials (`gem signin`)
|
|
182
186
|
- **gh release fails** → check `gh auth status`
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,34 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.15] - 2026-06-10
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Proxy configuration support
|
|
12
|
+
- Optional sound notification on task completion in Web UI
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- Prevent scheduler thread from dying on tick exception
|
|
16
|
+
|
|
17
|
+
### More
|
|
18
|
+
- Tool diff CSS refinement
|
|
19
|
+
|
|
20
|
+
## [1.2.14] - 2026-06-08
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
- OCR support for scanned PDFs (optical character recognition)
|
|
24
|
+
- VLM-based PDF parser for improved document understanding
|
|
25
|
+
|
|
26
|
+
### Improved
|
|
27
|
+
- PDF OCR processing quality
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- PDF processing not appearing in session history
|
|
31
|
+
- Stale progress indicator that wouldn't dismiss
|
|
32
|
+
|
|
33
|
+
### More
|
|
34
|
+
- Document Bing smoke test anti-scrape failure handling in gem-release
|
|
35
|
+
|
|
8
36
|
## [1.2.13] - 2026-06-08
|
|
9
37
|
|
|
10
38
|
### Added
|
|
@@ -272,6 +272,7 @@ module Clacky
|
|
|
272
272
|
# Disk files (PDF, doc, etc.): stored in display_files on the user message at send time
|
|
273
273
|
disk_files = Array(msg[:display_files]).map { |f|
|
|
274
274
|
{ name: f[:name] || f["name"], type: f[:type] || f["type"] || "file",
|
|
275
|
+
path: f[:path] || f["path"],
|
|
275
276
|
preview_path: f[:preview_path] || f["preview_path"] }
|
|
276
277
|
}
|
|
277
278
|
all_files = image_files + disk_files
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -341,19 +341,23 @@ module Clacky
|
|
|
341
341
|
# the file_prompt builder can't emit the "not supported by model" /
|
|
342
342
|
# "too large" note for downgraded images.
|
|
343
343
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
344
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
344
345
|
ref = Utils::FileProcessor.process_path(path, name: name)
|
|
345
346
|
{ name: ref.name, type: ref.type.to_s, path: ref.original_path,
|
|
346
347
|
preview_path: ref.preview_path, parse_error: ref.parse_error, parser_path: ref.parser_path,
|
|
347
|
-
downgrade_reason: downgrade_reason }
|
|
348
|
+
downgrade_reason: downgrade_reason, ocr_text: ocr_text }
|
|
348
349
|
end
|
|
349
350
|
|
|
350
351
|
# Build display_files for replay: lightweight metadata so the UI can reconstruct
|
|
351
|
-
# file badges (PDF, doc, etc.) on page refresh.
|
|
352
|
-
#
|
|
352
|
+
# file badges (PDF, doc, etc.) on page refresh. Vision-inlined images are NOT
|
|
353
|
+
# stored here — they recover from image_url blocks in user_content. Downgraded
|
|
354
|
+
# images (provider has no vision / too large / OCR'd) DO need path here so the
|
|
355
|
+
# UI can re-render them from the on-disk copy across session switches.
|
|
353
356
|
display_files = all_disk_files.filter_map do |f|
|
|
354
357
|
name = f[:name] || f["name"]
|
|
355
358
|
next unless name
|
|
356
359
|
{ name: name, type: f[:type] || f["type"] || "file",
|
|
360
|
+
path: f[:path] || f["path"],
|
|
357
361
|
preview_path: f[:preview_path] || f["preview_path"] }
|
|
358
362
|
end
|
|
359
363
|
|
|
@@ -381,6 +385,7 @@ module Clacky
|
|
|
381
385
|
parse_error = f[:parse_error] || f["parse_error"]
|
|
382
386
|
parser_path = f[:parser_path] || f["parser_path"]
|
|
383
387
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
388
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
384
389
|
|
|
385
390
|
next unless name
|
|
386
391
|
|
|
@@ -396,6 +401,14 @@ module Clacky
|
|
|
396
401
|
note = downgrade_note_for(downgrade_reason)
|
|
397
402
|
lines << "Note: #{note}" if note
|
|
398
403
|
|
|
404
|
+
# OCR transcription (when an OCR sidecar successfully described
|
|
405
|
+
# an image the primary model couldn't see). Embedded inline so
|
|
406
|
+
# the LLM has the description colocated with the file entry.
|
|
407
|
+
if ocr_text && !ocr_text.strip.empty?
|
|
408
|
+
lines << "OCR description:"
|
|
409
|
+
lines << ocr_text.strip
|
|
410
|
+
end
|
|
411
|
+
|
|
399
412
|
# Parser failed — instruct LLM to fix and re-run
|
|
400
413
|
if preview_path.nil? && parse_error
|
|
401
414
|
lines << "Parse failed: #{parse_error}"
|
|
@@ -1098,6 +1111,9 @@ module Clacky
|
|
|
1098
1111
|
# base64 data in a `role:"tool"` message causes it to be JSON-encoded as
|
|
1099
1112
|
# plain text, inflating token counts by 20-40x. The tool result carries a
|
|
1100
1113
|
# plain-text description for the LLM; the actual image is delivered here.
|
|
1114
|
+
vision_supported = @config.current_model_supports?(:vision)
|
|
1115
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1116
|
+
|
|
1101
1117
|
tool_results.each do |tr|
|
|
1102
1118
|
inject = tr[:image_inject]
|
|
1103
1119
|
next unless inject
|
|
@@ -1109,12 +1125,18 @@ module Clacky
|
|
|
1109
1125
|
|
|
1110
1126
|
data_url = "data:#{mime_type};base64,#{base64_data}"
|
|
1111
1127
|
label = path ? File.basename(path.to_s) : "image"
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1128
|
+
|
|
1129
|
+
image_content =
|
|
1130
|
+
if vision_supported
|
|
1131
|
+
image_block = { type: "image_url", image_url: { url: data_url } }
|
|
1132
|
+
image_block[:image_path] = path if path
|
|
1133
|
+
[{ type: "text", text: "[Image: #{label}]" }, image_block]
|
|
1134
|
+
else
|
|
1135
|
+
ocr_result = try_ocr(ocr_entry, data_url: data_url, name: label)
|
|
1136
|
+
text = ocr_text_for_inject(label, ocr_result, ocr_entry)
|
|
1137
|
+
[{ type: "text", text: text }]
|
|
1138
|
+
end
|
|
1139
|
+
|
|
1118
1140
|
@history.append({
|
|
1119
1141
|
role: "user",
|
|
1120
1142
|
content: image_content,
|
|
@@ -1494,6 +1516,11 @@ module Clacky
|
|
|
1494
1516
|
# the current model (no stale state on `/model` switch).
|
|
1495
1517
|
vision_supported = @config.current_model_supports?(:vision)
|
|
1496
1518
|
|
|
1519
|
+
# OCR sidecar — only consulted when the primary doesn't see images.
|
|
1520
|
+
# When the sidecar entry has "primary"=>true, the primary itself can see,
|
|
1521
|
+
# so vision_supported was already true and we never enter the OCR branch.
|
|
1522
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1523
|
+
|
|
1497
1524
|
vision_images = [] # Array of { url:, name:, size_bytes:, path: }
|
|
1498
1525
|
downgraded = []
|
|
1499
1526
|
|
|
@@ -1510,8 +1537,11 @@ module Clacky
|
|
|
1510
1537
|
file_ref = Utils::FileProcessor.save_image_to_disk(body: raw, mime_type: mime, filename: name)
|
|
1511
1538
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1512
1539
|
if reason
|
|
1513
|
-
|
|
1514
|
-
|
|
1540
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, data_url: data_url, name: name) : nil
|
|
1541
|
+
entry = { name: name, path: file_ref.original_path, type: "image",
|
|
1542
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1543
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1544
|
+
downgraded << entry
|
|
1515
1545
|
else
|
|
1516
1546
|
vision_images << { url: data_url, name: name, size_bytes: byte_size, path: file_ref.original_path }
|
|
1517
1547
|
end
|
|
@@ -1522,8 +1552,11 @@ module Clacky
|
|
|
1522
1552
|
byte_size = (b64_data.bytesize * 3) / 4
|
|
1523
1553
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1524
1554
|
if reason
|
|
1525
|
-
|
|
1526
|
-
|
|
1555
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, path: path, name: name) : nil
|
|
1556
|
+
entry = { name: name, path: path, type: "image",
|
|
1557
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1558
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1559
|
+
downgraded << entry
|
|
1527
1560
|
else
|
|
1528
1561
|
vision_images << { url: data_url_from_path, name: name, size_bytes: byte_size, path: path }
|
|
1529
1562
|
end
|
|
@@ -1536,6 +1569,30 @@ module Clacky
|
|
|
1536
1569
|
[vision_images, downgraded]
|
|
1537
1570
|
end
|
|
1538
1571
|
|
|
1572
|
+
# Best-effort OCR through the configured sidecar. Returns nil when no
|
|
1573
|
+
# sidecar is configured or the sidecar is the primary model itself — caller falls back to the
|
|
1574
|
+
# ":provider_no_vision" downgrade note (today's behaviour).
|
|
1575
|
+
# @return [Clacky::Vision::Resolver::Result, nil]
|
|
1576
|
+
# nil — no sidecar exists or sidecar IS the primary (no point in an extra hop).
|
|
1577
|
+
# Caller treats this as ":provider_no_vision" (configure a sidecar).
|
|
1578
|
+
# Result — outcome from the sidecar call. status=:ok carries text;
|
|
1579
|
+
# :empty / :call_failed / :bad_image each get their own message
|
|
1580
|
+
# so the user can tell "image content unreadable" from
|
|
1581
|
+
# "sidecar misconfigured / down".
|
|
1582
|
+
private def try_ocr(ocr_entry, data_url: nil, path: nil, name: nil)
|
|
1583
|
+
return nil unless ocr_entry
|
|
1584
|
+
return nil if ocr_entry["primary"]
|
|
1585
|
+
|
|
1586
|
+
image = data_url ? { data_url: data_url } : { path: path }
|
|
1587
|
+
|
|
1588
|
+
@ui&.show_progress("OCR...", progress_type: "thinking", phase: "active")
|
|
1589
|
+
begin
|
|
1590
|
+
Clacky::Vision::Resolver.new(ocr_entry).describe(image)
|
|
1591
|
+
ensure
|
|
1592
|
+
@ui&.show_progress(phase: "done")
|
|
1593
|
+
end
|
|
1594
|
+
end
|
|
1595
|
+
|
|
1539
1596
|
# Decide whether an image must be downgraded to a disk ref, and if so why.
|
|
1540
1597
|
# Precedence: provider capability is checked first — a text-only model
|
|
1541
1598
|
# can't use the image at any size, so there's no point re-checking size.
|
|
@@ -1554,9 +1611,61 @@ module Clacky
|
|
|
1554
1611
|
private def downgrade_note_for(reason)
|
|
1555
1612
|
case reason&.to_sym
|
|
1556
1613
|
when :provider_no_vision
|
|
1557
|
-
"The current model does not support vision input
|
|
1614
|
+
"The current model does not support vision input and no OCR sidecar is configured. Tell the user clearly that to analyze this image they need to either: (1) configure an OCR sidecar model in Settings → Media → OCR (any vision-capable model works as the sidecar — e.g. gemini-3-5-flash, gpt-4o-mini, claude-3-5-haiku), or (2) switch the current model to a vision-capable one. Do not attempt to guess the image content."
|
|
1558
1615
|
when :too_large
|
|
1559
1616
|
"Image was too large for inline delivery and has been saved to disk. Read it with a vision-capable tool/model if needed."
|
|
1617
|
+
when :ocr_resolved
|
|
1618
|
+
"The current model does not support vision input. The image has been transcribed by an OCR sidecar model — the description below is what the model sees in place of the raw pixels."
|
|
1619
|
+
when :ocr_call_failed
|
|
1620
|
+
"The current model does not support vision and the configured OCR sidecar call failed. Tell the user the sidecar (Settings → Media → OCR) errored — likely a misconfigured base_url / api_key, or the upstream is down. They can retry, fix the sidecar config, or switch to a vision-capable primary model. Do not guess the image content."
|
|
1621
|
+
when :ocr_empty
|
|
1622
|
+
"The current model does not support vision. The OCR sidecar responded but returned no readable text (the model produced no description — possibly the image is blank, or the model exhausted its token budget on internal reasoning). Tell the user honestly; do not guess the image content."
|
|
1623
|
+
when :ocr_bad_image
|
|
1624
|
+
"The current model does not support vision. The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
|
|
1625
|
+
end
|
|
1626
|
+
end
|
|
1627
|
+
|
|
1628
|
+
# Mutates `entry` in place based on the OCR Result outcome.
|
|
1629
|
+
# Sets `:ocr_text` (only on :ok) and rewrites `:downgrade_reason` to one
|
|
1630
|
+
# of :ocr_resolved / :ocr_call_failed / :ocr_empty / :ocr_bad_image.
|
|
1631
|
+
# When ocr_result is nil (no sidecar configured) leaves the original
|
|
1632
|
+
# :provider_no_vision reason untouched.
|
|
1633
|
+
private def apply_ocr_outcome!(entry, ocr_result)
|
|
1634
|
+
return entry unless ocr_result
|
|
1635
|
+
|
|
1636
|
+
case ocr_result.status
|
|
1637
|
+
when :ok
|
|
1638
|
+
entry[:ocr_text] = ocr_result.text
|
|
1639
|
+
entry[:downgrade_reason] = :ocr_resolved
|
|
1640
|
+
when :empty
|
|
1641
|
+
entry[:downgrade_reason] = :ocr_empty
|
|
1642
|
+
when :call_failed
|
|
1643
|
+
entry[:downgrade_reason] = :ocr_call_failed
|
|
1644
|
+
entry[:ocr_error] = ocr_result.error
|
|
1645
|
+
when :bad_image
|
|
1646
|
+
entry[:downgrade_reason] = :ocr_bad_image
|
|
1647
|
+
end
|
|
1648
|
+
entry
|
|
1649
|
+
end
|
|
1650
|
+
|
|
1651
|
+
# Build the inline text block used by the image_inject path (tool screenshots,
|
|
1652
|
+
# generated images, etc. that arrive as content blocks rather than as
|
|
1653
|
+
# display_files entries).
|
|
1654
|
+
private def ocr_text_for_inject(label, ocr_result, ocr_entry)
|
|
1655
|
+
header = "[Image: #{label}]"
|
|
1656
|
+
if ocr_result.nil?
|
|
1657
|
+
return "#{header} The current model has no vision and no OCR sidecar is configured. Tell the user to either configure an OCR sidecar in Settings → Media → OCR, or switch to a vision-capable model, then retry. Do not guess the image content."
|
|
1658
|
+
end
|
|
1659
|
+
|
|
1660
|
+
case ocr_result.status
|
|
1661
|
+
when :ok
|
|
1662
|
+
"#{header}\nOCR description (the current model cannot see images directly; this transcription was produced by sidecar #{ocr_entry["model"]}):\n#{ocr_result.text.strip}"
|
|
1663
|
+
when :empty
|
|
1664
|
+
"#{header} The OCR sidecar (#{ocr_entry["model"]}) returned no readable text. The image may be blank, or the sidecar exhausted its token budget on internal reasoning. Tell the user honestly; do not guess the image content."
|
|
1665
|
+
when :call_failed
|
|
1666
|
+
"#{header} The OCR sidecar (#{ocr_entry["model"]}) call failed: #{ocr_result.error}. Tell the user the sidecar errored (likely a misconfigured base_url / api_key in Settings → Media → OCR, or the upstream is down). They can retry, fix the sidecar, or switch to a vision-capable primary model. Do not guess the image content."
|
|
1667
|
+
when :bad_image
|
|
1668
|
+
"#{header} The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
|
|
1560
1669
|
end
|
|
1561
1670
|
end
|
|
1562
1671
|
|
data/lib/clacky/agent_config.rb
CHANGED
|
@@ -164,7 +164,8 @@ module Clacky
|
|
|
164
164
|
:models, :current_model_index, :current_model_id,
|
|
165
165
|
:memory_update_enabled, :skill_evolution,
|
|
166
166
|
:max_running_agents, :max_idle_agents,
|
|
167
|
-
:default_working_dir
|
|
167
|
+
:default_working_dir,
|
|
168
|
+
:proxy_url
|
|
168
169
|
|
|
169
170
|
def initialize(options = {})
|
|
170
171
|
@permission_mode = validate_permission_mode(options[:permission_mode])
|
|
@@ -217,6 +218,11 @@ module Clacky
|
|
|
217
218
|
|
|
218
219
|
@default_working_dir = options[:default_working_dir] || ENV["CLACKY_WORKSPACE_DIR"]
|
|
219
220
|
|
|
221
|
+
# HTTP proxy policy. The user's shell ENV (HTTP_PROXY etc.) is always
|
|
222
|
+
# ignored — set proxy_url here to route Clacky's outbound HTTP through
|
|
223
|
+
# a proxy. Leave nil to go direct.
|
|
224
|
+
@proxy_url = options[:proxy_url]
|
|
225
|
+
|
|
220
226
|
# Per-session virtual model overlay.
|
|
221
227
|
# When set, #current_model returns a *merged* hash (the resolved @models
|
|
222
228
|
# entry merged with this overlay) without mutating the shared @models
|
|
@@ -390,6 +396,7 @@ module Clacky
|
|
|
390
396
|
FileUtils.mkdir_p(config_dir)
|
|
391
397
|
File.write(config_file, to_yaml)
|
|
392
398
|
FileUtils.chmod(0o600, config_file)
|
|
399
|
+
Clacky::ProxyConfig.reset_cache! if defined?(Clacky::ProxyConfig)
|
|
393
400
|
end
|
|
394
401
|
|
|
395
402
|
# Convert to YAML format (top-level array)
|
|
@@ -407,6 +414,7 @@ module Clacky
|
|
|
407
414
|
memory_update_enabled
|
|
408
415
|
skill_evolution max_running_agents max_idle_agents
|
|
409
416
|
default_working_dir
|
|
417
|
+
proxy_url
|
|
410
418
|
].freeze
|
|
411
419
|
|
|
412
420
|
# Serialize the current agent configuration to YAML.
|
|
@@ -425,7 +433,8 @@ module Clacky
|
|
|
425
433
|
"skill_evolution" => @skill_evolution,
|
|
426
434
|
"max_running_agents" => @max_running_agents,
|
|
427
435
|
"max_idle_agents" => @max_idle_agents,
|
|
428
|
-
"default_working_dir" => @default_working_dir
|
|
436
|
+
"default_working_dir" => @default_working_dir,
|
|
437
|
+
"proxy_url" => @proxy_url
|
|
429
438
|
}
|
|
430
439
|
YAML.dump("settings" => settings, "models" => persistable_models)
|
|
431
440
|
end
|
|
@@ -606,12 +615,16 @@ module Clacky
|
|
|
606
615
|
}.compact
|
|
607
616
|
end
|
|
608
617
|
|
|
609
|
-
# Find model by type (default or lite or media kind)
|
|
618
|
+
# Find model by type (default, lite, a media kind, or the OCR sidecar)
|
|
610
619
|
# Returns the model hash or nil if not found.
|
|
611
620
|
# For media kinds (image/video/audio): explicit user-configured (custom)
|
|
612
621
|
# entries win; otherwise an auto-derived virtual entry is returned
|
|
613
622
|
# based on the default model's provider — mirroring how lite is
|
|
614
623
|
# virtually derived via #lite_model_config_for_current.
|
|
624
|
+
# For "ocr": same custom→auto→nil pattern. Auto path first checks
|
|
625
|
+
# whether the default model itself supports vision (zero-overhead path,
|
|
626
|
+
# no sidecar needed); if not, derives from the provider's
|
|
627
|
+
# default_ocr_model.
|
|
615
628
|
def find_model_by_type(type)
|
|
616
629
|
kind = type.to_s
|
|
617
630
|
if Clacky::Providers::MEDIA_KINDS.include?(kind)
|
|
@@ -622,16 +635,24 @@ module Clacky
|
|
|
622
635
|
end
|
|
623
636
|
return derive_media_model(kind, model_override: entry && entry["model"])
|
|
624
637
|
end
|
|
638
|
+
if kind == "ocr"
|
|
639
|
+
entry = @models.find { |m| m["type"] == "ocr" }
|
|
640
|
+
return nil if entry && entry["disabled"]
|
|
641
|
+
if entry && entry["base_url"].to_s.strip != "" && entry["api_key"].to_s.strip != ""
|
|
642
|
+
return entry
|
|
643
|
+
end
|
|
644
|
+
return derive_ocr_model(model_override: entry && entry["model"])
|
|
645
|
+
end
|
|
625
646
|
@models.find { |m| m["type"] == type }
|
|
626
647
|
end
|
|
627
648
|
|
|
628
649
|
private def derive_media_model(kind, model_override: nil)
|
|
629
|
-
|
|
630
|
-
return nil unless
|
|
650
|
+
anchor = current_model || find_model_by_type("default")
|
|
651
|
+
return nil unless anchor
|
|
631
652
|
|
|
632
653
|
provider_id = Clacky::Providers.resolve_provider(
|
|
633
|
-
base_url:
|
|
634
|
-
api_key:
|
|
654
|
+
base_url: anchor["base_url"],
|
|
655
|
+
api_key: anchor["api_key"]
|
|
635
656
|
)
|
|
636
657
|
return nil unless provider_id
|
|
637
658
|
|
|
@@ -649,8 +670,8 @@ module Clacky
|
|
|
649
670
|
|
|
650
671
|
{
|
|
651
672
|
"model" => model_name,
|
|
652
|
-
"base_url" =>
|
|
653
|
-
"api_key" =>
|
|
673
|
+
"base_url" => anchor["base_url"],
|
|
674
|
+
"api_key" => anchor["api_key"],
|
|
654
675
|
"type" => kind,
|
|
655
676
|
"auto_injected" => true
|
|
656
677
|
}
|
|
@@ -662,6 +683,54 @@ module Clacky
|
|
|
662
683
|
@models.reject! { |m| m["auto_injected"] && Clacky::Providers::MEDIA_KINDS.include?(m["type"].to_s) }
|
|
663
684
|
end
|
|
664
685
|
|
|
686
|
+
# Derive an OCR sidecar model entry from the provider of the current model (or the default model when no current model is set).
|
|
687
|
+
# Resolution order:
|
|
688
|
+
# 1. If that model itself supports vision → return it
|
|
689
|
+
# directly (zero-overhead path; no separate sidecar call needed).
|
|
690
|
+
# 2. Otherwise look up the provider's default_ocr_model (or honour
|
|
691
|
+
# model_override if it's a vision-capable model on that provider).
|
|
692
|
+
# 3. nil when the provider has no vision-capable lineup at all
|
|
693
|
+
# (e.g. DeepSeek V4) — caller falls back to today's "no vision" UX.
|
|
694
|
+
private def derive_ocr_model(model_override: nil)
|
|
695
|
+
# Anchor on the model the session is *actually* running on, not the
|
|
696
|
+
# yml `type: default` marker — those diverge whenever the user
|
|
697
|
+
# switches model mid-session (e.g. opus → deepseek).
|
|
698
|
+
anchor = current_model || find_model_by_type("default")
|
|
699
|
+
return nil unless anchor
|
|
700
|
+
|
|
701
|
+
provider_id = Clacky::Providers.resolve_provider(
|
|
702
|
+
base_url: anchor["base_url"], api_key: anchor["api_key"]
|
|
703
|
+
)
|
|
704
|
+
return nil unless provider_id
|
|
705
|
+
|
|
706
|
+
if Clacky::Providers.supports?(provider_id, :vision, model_name: anchor["model"])
|
|
707
|
+
return {
|
|
708
|
+
"model" => anchor["model"],
|
|
709
|
+
"base_url" => anchor["base_url"],
|
|
710
|
+
"api_key" => anchor["api_key"],
|
|
711
|
+
"type" => "ocr",
|
|
712
|
+
"auto_injected" => true,
|
|
713
|
+
"primary" => true
|
|
714
|
+
}
|
|
715
|
+
end
|
|
716
|
+
|
|
717
|
+
candidates = Clacky::Providers.ocr_models(provider_id)
|
|
718
|
+
model_name = if model_override && candidates.include?(model_override)
|
|
719
|
+
model_override
|
|
720
|
+
else
|
|
721
|
+
Clacky::Providers.default_ocr_model(provider_id)
|
|
722
|
+
end
|
|
723
|
+
return nil if model_name.nil? || model_name.to_s.empty?
|
|
724
|
+
|
|
725
|
+
{
|
|
726
|
+
"model" => model_name,
|
|
727
|
+
"base_url" => anchor["base_url"],
|
|
728
|
+
"api_key" => anchor["api_key"],
|
|
729
|
+
"type" => "ocr",
|
|
730
|
+
"auto_injected" => true
|
|
731
|
+
}
|
|
732
|
+
end
|
|
733
|
+
|
|
665
734
|
# Returns the configured/derived media model entry for `kind`, plus a
|
|
666
735
|
# hint about its source. UI uses this to render the tri-state control.
|
|
667
736
|
# @param kind [String] one of "image" / "video" / "audio"
|
|
@@ -738,6 +807,63 @@ module Clacky
|
|
|
738
807
|
}
|
|
739
808
|
end
|
|
740
809
|
|
|
810
|
+
# Tri-state introspection for the OCR sidecar — mirrors #media_state shape
|
|
811
|
+
# so the Settings UI can reuse the same row component.
|
|
812
|
+
# @return [Hash{String=>Object}] keys:
|
|
813
|
+
# "configured" — anything available (auto or custom)
|
|
814
|
+
# "source" — "off" | "auto" | "custom"
|
|
815
|
+
# "primary" — true when auto resolves to the default model itself
|
|
816
|
+
# (no sidecar call needed)
|
|
817
|
+
# "model"/"base_url"/"provider"/"available"
|
|
818
|
+
def ocr_state
|
|
819
|
+
raw_entry = @models.find { |m| m["type"] == "ocr" }
|
|
820
|
+
|
|
821
|
+
default = find_model_by_type("default")
|
|
822
|
+
default_provider = default && Clacky::Providers.resolve_provider(
|
|
823
|
+
base_url: default["base_url"], api_key: default["api_key"]
|
|
824
|
+
)
|
|
825
|
+
available = default_provider ? Clacky::Providers.ocr_models(default_provider) : []
|
|
826
|
+
|
|
827
|
+
if raw_entry && raw_entry["disabled"]
|
|
828
|
+
return {
|
|
829
|
+
"configured" => false,
|
|
830
|
+
"source" => "off",
|
|
831
|
+
"model" => nil,
|
|
832
|
+
"base_url" => nil,
|
|
833
|
+
"provider" => nil,
|
|
834
|
+
"primary" => false,
|
|
835
|
+
"available" => available
|
|
836
|
+
}
|
|
837
|
+
end
|
|
838
|
+
|
|
839
|
+
is_custom = raw_entry &&
|
|
840
|
+
raw_entry["base_url"].to_s.strip != "" &&
|
|
841
|
+
raw_entry["api_key"].to_s.strip != ""
|
|
842
|
+
override_model = raw_entry && !is_custom ? raw_entry["model"] : nil
|
|
843
|
+
|
|
844
|
+
entry = if is_custom
|
|
845
|
+
raw_entry
|
|
846
|
+
else
|
|
847
|
+
derive_ocr_model(model_override: override_model)
|
|
848
|
+
end
|
|
849
|
+
|
|
850
|
+
provider_id = if entry
|
|
851
|
+
Clacky::Providers.resolve_provider(
|
|
852
|
+
base_url: entry["base_url"], api_key: entry["api_key"]
|
|
853
|
+
)
|
|
854
|
+
end
|
|
855
|
+
|
|
856
|
+
{
|
|
857
|
+
"configured" => !entry.nil?,
|
|
858
|
+
"source" => is_custom ? "custom" : (entry ? "auto" : "off"),
|
|
859
|
+
"model" => entry && entry["model"],
|
|
860
|
+
"base_url" => entry && entry["base_url"],
|
|
861
|
+
"provider" => provider_id,
|
|
862
|
+
"primary" => !!(entry && entry["primary"]),
|
|
863
|
+
"available" => available
|
|
864
|
+
}
|
|
865
|
+
end
|
|
866
|
+
|
|
741
867
|
# Find model by composite key (model name + base_url).
|
|
742
868
|
# Used when restoring a session to match its original model without relying
|
|
743
869
|
# on the runtime-only id (which changes on every process restart).
|
|
@@ -1050,7 +1176,7 @@ module Clacky
|
|
|
1050
1176
|
# Returns true if successful
|
|
1051
1177
|
def set_model_type(index, type)
|
|
1052
1178
|
return false if index < 0 || index >= @models.length
|
|
1053
|
-
return false unless ["default", "lite", "image", "video", "audio", nil].include?(type)
|
|
1179
|
+
return false unless ["default", "lite", "image", "video", "audio", "ocr", nil].include?(type)
|
|
1054
1180
|
|
|
1055
1181
|
if type
|
|
1056
1182
|
# Remove type from any other model that has it
|
data/lib/clacky/client.rb
CHANGED
|
@@ -398,7 +398,17 @@ module Clacky
|
|
|
398
398
|
def parse_simple_openai_response(response)
|
|
399
399
|
raise_error(response) unless response.status == 200
|
|
400
400
|
parsed_body = safe_json_parse(response.body, context: "LLM response")
|
|
401
|
-
parsed_body
|
|
401
|
+
content = parsed_body.dig("choices", 0, "message", "content")
|
|
402
|
+
if content.nil?
|
|
403
|
+
snippet = response.body.to_s[0, 1200]
|
|
404
|
+
if defined?(Clacky::Logger)
|
|
405
|
+
Clacky::Logger.warn("[parse_simple_openai_response] no content. status=#{response.status} body=#{snippet}")
|
|
406
|
+
end
|
|
407
|
+
raise Clacky::Error,
|
|
408
|
+
"Upstream OpenAI-compatible response missing choices[0].message.content. " \
|
|
409
|
+
"Body snippet: #{snippet}"
|
|
410
|
+
end
|
|
411
|
+
content
|
|
402
412
|
end
|
|
403
413
|
|
|
404
414
|
# ── Prompt caching helpers ────────────────────────────────────────────────
|
|
@@ -506,61 +516,64 @@ module Clacky
|
|
|
506
516
|
end
|
|
507
517
|
|
|
508
518
|
def bedrock_connection
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
519
|
+
current_epoch = Clacky::ProxyConfig.epoch
|
|
520
|
+
if @bedrock_connection.nil? ||
|
|
521
|
+
(!@bedrock_connection_epoch.nil? && @bedrock_connection_epoch != current_epoch)
|
|
522
|
+
@bedrock_connection = Faraday.new(url: @base_url) do |conn|
|
|
523
|
+
conn.headers["Content-Type"] = "application/json"
|
|
524
|
+
conn.headers["Authorization"] = "Bearer #{@api_key}"
|
|
525
|
+
conn.options.timeout = @read_timeout || 300
|
|
526
|
+
conn.options.open_timeout = 10
|
|
527
|
+
conn.ssl.verify = false
|
|
528
|
+
conn.adapter Faraday.default_adapter
|
|
529
|
+
end
|
|
530
|
+
@bedrock_connection_epoch = current_epoch
|
|
516
531
|
end
|
|
532
|
+
@bedrock_connection
|
|
517
533
|
end
|
|
518
534
|
|
|
519
535
|
def openai_connection
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
536
|
+
current_epoch = Clacky::ProxyConfig.epoch
|
|
537
|
+
if @openai_connection.nil? ||
|
|
538
|
+
(!@openai_connection_epoch.nil? && @openai_connection_epoch != current_epoch)
|
|
539
|
+
@openai_connection = Faraday.new(url: @base_url) do |conn|
|
|
540
|
+
conn.headers["Content-Type"] = "application/json"
|
|
541
|
+
conn.headers["Authorization"] = "Bearer #{@api_key}"
|
|
542
|
+
conn.options.timeout = @read_timeout || 300
|
|
543
|
+
conn.options.open_timeout = 10
|
|
544
|
+
conn.ssl.verify = false
|
|
545
|
+
conn.adapter Faraday.default_adapter
|
|
546
|
+
end
|
|
547
|
+
@openai_connection_epoch = current_epoch
|
|
527
548
|
end
|
|
549
|
+
@openai_connection
|
|
528
550
|
end
|
|
529
551
|
|
|
530
552
|
def anthropic_connection
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
# wire-level behaviour. Hardcoding rather than exposing as a config
|
|
552
|
-
# field is intentional: the only UAs known to pass the gate are the
|
|
553
|
-
# whitelisted-client formats, and the project's preset registry is
|
|
554
|
-
# the single source of truth for provider-specific quirks (mirroring
|
|
555
|
-
# how the openrouter Bearer-fallback above is hardcoded).
|
|
556
|
-
if @provider_id == "kimi-coding"
|
|
557
|
-
conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
|
|
553
|
+
current_epoch = Clacky::ProxyConfig.epoch
|
|
554
|
+
if @anthropic_connection.nil? ||
|
|
555
|
+
(!@anthropic_connection_epoch.nil? && @anthropic_connection_epoch != current_epoch)
|
|
556
|
+
@anthropic_connection = Faraday.new(url: @base_url) do |conn|
|
|
557
|
+
conn.headers["Content-Type"] = "application/json"
|
|
558
|
+
conn.headers["x-api-key"] = @api_key
|
|
559
|
+
conn.headers["anthropic-version"] = "2023-06-01"
|
|
560
|
+
conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
|
|
561
|
+
if @provider_id == "openrouter"
|
|
562
|
+
conn.headers["Authorization"] = "Bearer #{@api_key}"
|
|
563
|
+
end
|
|
564
|
+
# Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
|
|
565
|
+
# prefix whitelist limited to first-party coding agents.
|
|
566
|
+
if @provider_id == "kimi-coding"
|
|
567
|
+
conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
|
|
568
|
+
end
|
|
569
|
+
conn.options.timeout = @read_timeout || 300
|
|
570
|
+
conn.options.open_timeout = 10
|
|
571
|
+
conn.ssl.verify = false
|
|
572
|
+
conn.adapter Faraday.default_adapter
|
|
558
573
|
end
|
|
559
|
-
|
|
560
|
-
conn.options.open_timeout = 10
|
|
561
|
-
conn.ssl.verify = false
|
|
562
|
-
conn.adapter Faraday.default_adapter
|
|
574
|
+
@anthropic_connection_epoch = current_epoch
|
|
563
575
|
end
|
|
576
|
+
@anthropic_connection
|
|
564
577
|
end
|
|
565
578
|
|
|
566
579
|
# Correct relative path for the Anthropic /v1/messages endpoint, accounting
|