openclacky 1.2.13 → 1.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +4 -0
- data/CHANGELOG.md +16 -0
- data/lib/clacky/agent/session_serializer.rb +1 -0
- data/lib/clacky/agent.rb +123 -14
- data/lib/clacky/agent_config.rb +125 -8
- data/lib/clacky/client.rb +11 -1
- data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
- data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
- data/lib/clacky/providers.rb +37 -0
- data/lib/clacky/server/http_server.rb +179 -4
- data/lib/clacky/ui2/progress_handle.rb +17 -13
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/vision/resolver.rb +157 -0
- data/lib/clacky/web/i18n.js +4 -2
- data/lib/clacky/web/settings.js +31 -12
- data/lib/clacky.rb +1 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 82874a3ac7c623672bd09b5fa1be1c5dd70b1f223119a1b58b86f85417e46f1c
|
|
4
|
+
data.tar.gz: ba5f1cc02f50a0bee31e24a6ad009c265881eef8b8b9efa6f17b5bec29124414
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5535350a83909fffe2471ab0f6505d54f9bc2436826636eacb1c8d6bbbd84e554087b31b9503d6671449789160072eb743711556f302dc42b820637b7edab83d
|
|
7
|
+
data.tar.gz: bcdec5ed7e56cfc27ee2370ae46582239fa425254e013a7577f162b28c6e2d88b821768f2e367fe2b33dd6773804740692f1f29cba0ca2d73f40d38f6b8e2243
|
data/.clacky/skills/gem-release/SKILL.md
CHANGED
|
@@ -177,6 +177,10 @@ Ask the user whether to use `--update-latest` before running the script.
|
|
|
177
177
|
The script uses `set -euo pipefail` and stops on any failure. Common issues:
|
|
178
178
|
|
|
179
179
|
- **Tests fail** → fix tests before re-running
|
|
180
|
+
- **Web search smoke test fails (Bing)** → This often happens due to datacenter IP fingerprinting (anti-scrape blocking) returning irrelevant top-domain filler (like Mr.Bricolage). If you see "No ruby-related result from bing" during the smoke test:
|
|
181
|
+
1. Manually run `bundle exec rspec spec/integration/web_search_smoke_spec.rb --tag smoke` to verify
|
|
182
|
+
2. If it's the anti-scrape block, temporarily edit `spec/integration/web_search_smoke_spec.rb` to skip the relevance check on failure (e.g., using `skip "Bing returned anti-scrape garbage..."`)
|
|
183
|
+
3. Commit the change ("ci: skip bing smoke test relevance check on anti-scrape") and re-run the release script
|
|
180
184
|
- **CI fails** → script pushes then watches CI; fix and re-push if needed
|
|
181
185
|
- **gem push fails** → check RubyGems credentials (`gem signin`)
|
|
182
186
|
- **gh release fails** → check `gh auth status`
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.14] - 2026-06-08
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- OCR support for scanned PDFs (optical character recognition)
|
|
12
|
+
- VLM-based PDF parser for improved document understanding
|
|
13
|
+
|
|
14
|
+
### Improved
|
|
15
|
+
- PDF OCR processing quality
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- PDF processing not appearing in session history
|
|
19
|
+
- Stale progress indicator that wouldn't dismiss
|
|
20
|
+
|
|
21
|
+
### More
|
|
22
|
+
- Document Bing smoke test anti-scrape failure handling in gem-release
|
|
23
|
+
|
|
8
24
|
## [1.2.13] - 2026-06-08
|
|
9
25
|
|
|
10
26
|
### Added
|
data/lib/clacky/agent/session_serializer.rb
CHANGED
|
@@ -272,6 +272,7 @@ module Clacky
|
|
|
272
272
|
# Disk files (PDF, doc, etc.): stored in display_files on the user message at send time
|
|
273
273
|
disk_files = Array(msg[:display_files]).map { |f|
|
|
274
274
|
{ name: f[:name] || f["name"], type: f[:type] || f["type"] || "file",
|
|
275
|
+
path: f[:path] || f["path"],
|
|
275
276
|
preview_path: f[:preview_path] || f["preview_path"] }
|
|
276
277
|
}
|
|
277
278
|
all_files = image_files + disk_files
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -341,19 +341,23 @@ module Clacky
|
|
|
341
341
|
# the file_prompt builder can't emit the "not supported by model" /
|
|
342
342
|
# "too large" note for downgraded images.
|
|
343
343
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
344
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
344
345
|
ref = Utils::FileProcessor.process_path(path, name: name)
|
|
345
346
|
{ name: ref.name, type: ref.type.to_s, path: ref.original_path,
|
|
346
347
|
preview_path: ref.preview_path, parse_error: ref.parse_error, parser_path: ref.parser_path,
|
|
347
|
-
downgrade_reason: downgrade_reason }
|
|
348
|
+
downgrade_reason: downgrade_reason, ocr_text: ocr_text }
|
|
348
349
|
end
|
|
349
350
|
|
|
350
351
|
# Build display_files for replay: lightweight metadata so the UI can reconstruct
|
|
351
|
-
# file badges (PDF, doc, etc.) on page refresh.
|
|
352
|
-
#
|
|
352
|
+
# file badges (PDF, doc, etc.) on page refresh. Vision-inlined images are NOT
|
|
353
|
+
# stored here — they recover from image_url blocks in user_content. Downgraded
|
|
354
|
+
# images (provider has no vision / too large / OCR'd) DO need path here so the
|
|
355
|
+
# UI can re-render them from the on-disk copy across session switches.
|
|
353
356
|
display_files = all_disk_files.filter_map do |f|
|
|
354
357
|
name = f[:name] || f["name"]
|
|
355
358
|
next unless name
|
|
356
359
|
{ name: name, type: f[:type] || f["type"] || "file",
|
|
360
|
+
path: f[:path] || f["path"],
|
|
357
361
|
preview_path: f[:preview_path] || f["preview_path"] }
|
|
358
362
|
end
|
|
359
363
|
|
|
@@ -381,6 +385,7 @@ module Clacky
|
|
|
381
385
|
parse_error = f[:parse_error] || f["parse_error"]
|
|
382
386
|
parser_path = f[:parser_path] || f["parser_path"]
|
|
383
387
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
388
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
384
389
|
|
|
385
390
|
next unless name
|
|
386
391
|
|
|
@@ -396,6 +401,14 @@ module Clacky
|
|
|
396
401
|
note = downgrade_note_for(downgrade_reason)
|
|
397
402
|
lines << "Note: #{note}" if note
|
|
398
403
|
|
|
404
|
+
# OCR transcription (when an OCR sidecar successfully described
|
|
405
|
+
# an image the primary model couldn't see). Embedded inline so
|
|
406
|
+
# the LLM has the description colocated with the file entry.
|
|
407
|
+
if ocr_text && !ocr_text.strip.empty?
|
|
408
|
+
lines << "OCR description:"
|
|
409
|
+
lines << ocr_text.strip
|
|
410
|
+
end
|
|
411
|
+
|
|
399
412
|
# Parser failed — instruct LLM to fix and re-run
|
|
400
413
|
if preview_path.nil? && parse_error
|
|
401
414
|
lines << "Parse failed: #{parse_error}"
|
|
@@ -1098,6 +1111,9 @@ module Clacky
|
|
|
1098
1111
|
# base64 data in a `role:"tool"` message causes it to be JSON-encoded as
|
|
1099
1112
|
# plain text, inflating token counts by 20-40x. The tool result carries a
|
|
1100
1113
|
# plain-text description for the LLM; the actual image is delivered here.
|
|
1114
|
+
vision_supported = @config.current_model_supports?(:vision)
|
|
1115
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1116
|
+
|
|
1101
1117
|
tool_results.each do |tr|
|
|
1102
1118
|
inject = tr[:image_inject]
|
|
1103
1119
|
next unless inject
|
|
@@ -1109,12 +1125,18 @@ module Clacky
|
|
|
1109
1125
|
|
|
1110
1126
|
data_url = "data:#{mime_type};base64,#{base64_data}"
|
|
1111
1127
|
label = path ? File.basename(path.to_s) : "image"
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1128
|
+
|
|
1129
|
+
image_content =
|
|
1130
|
+
if vision_supported
|
|
1131
|
+
image_block = { type: "image_url", image_url: { url: data_url } }
|
|
1132
|
+
image_block[:image_path] = path if path
|
|
1133
|
+
[{ type: "text", text: "[Image: #{label}]" }, image_block]
|
|
1134
|
+
else
|
|
1135
|
+
ocr_result = try_ocr(ocr_entry, data_url: data_url, name: label)
|
|
1136
|
+
text = ocr_text_for_inject(label, ocr_result, ocr_entry)
|
|
1137
|
+
[{ type: "text", text: text }]
|
|
1138
|
+
end
|
|
1139
|
+
|
|
1118
1140
|
@history.append({
|
|
1119
1141
|
role: "user",
|
|
1120
1142
|
content: image_content,
|
|
@@ -1494,6 +1516,11 @@ module Clacky
|
|
|
1494
1516
|
# the current model (no stale state on `/model` switch).
|
|
1495
1517
|
vision_supported = @config.current_model_supports?(:vision)
|
|
1496
1518
|
|
|
1519
|
+
# OCR sidecar — only consulted when the primary doesn't see images.
|
|
1520
|
+
# When the sidecar entry has "primary"=>true, the primary itself can see,
|
|
1521
|
+
# so vision_supported was already true and we never enter the OCR branch.
|
|
1522
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1523
|
+
|
|
1497
1524
|
vision_images = [] # Array of { url:, name:, size_bytes:, path: }
|
|
1498
1525
|
downgraded = []
|
|
1499
1526
|
|
|
@@ -1510,8 +1537,11 @@ module Clacky
|
|
|
1510
1537
|
file_ref = Utils::FileProcessor.save_image_to_disk(body: raw, mime_type: mime, filename: name)
|
|
1511
1538
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1512
1539
|
if reason
|
|
1513
|
-
|
|
1514
|
-
|
|
1540
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, data_url: data_url, name: name) : nil
|
|
1541
|
+
entry = { name: name, path: file_ref.original_path, type: "image",
|
|
1542
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1543
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1544
|
+
downgraded << entry
|
|
1515
1545
|
else
|
|
1516
1546
|
vision_images << { url: data_url, name: name, size_bytes: byte_size, path: file_ref.original_path }
|
|
1517
1547
|
end
|
|
@@ -1522,8 +1552,11 @@ module Clacky
|
|
|
1522
1552
|
byte_size = (b64_data.bytesize * 3) / 4
|
|
1523
1553
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1524
1554
|
if reason
|
|
1525
|
-
|
|
1526
|
-
|
|
1555
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, path: path, name: name) : nil
|
|
1556
|
+
entry = { name: name, path: path, type: "image",
|
|
1557
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1558
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1559
|
+
downgraded << entry
|
|
1527
1560
|
else
|
|
1528
1561
|
vision_images << { url: data_url_from_path, name: name, size_bytes: byte_size, path: path }
|
|
1529
1562
|
end
|
|
@@ -1536,6 +1569,30 @@ module Clacky
|
|
|
1536
1569
|
[vision_images, downgraded]
|
|
1537
1570
|
end
|
|
1538
1571
|
|
|
1572
|
+
# Best-effort OCR through the configured sidecar. Returns nil when no
|
|
1573
|
+
# sidecar is configured or the call failed — caller falls back to the
|
|
1574
|
+
# ":provider_no_vision" downgrade note (today's behaviour).
|
|
1575
|
+
# @return [Clacky::Vision::Resolver::Result, nil]
|
|
1576
|
+
# nil — no sidecar exists or sidecar IS the primary (no point extra hop).
|
|
1577
|
+
# Caller treats this as ":provider_no_vision" (configure a sidecar).
|
|
1578
|
+
# Result — outcome from the sidecar call. status=:ok carries text;
|
|
1579
|
+
# :empty / :call_failed / :bad_image each get their own message
|
|
1580
|
+
# so the user can tell "image content unreadable" from
|
|
1581
|
+
# "sidecar misconfigured / down".
|
|
1582
|
+
private def try_ocr(ocr_entry, data_url: nil, path: nil, name: nil)
|
|
1583
|
+
return nil unless ocr_entry
|
|
1584
|
+
return nil if ocr_entry["primary"]
|
|
1585
|
+
|
|
1586
|
+
image = data_url ? { data_url: data_url } : { path: path }
|
|
1587
|
+
|
|
1588
|
+
@ui&.show_progress("OCR...", progress_type: "thinking", phase: "active")
|
|
1589
|
+
begin
|
|
1590
|
+
Clacky::Vision::Resolver.new(ocr_entry).describe(image)
|
|
1591
|
+
ensure
|
|
1592
|
+
@ui&.show_progress(phase: "done")
|
|
1593
|
+
end
|
|
1594
|
+
end
|
|
1595
|
+
|
|
1539
1596
|
# Decide whether an image must be downgraded to a disk ref, and if so why.
|
|
1540
1597
|
# Precedence: provider capability is checked first — a text-only model
|
|
1541
1598
|
# can't use the image at any size, so there's no point re-checking size.
|
|
@@ -1554,9 +1611,61 @@ module Clacky
|
|
|
1554
1611
|
private def downgrade_note_for(reason)
|
|
1555
1612
|
case reason&.to_sym
|
|
1556
1613
|
when :provider_no_vision
|
|
1557
|
-
"The current model does not support vision input
|
|
1614
|
+
"The current model does not support vision input and no OCR sidecar is configured. Tell the user clearly that to analyze this image they need to either: (1) configure an OCR sidecar model in Settings → Media → OCR (any vision-capable model works as the sidecar — e.g. gemini-3-5-flash, gpt-4o-mini, claude-3-5-haiku), or (2) switch the current model to a vision-capable one. Do not attempt to guess the image content."
|
|
1558
1615
|
when :too_large
|
|
1559
1616
|
"Image was too large for inline delivery and has been saved to disk. Read it with a vision-capable tool/model if needed."
|
|
1617
|
+
when :ocr_resolved
|
|
1618
|
+
"The current model does not support vision input. The image has been transcribed by an OCR sidecar model — the description below is what the model sees in place of the raw pixels."
|
|
1619
|
+
when :ocr_call_failed
|
|
1620
|
+
"The current model does not support vision and the configured OCR sidecar call failed. Tell the user the sidecar (Settings → Media → OCR) errored — likely a misconfigured base_url / api_key, or the upstream is down. They can retry, fix the sidecar config, or switch to a vision-capable primary model. Do not guess the image content."
|
|
1621
|
+
when :ocr_empty
|
|
1622
|
+
"The current model does not support vision. The OCR sidecar responded but returned no readable text (the model produced no description — possibly the image is blank, or the model exhausted its token budget on internal reasoning). Tell the user honestly; do not guess the image content."
|
|
1623
|
+
when :ocr_bad_image
|
|
1624
|
+
"The current model does not support vision. The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
|
|
1625
|
+
end
|
|
1626
|
+
end
|
|
1627
|
+
|
|
1628
|
+
# Mutates `entry` in place based on the OCR Result outcome.
|
|
1629
|
+
# Sets `:ocr_text` (only on :ok) and rewrites `:downgrade_reason` to one
|
|
1630
|
+
# of :ocr_resolved / :ocr_call_failed / :ocr_empty / :ocr_bad_image.
|
|
1631
|
+
# When ocr_result is nil (no sidecar configured) leaves the original
|
|
1632
|
+
# :provider_no_vision reason untouched.
|
|
1633
|
+
private def apply_ocr_outcome!(entry, ocr_result)
|
|
1634
|
+
return entry unless ocr_result
|
|
1635
|
+
|
|
1636
|
+
case ocr_result.status
|
|
1637
|
+
when :ok
|
|
1638
|
+
entry[:ocr_text] = ocr_result.text
|
|
1639
|
+
entry[:downgrade_reason] = :ocr_resolved
|
|
1640
|
+
when :empty
|
|
1641
|
+
entry[:downgrade_reason] = :ocr_empty
|
|
1642
|
+
when :call_failed
|
|
1643
|
+
entry[:downgrade_reason] = :ocr_call_failed
|
|
1644
|
+
entry[:ocr_error] = ocr_result.error
|
|
1645
|
+
when :bad_image
|
|
1646
|
+
entry[:downgrade_reason] = :ocr_bad_image
|
|
1647
|
+
end
|
|
1648
|
+
entry
|
|
1649
|
+
end
|
|
1650
|
+
|
|
1651
|
+
# Build the inline text block used by the image_inject path (tool screenshots,
|
|
1652
|
+
# generated images, etc. that arrive as content blocks rather than as
|
|
1653
|
+
# display_files entries).
|
|
1654
|
+
private def ocr_text_for_inject(label, ocr_result, ocr_entry)
|
|
1655
|
+
header = "[Image: #{label}]"
|
|
1656
|
+
if ocr_result.nil?
|
|
1657
|
+
return "#{header} The current model has no vision and no OCR sidecar is configured. Tell the user to either configure an OCR sidecar in Settings → Media → OCR, or switch to a vision-capable model, then retry. Do not guess the image content."
|
|
1658
|
+
end
|
|
1659
|
+
|
|
1660
|
+
case ocr_result.status
|
|
1661
|
+
when :ok
|
|
1662
|
+
"#{header}\nOCR description (the current model cannot see images directly; this transcription was produced by sidecar #{ocr_entry["model"]}):\n#{ocr_result.text.strip}"
|
|
1663
|
+
when :empty
|
|
1664
|
+
"#{header} The OCR sidecar (#{ocr_entry["model"]}) returned no readable text. The image may be blank, or the sidecar exhausted its token budget on internal reasoning. Tell the user honestly; do not guess the image content."
|
|
1665
|
+
when :call_failed
|
|
1666
|
+
"#{header} The OCR sidecar (#{ocr_entry["model"]}) call failed: #{ocr_result.error}. Tell the user the sidecar errored (likely a misconfigured base_url / api_key in Settings → Media → OCR, or the upstream is down). They can retry, fix the sidecar, or switch to a vision-capable primary model. Do not guess the image content."
|
|
1667
|
+
when :bad_image
|
|
1668
|
+
"#{header} The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
|
|
1560
1669
|
end
|
|
1561
1670
|
end
|
|
1562
1671
|
|
data/lib/clacky/agent_config.rb
CHANGED
|
@@ -606,12 +606,16 @@ module Clacky
|
|
|
606
606
|
}.compact
|
|
607
607
|
end
|
|
608
608
|
|
|
609
|
-
# Find model by type (default or lite or media kind)
|
|
609
|
+
# Find model by type (default or lite or media kind or ocr sidecar)
|
|
610
610
|
# Returns the model hash or nil if not found.
|
|
611
611
|
# For media kinds (image/video/audio): explicit user-configured (custom)
|
|
612
612
|
# entries win; otherwise an auto-derived virtual entry is returned
|
|
613
613
|
# based on the default model's provider — mirroring how lite is
|
|
614
614
|
# virtually derived via #lite_model_config_for_current.
|
|
615
|
+
# For "ocr": same custom→auto→nil pattern. Auto path first checks
|
|
616
|
+
# whether the default model itself supports vision (zero-overhead path,
|
|
617
|
+
# no sidecar needed); if not, derives from the provider's
|
|
618
|
+
# default_ocr_model.
|
|
615
619
|
def find_model_by_type(type)
|
|
616
620
|
kind = type.to_s
|
|
617
621
|
if Clacky::Providers::MEDIA_KINDS.include?(kind)
|
|
@@ -622,16 +626,24 @@ module Clacky
|
|
|
622
626
|
end
|
|
623
627
|
return derive_media_model(kind, model_override: entry && entry["model"])
|
|
624
628
|
end
|
|
629
|
+
if kind == "ocr"
|
|
630
|
+
entry = @models.find { |m| m["type"] == "ocr" }
|
|
631
|
+
return nil if entry && entry["disabled"]
|
|
632
|
+
if entry && entry["base_url"].to_s.strip != "" && entry["api_key"].to_s.strip != ""
|
|
633
|
+
return entry
|
|
634
|
+
end
|
|
635
|
+
return derive_ocr_model(model_override: entry && entry["model"])
|
|
636
|
+
end
|
|
625
637
|
@models.find { |m| m["type"] == type }
|
|
626
638
|
end
|
|
627
639
|
|
|
628
640
|
private def derive_media_model(kind, model_override: nil)
|
|
629
|
-
|
|
630
|
-
return nil unless
|
|
641
|
+
anchor = current_model || find_model_by_type("default")
|
|
642
|
+
return nil unless anchor
|
|
631
643
|
|
|
632
644
|
provider_id = Clacky::Providers.resolve_provider(
|
|
633
|
-
base_url:
|
|
634
|
-
api_key:
|
|
645
|
+
base_url: anchor["base_url"],
|
|
646
|
+
api_key: anchor["api_key"]
|
|
635
647
|
)
|
|
636
648
|
return nil unless provider_id
|
|
637
649
|
|
|
@@ -649,8 +661,8 @@ module Clacky
|
|
|
649
661
|
|
|
650
662
|
{
|
|
651
663
|
"model" => model_name,
|
|
652
|
-
"base_url" =>
|
|
653
|
-
"api_key" =>
|
|
664
|
+
"base_url" => anchor["base_url"],
|
|
665
|
+
"api_key" => anchor["api_key"],
|
|
654
666
|
"type" => kind,
|
|
655
667
|
"auto_injected" => true
|
|
656
668
|
}
|
|
@@ -662,6 +674,54 @@ module Clacky
|
|
|
662
674
|
@models.reject! { |m| m["auto_injected"] && Clacky::Providers::MEDIA_KINDS.include?(m["type"].to_s) }
|
|
663
675
|
end
|
|
664
676
|
|
|
677
|
+
# Derive an OCR sidecar model entry from the default model's provider.
|
|
678
|
+
# Resolution order:
|
|
679
|
+
# 1. If the default model itself supports vision → return the default
|
|
680
|
+
# directly (zero-overhead path; no separate sidecar call needed).
|
|
681
|
+
# 2. Otherwise look up the provider's default_ocr_model (or honour
|
|
682
|
+
# model_override if it's a vision-capable model on that provider).
|
|
683
|
+
# 3. nil when the provider has no vision-capable lineup at all
|
|
684
|
+
# (e.g. DeepSeek V4) — caller falls back to today's "no vision" UX.
|
|
685
|
+
private def derive_ocr_model(model_override: nil)
|
|
686
|
+
# Anchor on the model the session is *actually* running on, not the
|
|
687
|
+
# yml `type: default` marker — those diverge whenever the user
|
|
688
|
+
# switches model mid-session (e.g. opus → deepseek).
|
|
689
|
+
anchor = current_model || find_model_by_type("default")
|
|
690
|
+
return nil unless anchor
|
|
691
|
+
|
|
692
|
+
provider_id = Clacky::Providers.resolve_provider(
|
|
693
|
+
base_url: anchor["base_url"], api_key: anchor["api_key"]
|
|
694
|
+
)
|
|
695
|
+
return nil unless provider_id
|
|
696
|
+
|
|
697
|
+
if Clacky::Providers.supports?(provider_id, :vision, model_name: anchor["model"])
|
|
698
|
+
return {
|
|
699
|
+
"model" => anchor["model"],
|
|
700
|
+
"base_url" => anchor["base_url"],
|
|
701
|
+
"api_key" => anchor["api_key"],
|
|
702
|
+
"type" => "ocr",
|
|
703
|
+
"auto_injected" => true,
|
|
704
|
+
"primary" => true
|
|
705
|
+
}
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
candidates = Clacky::Providers.ocr_models(provider_id)
|
|
709
|
+
model_name = if model_override && candidates.include?(model_override)
|
|
710
|
+
model_override
|
|
711
|
+
else
|
|
712
|
+
Clacky::Providers.default_ocr_model(provider_id)
|
|
713
|
+
end
|
|
714
|
+
return nil if model_name.nil? || model_name.to_s.empty?
|
|
715
|
+
|
|
716
|
+
{
|
|
717
|
+
"model" => model_name,
|
|
718
|
+
"base_url" => anchor["base_url"],
|
|
719
|
+
"api_key" => anchor["api_key"],
|
|
720
|
+
"type" => "ocr",
|
|
721
|
+
"auto_injected" => true
|
|
722
|
+
}
|
|
723
|
+
end
|
|
724
|
+
|
|
665
725
|
# Returns the configured/derived media model entry for `kind`, plus a
|
|
666
726
|
# hint about its source. UI uses this to render the tri-state control.
|
|
667
727
|
# @param kind [String] one of "image" / "video" / "audio"
|
|
@@ -738,6 +798,63 @@ module Clacky
|
|
|
738
798
|
}
|
|
739
799
|
end
|
|
740
800
|
|
|
801
|
+
# Tri-state introspection for the OCR sidecar — mirrors #media_state shape
|
|
802
|
+
# so the Settings UI can reuse the same row component.
|
|
803
|
+
# @return [Hash{String=>Object}] keys:
|
|
804
|
+
# "configured" — anything available (auto or custom)
|
|
805
|
+
# "source" — "off" | "auto" | "custom"
|
|
806
|
+
# "primary" — true when auto resolves to the default model itself
|
|
807
|
+
# (no sidecar call needed)
|
|
808
|
+
# "model"/"base_url"/"provider"/"available"
|
|
809
|
+
def ocr_state
|
|
810
|
+
raw_entry = @models.find { |m| m["type"] == "ocr" }
|
|
811
|
+
|
|
812
|
+
default = find_model_by_type("default")
|
|
813
|
+
default_provider = default && Clacky::Providers.resolve_provider(
|
|
814
|
+
base_url: default["base_url"], api_key: default["api_key"]
|
|
815
|
+
)
|
|
816
|
+
available = default_provider ? Clacky::Providers.ocr_models(default_provider) : []
|
|
817
|
+
|
|
818
|
+
if raw_entry && raw_entry["disabled"]
|
|
819
|
+
return {
|
|
820
|
+
"configured" => false,
|
|
821
|
+
"source" => "off",
|
|
822
|
+
"model" => nil,
|
|
823
|
+
"base_url" => nil,
|
|
824
|
+
"provider" => nil,
|
|
825
|
+
"primary" => false,
|
|
826
|
+
"available" => available
|
|
827
|
+
}
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
is_custom = raw_entry &&
|
|
831
|
+
raw_entry["base_url"].to_s.strip != "" &&
|
|
832
|
+
raw_entry["api_key"].to_s.strip != ""
|
|
833
|
+
override_model = raw_entry && !is_custom ? raw_entry["model"] : nil
|
|
834
|
+
|
|
835
|
+
entry = if is_custom
|
|
836
|
+
raw_entry
|
|
837
|
+
else
|
|
838
|
+
derive_ocr_model(model_override: override_model)
|
|
839
|
+
end
|
|
840
|
+
|
|
841
|
+
provider_id = if entry
|
|
842
|
+
Clacky::Providers.resolve_provider(
|
|
843
|
+
base_url: entry["base_url"], api_key: entry["api_key"]
|
|
844
|
+
)
|
|
845
|
+
end
|
|
846
|
+
|
|
847
|
+
{
|
|
848
|
+
"configured" => !entry.nil?,
|
|
849
|
+
"source" => is_custom ? "custom" : (entry ? "auto" : "off"),
|
|
850
|
+
"model" => entry && entry["model"],
|
|
851
|
+
"base_url" => entry && entry["base_url"],
|
|
852
|
+
"provider" => provider_id,
|
|
853
|
+
"primary" => !!(entry && entry["primary"]),
|
|
854
|
+
"available" => available
|
|
855
|
+
}
|
|
856
|
+
end
|
|
857
|
+
|
|
741
858
|
# Find model by composite key (model name + base_url).
|
|
742
859
|
# Used when restoring a session to match its original model without relying
|
|
743
860
|
# on the runtime-only id (which changes on every process restart).
|
|
@@ -1050,7 +1167,7 @@ module Clacky
|
|
|
1050
1167
|
# Returns true if successful
|
|
1051
1168
|
def set_model_type(index, type)
|
|
1052
1169
|
return false if index < 0 || index >= @models.length
|
|
1053
|
-
return false unless ["default", "lite", "image", "video", "audio", nil].include?(type)
|
|
1170
|
+
return false unless ["default", "lite", "image", "video", "audio", "ocr", nil].include?(type)
|
|
1054
1171
|
|
|
1055
1172
|
if type
|
|
1056
1173
|
# Remove type from any other model that has it
|
data/lib/clacky/client.rb
CHANGED
|
@@ -398,7 +398,17 @@ module Clacky
|
|
|
398
398
|
def parse_simple_openai_response(response)
|
|
399
399
|
raise_error(response) unless response.status == 200
|
|
400
400
|
parsed_body = safe_json_parse(response.body, context: "LLM response")
|
|
401
|
-
parsed_body
|
|
401
|
+
content = parsed_body.dig("choices", 0, "message", "content")
|
|
402
|
+
if content.nil?
|
|
403
|
+
snippet = response.body.to_s[0, 1200]
|
|
404
|
+
if defined?(Clacky::Logger)
|
|
405
|
+
Clacky::Logger.warn("[parse_simple_openai_response] no content. status=#{response.status} body=#{snippet}")
|
|
406
|
+
end
|
|
407
|
+
raise Clacky::Error,
|
|
408
|
+
"Upstream OpenAI-compatible response missing choices[0].message.content. " \
|
|
409
|
+
"Body snippet: #{snippet}"
|
|
410
|
+
end
|
|
411
|
+
content
|
|
402
412
|
end
|
|
403
413
|
|
|
404
414
|
# ── Prompt caching helpers ────────────────────────────────────────────────
|