openclacky 1.2.13 → 1.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f2e02be3208e8ffa6a857da34c1c3bff2db9cc52d075f67481e3f85d2b5fe8be
4
- data.tar.gz: 6be5d6844301671cb3f3521248a7091c2978b9e4423d02b6bcc60d9ffbb60a97
3
+ metadata.gz: 84e7378b08b627bad34d327d1bd82cc7efbfe980a690d64e678242917be8125d
4
+ data.tar.gz: 87e4c1b8e99f2195c98c85124816503fd11436b3bdb38465bb7289fab1204fa3
5
5
  SHA512:
6
- metadata.gz: 429dc77e88fa2f1febb7177a903c3229b2f382e4f52a02be5c3980fbaa3f64e9a81aca82a108b1e1d2f11481b768c86384e5e4d827f2ddea6f7c7067e0ef2db4
7
- data.tar.gz: 2c2a3774f968f2d3f53ce1632470686b66c1d0d903617549f9cad2650a865ffd39821934591978d4df89a3c4e18eb769bbabe90b3826d557ab71fa90b4eb361b
6
+ metadata.gz: 36cb343f4a81222b3a2861dcd80529f0d3216a341e19ea7ebfd1ea6dbebded9c0d31a212a645cffb7268e9faf844be9f257d739677a17d0387bf033970dc7675
7
+ data.tar.gz: 16d08fc33223c56024a27072de5a0f435ac969c1ac55fffa39738e0e8d9bfe77f1ebbe4c4cafb8793c2dac1367744daeb417b97db9a854ba7e04ffd4d2b17042
@@ -177,6 +177,10 @@ Ask the user whether to use `--update-latest` before running the script.
177
177
  The script uses `set -euo pipefail` and stops on any failure. Common issues:
178
178
 
179
179
  - **Tests fail** → fix tests before re-running
180
+ - **Web search smoke test fails (Bing)** → This often happens due to datacenter IP fingerprinting (anti-scrape blocking) returning irrelevant top-domain filler (like Mr.Bricolage). If you see "No ruby-related result from bing" during the smoke test:
181
+ 1. Manually run `bundle exec rspec spec/integration/web_search_smoke_spec.rb --tag smoke` to verify
182
+ 2. If it's the anti-scrape block, temporarily edit `spec/integration/web_search_smoke_spec.rb` to skip the relevance check on failure (e.g., using `skip "Bing returned anti-scrape garbage..."`)
183
+ 3. Commit the change ("ci: skip bing smoke test relevance check on anti-scrape") and re-run the release script
180
184
  - **CI fails** → script pushes then watches CI; fix and re-push if needed
181
185
  - **gem push fails** → check RubyGems credentials (`gem signin`)
182
186
  - **gh release fails** → check `gh auth status`
data/CHANGELOG.md CHANGED
@@ -5,6 +5,34 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.2.15] - 2026-06-10
9
+
10
+ ### Added
11
+ - Proxy configuration support
12
+ - Optional sound notification on task completion in Web UI
13
+
14
+ ### Fixed
15
+ - Prevent scheduler thread from dying on tick exception
16
+
17
+ ### More
18
+ - Tool diff CSS refinement
19
+
20
+ ## [1.2.14] - 2026-06-08
21
+
22
+ ### Added
23
+ - OCR support for scanned PDFs (optical character recognition)
24
+ - VLM-based PDF parser for improved document understanding
25
+
26
+ ### Improved
27
+ - PDF OCR processing quality
28
+
29
+ ### Fixed
30
+ - PDF processing not appearing in session history
31
+ - Stale progress indicator that wouldn't dismiss
32
+
33
+ ### More
34
+ - Document Bing smoke test anti-scrape failure handling in gem-release
35
+
8
36
  ## [1.2.13] - 2026-06-08
9
37
 
10
38
  ### Added
@@ -272,6 +272,7 @@ module Clacky
272
272
  # Disk files (PDF, doc, etc.): stored in display_files on the user message at send time
273
273
  disk_files = Array(msg[:display_files]).map { |f|
274
274
  { name: f[:name] || f["name"], type: f[:type] || f["type"] || "file",
275
+ path: f[:path] || f["path"],
275
276
  preview_path: f[:preview_path] || f["preview_path"] }
276
277
  }
277
278
  all_files = image_files + disk_files
data/lib/clacky/agent.rb CHANGED
@@ -341,19 +341,23 @@ module Clacky
341
341
  # the file_prompt builder can't emit the "not supported by model" /
342
342
  # "too large" note for downgraded images.
343
343
  downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
344
+ ocr_text = f[:ocr_text] || f["ocr_text"]
344
345
  ref = Utils::FileProcessor.process_path(path, name: name)
345
346
  { name: ref.name, type: ref.type.to_s, path: ref.original_path,
346
347
  preview_path: ref.preview_path, parse_error: ref.parse_error, parser_path: ref.parser_path,
347
- downgrade_reason: downgrade_reason }
348
+ downgrade_reason: downgrade_reason, ocr_text: ocr_text }
348
349
  end
349
350
 
350
351
  # Build display_files for replay: lightweight metadata so the UI can reconstruct
351
- # file badges (PDF, doc, etc.) on page refresh. Images are NOT stored here — they
352
- # are recovered from the image_url blocks in user_content by extract_image_files_from_content.
352
+ # file badges (PDF, doc, etc.) on page refresh. Vision-inlined images are NOT
353
+ # stored here — they recover from image_url blocks in user_content. Downgraded
354
+ # images (provider has no vision / too large / OCR'd) DO need path here so the
355
+ # UI can re-render them from the on-disk copy across session switches.
353
356
  display_files = all_disk_files.filter_map do |f|
354
357
  name = f[:name] || f["name"]
355
358
  next unless name
356
359
  { name: name, type: f[:type] || f["type"] || "file",
360
+ path: f[:path] || f["path"],
357
361
  preview_path: f[:preview_path] || f["preview_path"] }
358
362
  end
359
363
 
@@ -381,6 +385,7 @@ module Clacky
381
385
  parse_error = f[:parse_error] || f["parse_error"]
382
386
  parser_path = f[:parser_path] || f["parser_path"]
383
387
  downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
388
+ ocr_text = f[:ocr_text] || f["ocr_text"]
384
389
 
385
390
  next unless name
386
391
 
@@ -396,6 +401,14 @@ module Clacky
396
401
  note = downgrade_note_for(downgrade_reason)
397
402
  lines << "Note: #{note}" if note
398
403
 
404
+ # OCR transcription (when an OCR sidecar successfully described
405
+ # an image the primary model couldn't see). Embedded inline so
406
+ # the LLM has the description colocated with the file entry.
407
+ if ocr_text && !ocr_text.strip.empty?
408
+ lines << "OCR description:"
409
+ lines << ocr_text.strip
410
+ end
411
+
399
412
  # Parser failed — instruct LLM to fix and re-run
400
413
  if preview_path.nil? && parse_error
401
414
  lines << "Parse failed: #{parse_error}"
@@ -1098,6 +1111,9 @@ module Clacky
1098
1111
  # base64 data in a `role:"tool"` message causes it to be JSON-encoded as
1099
1112
  # plain text, inflating token counts by 20-40x. The tool result carries a
1100
1113
  # plain-text description for the LLM; the actual image is delivered here.
1114
+ vision_supported = @config.current_model_supports?(:vision)
1115
+ ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
1116
+
1101
1117
  tool_results.each do |tr|
1102
1118
  inject = tr[:image_inject]
1103
1119
  next unless inject
@@ -1109,12 +1125,18 @@ module Clacky
1109
1125
 
1110
1126
  data_url = "data:#{mime_type};base64,#{base64_data}"
1111
1127
  label = path ? File.basename(path.to_s) : "image"
1112
- image_block = { type: "image_url", image_url: { url: data_url } }
1113
- image_block[:image_path] = path if path
1114
- image_content = [
1115
- { type: "text", text: "[Image: #{label}]" },
1116
- image_block
1117
- ]
1128
+
1129
+ image_content =
1130
+ if vision_supported
1131
+ image_block = { type: "image_url", image_url: { url: data_url } }
1132
+ image_block[:image_path] = path if path
1133
+ [{ type: "text", text: "[Image: #{label}]" }, image_block]
1134
+ else
1135
+ ocr_result = try_ocr(ocr_entry, data_url: data_url, name: label)
1136
+ text = ocr_text_for_inject(label, ocr_result, ocr_entry)
1137
+ [{ type: "text", text: text }]
1138
+ end
1139
+
1118
1140
  @history.append({
1119
1141
  role: "user",
1120
1142
  content: image_content,
@@ -1494,6 +1516,11 @@ module Clacky
1494
1516
  # the current model (no stale state on `/model` switch).
1495
1517
  vision_supported = @config.current_model_supports?(:vision)
1496
1518
 
1519
+ # OCR sidecar — only consulted when the primary doesn't see images.
1520
+ # When the sidecar entry has "primary"=>true, the primary itself can see,
1521
+ # so vision_supported was already true and we never enter the OCR branch.
1522
+ ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
1523
+
1497
1524
  vision_images = [] # Array of { url:, name:, size_bytes:, path: }
1498
1525
  downgraded = []
1499
1526
 
@@ -1510,8 +1537,11 @@ module Clacky
1510
1537
  file_ref = Utils::FileProcessor.save_image_to_disk(body: raw, mime_type: mime, filename: name)
1511
1538
  reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
1512
1539
  if reason
1513
- downgraded << { name: name, path: file_ref.original_path, type: "image",
1514
- mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
1540
+ ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, data_url: data_url, name: name) : nil
1541
+ entry = { name: name, path: file_ref.original_path, type: "image",
1542
+ mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
1543
+ apply_ocr_outcome!(entry, ocr_result)
1544
+ downgraded << entry
1515
1545
  else
1516
1546
  vision_images << { url: data_url, name: name, size_bytes: byte_size, path: file_ref.original_path }
1517
1547
  end
@@ -1522,8 +1552,11 @@ module Clacky
1522
1552
  byte_size = (b64_data.bytesize * 3) / 4
1523
1553
  reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
1524
1554
  if reason
1525
- downgraded << { name: name, path: path, type: "image",
1526
- mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
1555
+ ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, path: path, name: name) : nil
1556
+ entry = { name: name, path: path, type: "image",
1557
+ mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
1558
+ apply_ocr_outcome!(entry, ocr_result)
1559
+ downgraded << entry
1527
1560
  else
1528
1561
  vision_images << { url: data_url_from_path, name: name, size_bytes: byte_size, path: path }
1529
1562
  end
@@ -1536,6 +1569,30 @@ module Clacky
1536
1569
  [vision_images, downgraded]
1537
1570
  end
1538
1571
 
1572
+ # Best-effort OCR through the configured sidecar. Returns nil when no
1573
+ # sidecar is configured or the sidecar is the primary model itself; the
1574
+ # caller then keeps the ":provider_no_vision" downgrade note (today's behaviour).
1575
+ # @return [Clacky::Vision::Resolver::Result, nil]
1576
+ # nil — no sidecar exists or sidecar IS the primary (no point extra hop).
1577
+ # Caller treats this as ":provider_no_vision" (configure a sidecar).
1578
+ # Result — outcome from the sidecar call. status=:ok carries text;
1579
+ # :empty / :call_failed / :bad_image each get their own message
1580
+ # so the user can tell "image content unreadable" from
1581
+ # "sidecar misconfigured / down".
1582
+ private def try_ocr(ocr_entry, data_url: nil, path: nil, name: nil)
1583
+ return nil unless ocr_entry
1584
+ return nil if ocr_entry["primary"]
1585
+
1586
+ image = data_url ? { data_url: data_url } : { path: path }
1587
+
1588
+ @ui&.show_progress("OCR...", progress_type: "thinking", phase: "active")
1589
+ begin
1590
+ Clacky::Vision::Resolver.new(ocr_entry).describe(image)
1591
+ ensure
1592
+ @ui&.show_progress(phase: "done")
1593
+ end
1594
+ end
1595
+
1539
1596
  # Decide whether an image must be downgraded to a disk ref, and if so why.
1540
1597
  # Precedence: provider capability is checked first — a text-only model
1541
1598
  # can't use the image at any size, so there's no point re-checking size.
@@ -1554,9 +1611,61 @@ module Clacky
1554
1611
  private def downgrade_note_for(reason)
1555
1612
  case reason&.to_sym
1556
1613
  when :provider_no_vision
1557
- "The current model does not support vision input. Image content is not visible to the model; suggest switching to a vision-capable model if the user needs image analysis."
1614
+ "The current model does not support vision input and no OCR sidecar is configured. Tell the user clearly that to analyze this image they need to either: (1) configure an OCR sidecar model in Settings → Media → OCR (any vision-capable model works as the sidecar e.g. gemini-3-5-flash, gpt-4o-mini, claude-3-5-haiku), or (2) switch the current model to a vision-capable one. Do not attempt to guess the image content."
1558
1615
  when :too_large
1559
1616
  "Image was too large for inline delivery and has been saved to disk. Read it with a vision-capable tool/model if needed."
1617
+ when :ocr_resolved
1618
+ "The current model does not support vision input. The image has been transcribed by an OCR sidecar model — the description below is what the model sees in place of the raw pixels."
1619
+ when :ocr_call_failed
1620
+ "The current model does not support vision and the configured OCR sidecar call failed. Tell the user the sidecar (Settings → Media → OCR) errored — likely a misconfigured base_url / api_key, or the upstream is down. They can retry, fix the sidecar config, or switch to a vision-capable primary model. Do not guess the image content."
1621
+ when :ocr_empty
1622
+ "The current model does not support vision. The OCR sidecar responded but returned no readable text (the model produced no description — possibly the image is blank, or the model exhausted its token budget on internal reasoning). Tell the user honestly; do not guess the image content."
1623
+ when :ocr_bad_image
1624
+ "The current model does not support vision. The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
1625
+ end
1626
+ end
1627
+
1628
+ # Mutates `entry` in place based on the OCR Result outcome.
1629
+ # Sets `:ocr_text` (only on :ok) and rewrites `:downgrade_reason` to one
1630
+ # of :ocr_resolved / :ocr_call_failed / :ocr_empty / :ocr_bad_image.
1631
+ # When ocr_result is nil (no sidecar configured) leaves the original
1632
+ # :provider_no_vision reason untouched.
1633
+ private def apply_ocr_outcome!(entry, ocr_result)
1634
+ return entry unless ocr_result
1635
+
1636
+ case ocr_result.status
1637
+ when :ok
1638
+ entry[:ocr_text] = ocr_result.text
1639
+ entry[:downgrade_reason] = :ocr_resolved
1640
+ when :empty
1641
+ entry[:downgrade_reason] = :ocr_empty
1642
+ when :call_failed
1643
+ entry[:downgrade_reason] = :ocr_call_failed
1644
+ entry[:ocr_error] = ocr_result.error
1645
+ when :bad_image
1646
+ entry[:downgrade_reason] = :ocr_bad_image
1647
+ end
1648
+ entry
1649
+ end
1650
+
1651
+ # Build the inline text block used by the image_inject path (tool screenshots,
1652
+ # generated images, etc. that arrive as content blocks rather than as
1653
+ # display_files entries).
1654
+ private def ocr_text_for_inject(label, ocr_result, ocr_entry)
1655
+ header = "[Image: #{label}]"
1656
+ if ocr_result.nil?
1657
+ return "#{header} The current model has no vision and no OCR sidecar is configured. Tell the user to either configure an OCR sidecar in Settings → Media → OCR, or switch to a vision-capable model, then retry. Do not guess the image content."
1658
+ end
1659
+
1660
+ case ocr_result.status
1661
+ when :ok
1662
+ "#{header}\nOCR description (the current model cannot see images directly; this transcription was produced by sidecar #{ocr_entry["model"]}):\n#{ocr_result.text.strip}"
1663
+ when :empty
1664
+ "#{header} The OCR sidecar (#{ocr_entry["model"]}) returned no readable text. The image may be blank, or the sidecar exhausted its token budget on internal reasoning. Tell the user honestly; do not guess the image content."
1665
+ when :call_failed
1666
+ "#{header} The OCR sidecar (#{ocr_entry["model"]}) call failed: #{ocr_result.error}. Tell the user the sidecar errored (likely a misconfigured base_url / api_key in Settings → Media → OCR, or the upstream is down). They can retry, fix the sidecar, or switch to a vision-capable primary model. Do not guess the image content."
1667
+ when :bad_image
1668
+ "#{header} The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
1560
1669
  end
1561
1670
  end
1562
1671
 
@@ -164,7 +164,8 @@ module Clacky
164
164
  :models, :current_model_index, :current_model_id,
165
165
  :memory_update_enabled, :skill_evolution,
166
166
  :max_running_agents, :max_idle_agents,
167
- :default_working_dir
167
+ :default_working_dir,
168
+ :proxy_url
168
169
 
169
170
  def initialize(options = {})
170
171
  @permission_mode = validate_permission_mode(options[:permission_mode])
@@ -217,6 +218,11 @@ module Clacky
217
218
 
218
219
  @default_working_dir = options[:default_working_dir] || ENV["CLACKY_WORKSPACE_DIR"]
219
220
 
221
+ # HTTP proxy policy. The user's shell ENV (HTTP_PROXY etc.) is always
222
+ # ignored — set proxy_url here to route Clacky's outbound HTTP through
223
+ # a proxy. Leave nil to go direct.
224
+ @proxy_url = options[:proxy_url]
225
+
220
226
  # Per-session virtual model overlay.
221
227
  # When set, #current_model returns a *merged* hash (the resolved @models
222
228
  # entry merged with this overlay) without mutating the shared @models
@@ -390,6 +396,7 @@ module Clacky
390
396
  FileUtils.mkdir_p(config_dir)
391
397
  File.write(config_file, to_yaml)
392
398
  FileUtils.chmod(0o600, config_file)
399
+ Clacky::ProxyConfig.reset_cache! if defined?(Clacky::ProxyConfig)
393
400
  end
394
401
 
395
402
  # Convert to YAML format (top-level array)
@@ -407,6 +414,7 @@ module Clacky
407
414
  memory_update_enabled
408
415
  skill_evolution max_running_agents max_idle_agents
409
416
  default_working_dir
417
+ proxy_url
410
418
  ].freeze
411
419
 
412
420
  # Serialize the current agent configuration to YAML.
@@ -425,7 +433,8 @@ module Clacky
425
433
  "skill_evolution" => @skill_evolution,
426
434
  "max_running_agents" => @max_running_agents,
427
435
  "max_idle_agents" => @max_idle_agents,
428
- "default_working_dir" => @default_working_dir
436
+ "default_working_dir" => @default_working_dir,
437
+ "proxy_url" => @proxy_url
429
438
  }
430
439
  YAML.dump("settings" => settings, "models" => persistable_models)
431
440
  end
@@ -606,12 +615,16 @@ module Clacky
606
615
  }.compact
607
616
  end
608
617
 
609
- # Find model by type (default or lite or media kind)
618
+ # Find model by type (default or lite or media kind or ocr sidecar)
610
619
  # Returns the model hash or nil if not found.
611
620
  # For media kinds (image/video/audio): explicit user-configured (custom)
612
621
  # entries win; otherwise an auto-derived virtual entry is returned
613
622
  # based on the default model's provider — mirroring how lite is
614
623
  # virtually derived via #lite_model_config_for_current.
624
+ # For "ocr": same custom→auto→nil pattern. Auto path first checks
625
+ # whether the current (else default) model itself supports vision (zero-overhead path,
626
+ # no sidecar needed); if not, derives from the provider's
627
+ # default_ocr_model.
615
628
  def find_model_by_type(type)
616
629
  kind = type.to_s
617
630
  if Clacky::Providers::MEDIA_KINDS.include?(kind)
@@ -622,16 +635,24 @@ module Clacky
622
635
  end
623
636
  return derive_media_model(kind, model_override: entry && entry["model"])
624
637
  end
638
+ if kind == "ocr"
639
+ entry = @models.find { |m| m["type"] == "ocr" }
640
+ return nil if entry && entry["disabled"]
641
+ if entry && entry["base_url"].to_s.strip != "" && entry["api_key"].to_s.strip != ""
642
+ return entry
643
+ end
644
+ return derive_ocr_model(model_override: entry && entry["model"])
645
+ end
625
646
  @models.find { |m| m["type"] == type }
626
647
  end
627
648
 
628
649
  private def derive_media_model(kind, model_override: nil)
629
- default = find_model_by_type("default")
630
- return nil unless default
650
+ anchor = current_model || find_model_by_type("default")
651
+ return nil unless anchor
631
652
 
632
653
  provider_id = Clacky::Providers.resolve_provider(
633
- base_url: default["base_url"],
634
- api_key: default["api_key"]
654
+ base_url: anchor["base_url"],
655
+ api_key: anchor["api_key"]
635
656
  )
636
657
  return nil unless provider_id
637
658
 
@@ -649,8 +670,8 @@ module Clacky
649
670
 
650
671
  {
651
672
  "model" => model_name,
652
- "base_url" => default["base_url"],
653
- "api_key" => default["api_key"],
673
+ "base_url" => anchor["base_url"],
674
+ "api_key" => anchor["api_key"],
654
675
  "type" => kind,
655
676
  "auto_injected" => true
656
677
  }
@@ -662,6 +683,54 @@ module Clacky
662
683
  @models.reject! { |m| m["auto_injected"] && Clacky::Providers::MEDIA_KINDS.include?(m["type"].to_s) }
663
684
  end
664
685
 
686
+ # Derive an OCR sidecar entry from the anchor model's provider (current model, else default).
687
+ # Resolution order:
688
+ # 1. If the anchor model itself supports vision → return an entry for it
689
+ # directly (zero-overhead path; no separate sidecar call needed).
690
+ # 2. Otherwise look up the provider's default_ocr_model (or honour
691
+ # model_override if it's a vision-capable model on that provider).
692
+ # 3. nil when the provider has no vision-capable lineup at all
693
+ # (e.g. DeepSeek V4) — caller falls back to today's "no vision" UX.
694
+ private def derive_ocr_model(model_override: nil)
695
+ # Anchor on the model the session is *actually* running on, not the
696
+ # yml `type: default` marker — those diverge whenever the user
697
+ # switches model mid-session (e.g. opus → deepseek).
698
+ anchor = current_model || find_model_by_type("default")
699
+ return nil unless anchor
700
+
701
+ provider_id = Clacky::Providers.resolve_provider(
702
+ base_url: anchor["base_url"], api_key: anchor["api_key"]
703
+ )
704
+ return nil unless provider_id
705
+
706
+ if Clacky::Providers.supports?(provider_id, :vision, model_name: anchor["model"])
707
+ return {
708
+ "model" => anchor["model"],
709
+ "base_url" => anchor["base_url"],
710
+ "api_key" => anchor["api_key"],
711
+ "type" => "ocr",
712
+ "auto_injected" => true,
713
+ "primary" => true
714
+ }
715
+ end
716
+
717
+ candidates = Clacky::Providers.ocr_models(provider_id)
718
+ model_name = if model_override && candidates.include?(model_override)
719
+ model_override
720
+ else
721
+ Clacky::Providers.default_ocr_model(provider_id)
722
+ end
723
+ return nil if model_name.nil? || model_name.to_s.empty?
724
+
725
+ {
726
+ "model" => model_name,
727
+ "base_url" => anchor["base_url"],
728
+ "api_key" => anchor["api_key"],
729
+ "type" => "ocr",
730
+ "auto_injected" => true
731
+ }
732
+ end
733
+
665
734
  # Returns the configured/derived media model entry for `kind`, plus a
666
735
  # hint about its source. UI uses this to render the tri-state control.
667
736
  # @param kind [String] one of "image" / "video" / "audio"
@@ -738,6 +807,63 @@ module Clacky
738
807
  }
739
808
  end
740
809
 
810
+ # Tri-state introspection for the OCR sidecar — mirrors #media_state shape
811
+ # so the Settings UI can reuse the same row component.
812
+ # @return [Hash{String=>Object}] keys:
813
+ # "configured" — anything available (auto or custom)
814
+ # "source" — "off" | "auto" | "custom"
815
+ # "primary" — true when auto resolves to the default model itself
816
+ # (no sidecar call needed)
817
+ # "model"/"base_url"/"provider"/"available"
818
+ def ocr_state
819
+ raw_entry = @models.find { |m| m["type"] == "ocr" }
820
+
821
+ default = find_model_by_type("default")
822
+ default_provider = default && Clacky::Providers.resolve_provider(
823
+ base_url: default["base_url"], api_key: default["api_key"]
824
+ )
825
+ available = default_provider ? Clacky::Providers.ocr_models(default_provider) : []
826
+
827
+ if raw_entry && raw_entry["disabled"]
828
+ return {
829
+ "configured" => false,
830
+ "source" => "off",
831
+ "model" => nil,
832
+ "base_url" => nil,
833
+ "provider" => nil,
834
+ "primary" => false,
835
+ "available" => available
836
+ }
837
+ end
838
+
839
+ is_custom = raw_entry &&
840
+ raw_entry["base_url"].to_s.strip != "" &&
841
+ raw_entry["api_key"].to_s.strip != ""
842
+ override_model = raw_entry && !is_custom ? raw_entry["model"] : nil
843
+
844
+ entry = if is_custom
845
+ raw_entry
846
+ else
847
+ derive_ocr_model(model_override: override_model)
848
+ end
849
+
850
+ provider_id = if entry
851
+ Clacky::Providers.resolve_provider(
852
+ base_url: entry["base_url"], api_key: entry["api_key"]
853
+ )
854
+ end
855
+
856
+ {
857
+ "configured" => !entry.nil?,
858
+ "source" => is_custom ? "custom" : (entry ? "auto" : "off"),
859
+ "model" => entry && entry["model"],
860
+ "base_url" => entry && entry["base_url"],
861
+ "provider" => provider_id,
862
+ "primary" => !!(entry && entry["primary"]),
863
+ "available" => available
864
+ }
865
+ end
866
+
741
867
  # Find model by composite key (model name + base_url).
742
868
  # Used when restoring a session to match its original model without relying
743
869
  # on the runtime-only id (which changes on every process restart).
@@ -1050,7 +1176,7 @@ module Clacky
1050
1176
  # Returns true if successful
1051
1177
  def set_model_type(index, type)
1052
1178
  return false if index < 0 || index >= @models.length
1053
- return false unless ["default", "lite", "image", "video", "audio", nil].include?(type)
1179
+ return false unless ["default", "lite", "image", "video", "audio", "ocr", nil].include?(type)
1054
1180
 
1055
1181
  if type
1056
1182
  # Remove type from any other model that has it
data/lib/clacky/client.rb CHANGED
@@ -398,7 +398,17 @@ module Clacky
398
398
  def parse_simple_openai_response(response)
399
399
  raise_error(response) unless response.status == 200
400
400
  parsed_body = safe_json_parse(response.body, context: "LLM response")
401
- parsed_body["choices"].first["message"]["content"]
401
+ content = parsed_body.dig("choices", 0, "message", "content")
402
+ if content.nil?
403
+ snippet = response.body.to_s[0, 1200]
404
+ if defined?(Clacky::Logger)
405
+ Clacky::Logger.warn("[parse_simple_openai_response] no content. status=#{response.status} body=#{snippet}")
406
+ end
407
+ raise Clacky::Error,
408
+ "Upstream OpenAI-compatible response missing choices[0].message.content. " \
409
+ "Body snippet: #{snippet}"
410
+ end
411
+ content
402
412
  end
403
413
 
404
414
  # ── Prompt caching helpers ────────────────────────────────────────────────
@@ -506,61 +516,64 @@ module Clacky
506
516
  end
507
517
 
508
518
  def bedrock_connection
509
- @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
510
- conn.headers["Content-Type"] = "application/json"
511
- conn.headers["Authorization"] = "Bearer #{@api_key}"
512
- conn.options.timeout = @read_timeout || 300
513
- conn.options.open_timeout = 10
514
- conn.ssl.verify = false
515
- conn.adapter Faraday.default_adapter
519
+ current_epoch = Clacky::ProxyConfig.epoch
520
+ if @bedrock_connection.nil? ||
521
+ (!@bedrock_connection_epoch.nil? && @bedrock_connection_epoch != current_epoch)
522
+ @bedrock_connection = Faraday.new(url: @base_url) do |conn|
523
+ conn.headers["Content-Type"] = "application/json"
524
+ conn.headers["Authorization"] = "Bearer #{@api_key}"
525
+ conn.options.timeout = @read_timeout || 300
526
+ conn.options.open_timeout = 10
527
+ conn.ssl.verify = false
528
+ conn.adapter Faraday.default_adapter
529
+ end
530
+ @bedrock_connection_epoch = current_epoch
516
531
  end
532
+ @bedrock_connection
517
533
  end
518
534
 
519
535
  def openai_connection
520
- @openai_connection ||= Faraday.new(url: @base_url) do |conn|
521
- conn.headers["Content-Type"] = "application/json"
522
- conn.headers["Authorization"] = "Bearer #{@api_key}"
523
- conn.options.timeout = @read_timeout || 300
524
- conn.options.open_timeout = 10
525
- conn.ssl.verify = false
526
- conn.adapter Faraday.default_adapter
536
+ current_epoch = Clacky::ProxyConfig.epoch
537
+ if @openai_connection.nil? ||
538
+ (!@openai_connection_epoch.nil? && @openai_connection_epoch != current_epoch)
539
+ @openai_connection = Faraday.new(url: @base_url) do |conn|
540
+ conn.headers["Content-Type"] = "application/json"
541
+ conn.headers["Authorization"] = "Bearer #{@api_key}"
542
+ conn.options.timeout = @read_timeout || 300
543
+ conn.options.open_timeout = 10
544
+ conn.ssl.verify = false
545
+ conn.adapter Faraday.default_adapter
546
+ end
547
+ @openai_connection_epoch = current_epoch
527
548
  end
549
+ @openai_connection
528
550
  end
529
551
 
530
552
  def anthropic_connection
531
- @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
532
- conn.headers["Content-Type"] = "application/json"
533
- conn.headers["x-api-key"] = @api_key
534
- conn.headers["anthropic-version"] = "2023-06-01"
535
- conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
536
- # OpenRouter's /v1/messages endpoint authenticates with a Bearer
537
- # token (the OpenRouter API key), not Anthropic's x-api-key. We send
538
- # both so the same connection code works for direct Anthropic and
539
- # for OpenRouter-proxied Claude — each endpoint ignores the header
540
- # it doesn't recognise.
541
- if @provider_id == "openrouter"
542
- conn.headers["Authorization"] = "Bearer #{@api_key}"
543
- end
544
- # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
545
- # prefix whitelist limited to first-party coding agents (Kimi CLI,
546
- # Claude Code, Roo Code, Kilo Code, ...). Requests with the default
547
- # Faraday UA are rejected with HTTP 403 access_terminated_error,
548
- # despite a valid API key. We send a Claude Code-shaped UA here
549
- # because openclacky talks to this endpoint over the same Anthropic
550
- # /v1/messages protocol that Claude Code uses, so the UA matches the
551
- # wire-level behaviour. Hardcoding rather than exposing as a config
552
- # field is intentional: the only UAs known to pass the gate are the
553
- # whitelisted-client formats, and the project's preset registry is
554
- # the single source of truth for provider-specific quirks (mirroring
555
- # how the openrouter Bearer-fallback above is hardcoded).
556
- if @provider_id == "kimi-coding"
557
- conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
553
+ current_epoch = Clacky::ProxyConfig.epoch
554
+ if @anthropic_connection.nil? ||
555
+ (!@anthropic_connection_epoch.nil? && @anthropic_connection_epoch != current_epoch)
556
+ @anthropic_connection = Faraday.new(url: @base_url) do |conn|
557
+ conn.headers["Content-Type"] = "application/json"
558
+ conn.headers["x-api-key"] = @api_key
559
+ conn.headers["anthropic-version"] = "2023-06-01"
560
+ conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
561
+ if @provider_id == "openrouter"
562
+ conn.headers["Authorization"] = "Bearer #{@api_key}"
563
+ end
564
+ # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
565
+ # prefix whitelist limited to first-party coding agents.
566
+ if @provider_id == "kimi-coding"
567
+ conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
568
+ end
569
+ conn.options.timeout = @read_timeout || 300
570
+ conn.options.open_timeout = 10
571
+ conn.ssl.verify = false
572
+ conn.adapter Faraday.default_adapter
558
573
  end
559
- conn.options.timeout = @read_timeout || 300
560
- conn.options.open_timeout = 10
561
- conn.ssl.verify = false
562
- conn.adapter Faraday.default_adapter
574
+ @anthropic_connection_epoch = current_epoch
563
575
  end
576
+ @anthropic_connection
564
577
  end
565
578
 
566
579
  # Correct relative path for the Anthropic /v1/messages endpoint, accounting