openclacky 1.2.12 → 1.2.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +5 -1
- data/.clacky/skills/gem-release/scripts/release.sh +4 -1
- data/CHANGELOG.md +39 -0
- data/lib/clacky/agent/llm_caller.rb +40 -25
- data/lib/clacky/agent/memory_updater.rb +12 -0
- data/lib/clacky/agent/session_serializer.rb +1 -0
- data/lib/clacky/agent/skill_auto_creator.rb +7 -4
- data/lib/clacky/agent/skill_evolution.rb +23 -5
- data/lib/clacky/agent/skill_manager.rb +86 -1
- data/lib/clacky/agent/skill_reflector.rb +18 -23
- data/lib/clacky/agent.rb +132 -15
- data/lib/clacky/agent_config.rb +183 -22
- data/lib/clacky/cli.rb +55 -0
- data/lib/clacky/client.rb +11 -1
- data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
- data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
- data/lib/clacky/default_skills/persist-memory/SKILL.md +4 -3
- data/lib/clacky/default_skills/search-skills/SKILL.md +61 -0
- data/lib/clacky/idle_compression_timer.rb +1 -1
- data/lib/clacky/message_format/open_ai.rb +7 -1
- data/lib/clacky/openai_stream_aggregator.rb +4 -1
- data/lib/clacky/providers.rb +77 -12
- data/lib/clacky/server/http_server.rb +296 -7
- data/lib/clacky/server/session_registry.rb +30 -8
- data/lib/clacky/server/web_ui_controller.rb +24 -1
- data/lib/clacky/session_manager.rb +120 -0
- data/lib/clacky/tools/web_search.rb +59 -8
- data/lib/clacky/ui2/layout_manager.rb +15 -5
- data/lib/clacky/ui2/progress_handle.rb +18 -8
- data/lib/clacky/ui2/ui_controller.rb +27 -0
- data/lib/clacky/ui_interface.rb +22 -0
- data/lib/clacky/utils/model_pricing.rb +96 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/vision/resolver.rb +157 -0
- data/lib/clacky/web/app.css +209 -4
- data/lib/clacky/web/app.js +6 -5
- data/lib/clacky/web/i18n.js +22 -6
- data/lib/clacky/web/index.html +2 -1
- data/lib/clacky/web/sessions.js +408 -80
- data/lib/clacky/web/settings.js +241 -60
- data/lib/clacky/web/skills.js +5 -14
- data/lib/clacky/web/utils.js +57 -0
- data/lib/clacky/web/ws-dispatcher.js +136 -0
- data/lib/clacky.rb +1 -0
- metadata +6 -2
data/lib/clacky/agent.rb
CHANGED
|
@@ -341,19 +341,23 @@ module Clacky
|
|
|
341
341
|
# the file_prompt builder can't emit the "not supported by model" /
|
|
342
342
|
# "too large" note for downgraded images.
|
|
343
343
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
344
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
344
345
|
ref = Utils::FileProcessor.process_path(path, name: name)
|
|
345
346
|
{ name: ref.name, type: ref.type.to_s, path: ref.original_path,
|
|
346
347
|
preview_path: ref.preview_path, parse_error: ref.parse_error, parser_path: ref.parser_path,
|
|
347
|
-
downgrade_reason: downgrade_reason }
|
|
348
|
+
downgrade_reason: downgrade_reason, ocr_text: ocr_text }
|
|
348
349
|
end
|
|
349
350
|
|
|
350
351
|
# Build display_files for replay: lightweight metadata so the UI can reconstruct
|
|
351
|
-
# file badges (PDF, doc, etc.) on page refresh.
|
|
352
|
-
#
|
|
352
|
+
# file badges (PDF, doc, etc.) on page refresh. Vision-inlined images are NOT
|
|
353
|
+
# stored here — they recover from image_url blocks in user_content. Downgraded
|
|
354
|
+
# images (provider has no vision / too large / OCR'd) DO need path here so the
|
|
355
|
+
# UI can re-render them from the on-disk copy across session switches.
|
|
353
356
|
display_files = all_disk_files.filter_map do |f|
|
|
354
357
|
name = f[:name] || f["name"]
|
|
355
358
|
next unless name
|
|
356
359
|
{ name: name, type: f[:type] || f["type"] || "file",
|
|
360
|
+
path: f[:path] || f["path"],
|
|
357
361
|
preview_path: f[:preview_path] || f["preview_path"] }
|
|
358
362
|
end
|
|
359
363
|
|
|
@@ -381,6 +385,7 @@ module Clacky
|
|
|
381
385
|
parse_error = f[:parse_error] || f["parse_error"]
|
|
382
386
|
parser_path = f[:parser_path] || f["parser_path"]
|
|
383
387
|
downgrade_reason = f[:downgrade_reason] || f["downgrade_reason"]
|
|
388
|
+
ocr_text = f[:ocr_text] || f["ocr_text"]
|
|
384
389
|
|
|
385
390
|
next unless name
|
|
386
391
|
|
|
@@ -396,6 +401,14 @@ module Clacky
|
|
|
396
401
|
note = downgrade_note_for(downgrade_reason)
|
|
397
402
|
lines << "Note: #{note}" if note
|
|
398
403
|
|
|
404
|
+
# OCR transcription (when an OCR sidecar successfully described
|
|
405
|
+
# an image the primary model couldn't see). Embedded inline so
|
|
406
|
+
# the LLM has the description colocated with the file entry.
|
|
407
|
+
if ocr_text && !ocr_text.strip.empty?
|
|
408
|
+
lines << "OCR description:"
|
|
409
|
+
lines << ocr_text.strip
|
|
410
|
+
end
|
|
411
|
+
|
|
399
412
|
# Parser failed — instruct LLM to fix and re-run
|
|
400
413
|
if preview_path.nil? && parse_error
|
|
401
414
|
lines << "Parse failed: #{parse_error}"
|
|
@@ -533,6 +546,11 @@ module Clacky
|
|
|
533
546
|
end
|
|
534
547
|
end
|
|
535
548
|
|
|
549
|
+
# If the assistant ended its turn with a question, treat this as
|
|
550
|
+
# an in-flight conversation (agent is awaiting the user's reply)
|
|
551
|
+
# and skip skill evolution — the task isn't truly complete yet.
|
|
552
|
+
awaiting_user_feedback = true if ends_with_question
|
|
553
|
+
|
|
536
554
|
break
|
|
537
555
|
end
|
|
538
556
|
|
|
@@ -656,6 +674,7 @@ module Clacky
|
|
|
656
674
|
# Safety net: ensure any lingering progress spinner is stopped.
|
|
657
675
|
# Normal paths close their own spinners; this guards against exceptions
|
|
658
676
|
# raised between a progress slot's active/done pair.
|
|
677
|
+
Clacky::Logger.warn("[ph_debug] agent_run_ensure")
|
|
659
678
|
@ui&.show_progress(phase: "done")
|
|
660
679
|
|
|
661
680
|
# Shred any decrypted-script tmpdirs created during this run for encrypted brand skills.
|
|
@@ -1092,6 +1111,9 @@ module Clacky
|
|
|
1092
1111
|
# base64 data in a `role:"tool"` message causes it to be JSON-encoded as
|
|
1093
1112
|
# plain text, inflating token counts by 20-40x. The tool result carries a
|
|
1094
1113
|
# plain-text description for the LLM; the actual image is delivered here.
|
|
1114
|
+
vision_supported = @config.current_model_supports?(:vision)
|
|
1115
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1116
|
+
|
|
1095
1117
|
tool_results.each do |tr|
|
|
1096
1118
|
inject = tr[:image_inject]
|
|
1097
1119
|
next unless inject
|
|
@@ -1103,12 +1125,18 @@ module Clacky
|
|
|
1103
1125
|
|
|
1104
1126
|
data_url = "data:#{mime_type};base64,#{base64_data}"
|
|
1105
1127
|
label = path ? File.basename(path.to_s) : "image"
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1128
|
+
|
|
1129
|
+
image_content =
|
|
1130
|
+
if vision_supported
|
|
1131
|
+
image_block = { type: "image_url", image_url: { url: data_url } }
|
|
1132
|
+
image_block[:image_path] = path if path
|
|
1133
|
+
[{ type: "text", text: "[Image: #{label}]" }, image_block]
|
|
1134
|
+
else
|
|
1135
|
+
ocr_result = try_ocr(ocr_entry, data_url: data_url, name: label)
|
|
1136
|
+
text = ocr_text_for_inject(label, ocr_result, ocr_entry)
|
|
1137
|
+
[{ type: "text", text: text }]
|
|
1138
|
+
end
|
|
1139
|
+
|
|
1112
1140
|
@history.append({
|
|
1113
1141
|
role: "user",
|
|
1114
1142
|
content: image_content,
|
|
@@ -1236,7 +1264,7 @@ module Clacky
|
|
|
1236
1264
|
# Skip malformed tool calls with nil name or arguments
|
|
1237
1265
|
next if name.nil? || arguments.nil?
|
|
1238
1266
|
|
|
1239
|
-
{
|
|
1267
|
+
formatted = {
|
|
1240
1268
|
id: call[:id],
|
|
1241
1269
|
type: call[:type] || "function",
|
|
1242
1270
|
function: {
|
|
@@ -1244,6 +1272,8 @@ module Clacky
|
|
|
1244
1272
|
arguments: arguments
|
|
1245
1273
|
}
|
|
1246
1274
|
}
|
|
1275
|
+
formatted[:extra_content] = call[:extra_content] if call[:extra_content]
|
|
1276
|
+
formatted
|
|
1247
1277
|
end
|
|
1248
1278
|
|
|
1249
1279
|
valid.any? ? valid : nil
|
|
@@ -1486,6 +1516,11 @@ module Clacky
|
|
|
1486
1516
|
# the current model (no stale state on `/model` switch).
|
|
1487
1517
|
vision_supported = @config.current_model_supports?(:vision)
|
|
1488
1518
|
|
|
1519
|
+
# OCR sidecar — only consulted when the primary doesn't see images.
|
|
1520
|
+
# When the sidecar entry has "primary"=>true, the primary itself can see,
|
|
1521
|
+
# so vision_supported was already true and we never enter the OCR branch.
|
|
1522
|
+
ocr_entry = vision_supported ? nil : @config.find_model_by_type("ocr")
|
|
1523
|
+
|
|
1489
1524
|
vision_images = [] # Array of { url:, name:, size_bytes:, path: }
|
|
1490
1525
|
downgraded = []
|
|
1491
1526
|
|
|
@@ -1502,8 +1537,11 @@ module Clacky
|
|
|
1502
1537
|
file_ref = Utils::FileProcessor.save_image_to_disk(body: raw, mime_type: mime, filename: name)
|
|
1503
1538
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1504
1539
|
if reason
|
|
1505
|
-
|
|
1506
|
-
|
|
1540
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, data_url: data_url, name: name) : nil
|
|
1541
|
+
entry = { name: name, path: file_ref.original_path, type: "image",
|
|
1542
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1543
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1544
|
+
downgraded << entry
|
|
1507
1545
|
else
|
|
1508
1546
|
vision_images << { url: data_url, name: name, size_bytes: byte_size, path: file_ref.original_path }
|
|
1509
1547
|
end
|
|
@@ -1514,8 +1552,11 @@ module Clacky
|
|
|
1514
1552
|
byte_size = (b64_data.bytesize * 3) / 4
|
|
1515
1553
|
reason = downgrade_reason_for(vision_supported, byte_size, max_bytes)
|
|
1516
1554
|
if reason
|
|
1517
|
-
|
|
1518
|
-
|
|
1555
|
+
ocr_result = (reason == :provider_no_vision) ? try_ocr(ocr_entry, path: path, name: name) : nil
|
|
1556
|
+
entry = { name: name, path: path, type: "image",
|
|
1557
|
+
mime_type: mime, size_bytes: byte_size, downgrade_reason: reason }
|
|
1558
|
+
apply_ocr_outcome!(entry, ocr_result)
|
|
1559
|
+
downgraded << entry
|
|
1519
1560
|
else
|
|
1520
1561
|
vision_images << { url: data_url_from_path, name: name, size_bytes: byte_size, path: path }
|
|
1521
1562
|
end
|
|
@@ -1528,6 +1569,30 @@ module Clacky
|
|
|
1528
1569
|
[vision_images, downgraded]
|
|
1529
1570
|
end
|
|
1530
1571
|
|
|
1572
|
+
# Run the OCR sidecar over a single image, showing a progress spinner while
# the sidecar call is in flight.
#
# The image is taken from +data_url+ when one is given, otherwise from +path+.
#
# @param ocr_entry [Hash, nil] sidecar model entry (string keys)
# @param data_url [String, nil] image as a data URL; takes precedence over +path+
# @param path [String, nil] image path, used only when +data_url+ is absent
# @param name [String, nil] image label; accepted from call sites but not used here
# @return [Object, nil]
#   nil — no sidecar entry was supplied, or the entry is flagged "primary"
#         (the sidecar IS the primary model, so an extra hop is pointless).
#         Callers treat nil as ":provider_no_vision".
#   otherwise — whatever Clacky::Vision::Resolver#describe returns; callers in
#         this file branch on its status (:ok / :empty / :call_failed /
#         :bad_image), so a failed sidecar call is presumably reported through
#         that result rather than as nil — confirm against the Resolver.
private def try_ocr(ocr_entry, data_url: nil, path: nil, name: nil)
  return nil unless ocr_entry && !ocr_entry["primary"]

  source =
    if data_url
      { data_url: data_url }
    else
      { path: path }
    end

  @ui&.show_progress("OCR...", progress_type: "thinking", phase: "active")
  begin
    resolver = Clacky::Vision::Resolver.new(ocr_entry)
    resolver.describe(source)
  ensure
    # Close the spinner on every exit path, including when the resolver raises.
    @ui&.show_progress(phase: "done")
  end
end
|
|
1595
|
+
|
|
1531
1596
|
# Decide whether an image must be downgraded to a disk ref, and if so why.
|
|
1532
1597
|
# Precedence: provider capability is checked first — a text-only model
|
|
1533
1598
|
# can't use the image at any size, so there's no point re-checking size.
|
|
@@ -1546,9 +1611,61 @@ module Clacky
|
|
|
1546
1611
|
private def downgrade_note_for(reason)
|
|
1547
1612
|
case reason&.to_sym
|
|
1548
1613
|
when :provider_no_vision
|
|
1549
|
-
"The current model does not support vision input
|
|
1614
|
+
"The current model does not support vision input and no OCR sidecar is configured. Tell the user clearly that to analyze this image they need to either: (1) configure an OCR sidecar model in Settings → Media → OCR (any vision-capable model works as the sidecar — e.g. gemini-3-5-flash, gpt-4o-mini, claude-3-5-haiku), or (2) switch the current model to a vision-capable one. Do not attempt to guess the image content."
|
|
1550
1615
|
when :too_large
|
|
1551
1616
|
"Image was too large for inline delivery and has been saved to disk. Read it with a vision-capable tool/model if needed."
|
|
1617
|
+
when :ocr_resolved
|
|
1618
|
+
"The current model does not support vision input. The image has been transcribed by an OCR sidecar model — the description below is what the model sees in place of the raw pixels."
|
|
1619
|
+
when :ocr_call_failed
|
|
1620
|
+
"The current model does not support vision and the configured OCR sidecar call failed. Tell the user the sidecar (Settings → Media → OCR) errored — likely a misconfigured base_url / api_key, or the upstream is down. They can retry, fix the sidecar config, or switch to a vision-capable primary model. Do not guess the image content."
|
|
1621
|
+
when :ocr_empty
|
|
1622
|
+
"The current model does not support vision. The OCR sidecar responded but returned no readable text (the model produced no description — possibly the image is blank, or the model exhausted its token budget on internal reasoning). Tell the user honestly; do not guess the image content."
|
|
1623
|
+
when :ocr_bad_image
|
|
1624
|
+
"The current model does not support vision. The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
|
|
1625
|
+
end
|
|
1626
|
+
end
|
|
1627
|
+
|
|
1628
|
+
# Fold the outcome of an OCR sidecar call into a downgraded-image entry.
# Mutates +entry+ in place and returns it.
#
# Status handling:
#   :ok          — stores the text under :ocr_text and sets the reason to
#                  :ocr_resolved. If the text is nil or whitespace-only the
#                  entry is classified as :ocr_empty instead, so the note
#                  never promises a description that is not there.
#   :empty       — reason becomes :ocr_empty.
#   :call_failed — reason becomes :ocr_call_failed; the error is kept under
#                  :ocr_error.
#   :bad_image   — reason becomes :ocr_bad_image.
#   any other status, or a nil +ocr_result+ — entry is left untouched, so the
#                  original :downgrade_reason stays in place.
#
# @param entry [Hash] downgraded-image entry with symbol keys
# @param ocr_result [#status, #text, #error, nil] outcome of the sidecar call
# @return [Hash] the same +entry+ object
private def apply_ocr_outcome!(entry, ocr_result)
  return entry unless ocr_result

  case ocr_result.status
  when :ok
    text = ocr_result.text
    if text.to_s.strip.empty?
      # A "successful" call with nothing readable is an empty result.
      entry[:downgrade_reason] = :ocr_empty
    else
      entry[:ocr_text] = text
      entry[:downgrade_reason] = :ocr_resolved
    end
  when :empty
    entry[:downgrade_reason] = :ocr_empty
  when :call_failed
    entry[:downgrade_reason] = :ocr_call_failed
    entry[:ocr_error] = ocr_result.error
  when :bad_image
    entry[:downgrade_reason] = :ocr_bad_image
  end
  entry
end
|
|
1650
|
+
|
|
1651
|
+
# Build the inline text block used by the image_inject path (tool screenshots,
# generated images, etc. that arrive as content blocks rather than as
# display_files entries) when the current model cannot take the image itself.
#
# Always returns a String, so the caller never builds a text block whose
# text is nil.
#
# @param label [String] image label shown in the "[Image: …]" header
# @param ocr_result [#status, #text, #error, nil] sidecar outcome; nil means
#   no sidecar call was made
# @param ocr_entry [Hash, nil] sidecar model entry; only "model" is read
# @return [String] either the OCR transcription or an instruction telling the
#   LLM why the image is unavailable
private def ocr_text_for_inject(label, ocr_result, ocr_entry)
  header = "[Image: #{label}]"
  if ocr_result.nil?
    return "#{header} The current model has no vision and no OCR sidecar is configured. Tell the user to either configure an OCR sidecar in Settings → Media → OCR, or switch to a vision-capable model, then retry. Do not guess the image content."
  end

  model = ocr_entry && ocr_entry["model"]
  status = ocr_result.status
  # Only an :ok result is expected to carry text; tolerate nil there.
  description = status == :ok ? ocr_result.text.to_s.strip : nil
  # An :ok result with nothing readable is reported as an empty result.
  status = :empty if description && description.empty?

  case status
  when :ok
    "#{header}\nOCR description (the current model cannot see images directly; this transcription was produced by sidecar #{model}):\n#{description}"
  when :empty
    "#{header} The OCR sidecar (#{model}) returned no readable text. The image may be blank, or the sidecar exhausted its token budget on internal reasoning. Tell the user honestly; do not guess the image content."
  when :call_failed
    "#{header} The OCR sidecar (#{model}) call failed: #{ocr_result.error}. Tell the user the sidecar errored (likely a misconfigured base_url / api_key in Settings → Media → OCR, or the upstream is down). They can retry, fix the sidecar, or switch to a vision-capable primary model. Do not guess the image content."
  when :bad_image
    "#{header} The OCR sidecar could not read the image bytes (corrupt or unsupported format). Tell the user; do not guess the image content."
  else
    # Unknown status: still hand the LLM a usable instruction.
    "#{header} The OCR sidecar (#{model}) returned an unrecognized result (#{status.inspect}). Tell the user the image could not be transcribed. Do not guess the image content."
  end
end
|
|
1554
1671
|
|
data/lib/clacky/agent_config.rb
CHANGED
|
@@ -606,39 +606,63 @@ module Clacky
|
|
|
606
606
|
}.compact
|
|
607
607
|
end
|
|
608
608
|
|
|
609
|
-
# Find model by type (default or lite or media kind)
|
|
609
|
+
# Find model by type (default or lite or media kind or ocr sidecar)
|
|
610
610
|
# Returns the model hash or nil if not found.
|
|
611
611
|
# For media kinds (image/video/audio): explicit user-configured (custom)
|
|
612
612
|
# entries win; otherwise an auto-derived virtual entry is returned
|
|
613
613
|
# based on the default model's provider — mirroring how lite is
|
|
614
614
|
# virtually derived via #lite_model_config_for_current.
|
|
615
|
+
# For "ocr": same custom→auto→nil pattern. Auto path first checks
|
|
616
|
+
# whether the default model itself supports vision (zero-overhead path,
|
|
617
|
+
# no sidecar needed); if not, derives from the provider's
|
|
618
|
+
# default_ocr_model.
|
|
615
619
|
def find_model_by_type(type)
|
|
616
620
|
kind = type.to_s
|
|
617
621
|
if Clacky::Providers::MEDIA_KINDS.include?(kind)
|
|
618
|
-
|
|
619
|
-
return
|
|
620
|
-
|
|
622
|
+
entry = @models.find { |m| m["type"] == kind }
|
|
623
|
+
return nil if entry && entry["disabled"]
|
|
624
|
+
if entry && entry["base_url"].to_s.strip != "" && entry["api_key"].to_s.strip != ""
|
|
625
|
+
return entry
|
|
626
|
+
end
|
|
627
|
+
return derive_media_model(kind, model_override: entry && entry["model"])
|
|
628
|
+
end
|
|
629
|
+
if kind == "ocr"
|
|
630
|
+
entry = @models.find { |m| m["type"] == "ocr" }
|
|
631
|
+
return nil if entry && entry["disabled"]
|
|
632
|
+
if entry && entry["base_url"].to_s.strip != "" && entry["api_key"].to_s.strip != ""
|
|
633
|
+
return entry
|
|
634
|
+
end
|
|
635
|
+
return derive_ocr_model(model_override: entry && entry["model"])
|
|
621
636
|
end
|
|
622
637
|
@models.find { |m| m["type"] == type }
|
|
623
638
|
end
|
|
624
639
|
|
|
625
|
-
private def derive_media_model(kind)
|
|
626
|
-
|
|
627
|
-
return nil unless
|
|
640
|
+
private def derive_media_model(kind, model_override: nil)
|
|
641
|
+
anchor = current_model || find_model_by_type("default")
|
|
642
|
+
return nil unless anchor
|
|
628
643
|
|
|
629
644
|
provider_id = Clacky::Providers.resolve_provider(
|
|
630
|
-
base_url:
|
|
631
|
-
api_key:
|
|
645
|
+
base_url: anchor["base_url"],
|
|
646
|
+
api_key: anchor["api_key"]
|
|
632
647
|
)
|
|
633
648
|
return nil unless provider_id
|
|
634
649
|
|
|
635
|
-
|
|
650
|
+
if model_override && !model_override.to_s.strip.empty?
|
|
651
|
+
available = Clacky::Providers.media_models(provider_id, kind)
|
|
652
|
+
if available.include?(model_override)
|
|
653
|
+
model_name = model_override
|
|
654
|
+
else
|
|
655
|
+
model_name = Clacky::Providers.default_media_model(provider_id, kind)
|
|
656
|
+
end
|
|
657
|
+
else
|
|
658
|
+
model_name = Clacky::Providers.default_media_model(provider_id, kind)
|
|
659
|
+
end
|
|
636
660
|
return nil if model_name.nil? || model_name.to_s.empty?
|
|
637
661
|
|
|
638
662
|
{
|
|
639
663
|
"model" => model_name,
|
|
640
|
-
"base_url" =>
|
|
641
|
-
"api_key" =>
|
|
664
|
+
"base_url" => anchor["base_url"],
|
|
665
|
+
"api_key" => anchor["api_key"],
|
|
642
666
|
"type" => kind,
|
|
643
667
|
"auto_injected" => true
|
|
644
668
|
}
|
|
@@ -650,6 +674,54 @@ module Clacky
|
|
|
650
674
|
@models.reject! { |m| m["auto_injected"] && Clacky::Providers::MEDIA_KINDS.include?(m["type"].to_s) }
|
|
651
675
|
end
|
|
652
676
|
|
|
677
|
+
# Build an auto-derived ("auto_injected") OCR sidecar entry from the model the
# session is running on: +current_model+, falling back to the entry typed
# "default" — the two can differ after a mid-session model switch.
#
# Resolution order:
#   1. No anchor model, or its provider cannot be resolved → nil.
#   2. The anchor model itself supports vision → an entry pointing at the
#      anchor, flagged "primary" => true (no separate sidecar call needed).
#   3. Otherwise +model_override+ is used when the provider lists it among
#      its OCR models; if not, the provider's default OCR model is used.
#   4. nil when that leaves no model name.
#
# @param model_override [String, nil] preferred sidecar model name
# @return [Hash{String=>Object}, nil] derived entry reusing the anchor's
#   base_url / api_key, or nil
private def derive_ocr_model(model_override: nil)
  anchor = current_model || find_model_by_type("default")
  return nil unless anchor

  base_url = anchor["base_url"]
  api_key = anchor["api_key"]
  provider_id = Clacky::Providers.resolve_provider(base_url: base_url, api_key: api_key)
  return nil unless provider_id

  # Fields shared by both result shapes; "model" is merged in front so the
  # key order of the returned hash stays model, base_url, api_key, type, ….
  shared = {
    "base_url" => base_url,
    "api_key" => api_key,
    "type" => "ocr",
    "auto_injected" => true
  }

  anchor_model = anchor["model"]
  if Clacky::Providers.supports?(provider_id, :vision, model_name: anchor_model)
    return { "model" => anchor_model }.merge(shared).merge("primary" => true)
  end

  candidates = Clacky::Providers.ocr_models(provider_id)
  honour_override = model_override && candidates.include?(model_override)
  model_name = honour_override ? model_override : Clacky::Providers.default_ocr_model(provider_id)
  return nil if model_name.to_s.empty?

  { "model" => model_name }.merge(shared)
end
|
|
724
|
+
|
|
653
725
|
# Returns the configured/derived media model entry for `kind`, plus a
|
|
654
726
|
# hint about its source. UI uses this to render the tri-state control.
|
|
655
727
|
# @param kind [String] one of "image" / "video" / "audio"
|
|
@@ -662,34 +734,123 @@ module Clacky
|
|
|
662
734
|
# "available" [Array<String>] — auto-source candidates from preset
|
|
663
735
|
def media_state(kind)
|
|
664
736
|
kind = kind.to_s
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
737
|
+
raw_entry = @models.find { |m| m["type"] == kind }
|
|
738
|
+
|
|
739
|
+
if raw_entry && raw_entry["disabled"]
|
|
740
|
+
default = find_model_by_type("default")
|
|
741
|
+
default_provider = default && Clacky::Providers.resolve_provider(
|
|
742
|
+
base_url: default["base_url"], api_key: default["api_key"]
|
|
743
|
+
)
|
|
744
|
+
available = default_provider ? Clacky::Providers.media_models(default_provider, kind) : []
|
|
745
|
+
aliases = default_provider ? Clacky::Providers.media_model_aliases(default_provider, kind) : {}
|
|
746
|
+
return {
|
|
747
|
+
"configured" => false,
|
|
748
|
+
"source" => "off",
|
|
749
|
+
"model" => nil,
|
|
750
|
+
"base_url" => nil,
|
|
751
|
+
"provider" => nil,
|
|
752
|
+
"available" => available,
|
|
753
|
+
"aliases" => aliases,
|
|
754
|
+
"stale" => false
|
|
755
|
+
}
|
|
756
|
+
end
|
|
757
|
+
|
|
758
|
+
is_custom = raw_entry &&
|
|
759
|
+
raw_entry["base_url"].to_s.strip != "" &&
|
|
760
|
+
raw_entry["api_key"].to_s.strip != ""
|
|
761
|
+
override_model = raw_entry && !is_custom ? raw_entry["model"] : nil
|
|
762
|
+
|
|
763
|
+
entry = if is_custom
|
|
764
|
+
raw_entry
|
|
765
|
+
else
|
|
766
|
+
derive_media_model(kind, model_override: override_model)
|
|
767
|
+
end
|
|
668
768
|
|
|
669
769
|
provider_id = if entry
|
|
670
770
|
Clacky::Providers.resolve_provider(
|
|
671
|
-
base_url: entry["base_url"],
|
|
672
|
-
api_key: entry["api_key"]
|
|
771
|
+
base_url: entry["base_url"], api_key: entry["api_key"]
|
|
673
772
|
)
|
|
674
773
|
end
|
|
675
774
|
|
|
676
|
-
available_provider_id = if
|
|
775
|
+
available_provider_id = if is_custom
|
|
677
776
|
provider_id
|
|
678
777
|
else
|
|
679
778
|
default = find_model_by_type("default")
|
|
680
779
|
default && Clacky::Providers.resolve_provider(
|
|
681
|
-
base_url: default["base_url"],
|
|
682
|
-
api_key: default["api_key"]
|
|
780
|
+
base_url: default["base_url"], api_key: default["api_key"]
|
|
683
781
|
)
|
|
684
782
|
end
|
|
685
783
|
available = available_provider_id ? Clacky::Providers.media_models(available_provider_id, kind) : []
|
|
784
|
+
aliases = available_provider_id ? Clacky::Providers.media_model_aliases(available_provider_id, kind) : {}
|
|
785
|
+
|
|
786
|
+
stale = !!(override_model && entry && entry["model"] != override_model)
|
|
787
|
+
|
|
788
|
+
{
|
|
789
|
+
"configured" => !entry.nil?,
|
|
790
|
+
"source" => is_custom ? "custom" : (entry ? "auto" : "off"),
|
|
791
|
+
"model" => entry && entry["model"],
|
|
792
|
+
"base_url" => entry && entry["base_url"],
|
|
793
|
+
"provider" => provider_id,
|
|
794
|
+
"available" => available,
|
|
795
|
+
"aliases" => aliases,
|
|
796
|
+
"stale" => stale,
|
|
797
|
+
"requested_model" => stale ? override_model : nil
|
|
798
|
+
}
|
|
799
|
+
end
|
|
800
|
+
|
|
801
|
+
# Tri-state introspection for the OCR sidecar — mirrors #media_state shape
|
|
802
|
+
# so the Settings UI can reuse the same row component.
|
|
803
|
+
# @return [Hash{String=>Object}] keys:
|
|
804
|
+
# "configured" — anything available (auto or custom)
|
|
805
|
+
# "source" — "off" | "auto" | "custom"
|
|
806
|
+
# "primary" — true when auto resolves to the default model itself
|
|
807
|
+
# (no sidecar call needed)
|
|
808
|
+
# "model"/"base_url"/"provider"/"available"
|
|
809
|
+
def ocr_state
|
|
810
|
+
raw_entry = @models.find { |m| m["type"] == "ocr" }
|
|
811
|
+
|
|
812
|
+
default = find_model_by_type("default")
|
|
813
|
+
default_provider = default && Clacky::Providers.resolve_provider(
|
|
814
|
+
base_url: default["base_url"], api_key: default["api_key"]
|
|
815
|
+
)
|
|
816
|
+
available = default_provider ? Clacky::Providers.ocr_models(default_provider) : []
|
|
817
|
+
|
|
818
|
+
if raw_entry && raw_entry["disabled"]
|
|
819
|
+
return {
|
|
820
|
+
"configured" => false,
|
|
821
|
+
"source" => "off",
|
|
822
|
+
"model" => nil,
|
|
823
|
+
"base_url" => nil,
|
|
824
|
+
"provider" => nil,
|
|
825
|
+
"primary" => false,
|
|
826
|
+
"available" => available
|
|
827
|
+
}
|
|
828
|
+
end
|
|
829
|
+
|
|
830
|
+
is_custom = raw_entry &&
|
|
831
|
+
raw_entry["base_url"].to_s.strip != "" &&
|
|
832
|
+
raw_entry["api_key"].to_s.strip != ""
|
|
833
|
+
override_model = raw_entry && !is_custom ? raw_entry["model"] : nil
|
|
834
|
+
|
|
835
|
+
entry = if is_custom
|
|
836
|
+
raw_entry
|
|
837
|
+
else
|
|
838
|
+
derive_ocr_model(model_override: override_model)
|
|
839
|
+
end
|
|
840
|
+
|
|
841
|
+
provider_id = if entry
|
|
842
|
+
Clacky::Providers.resolve_provider(
|
|
843
|
+
base_url: entry["base_url"], api_key: entry["api_key"]
|
|
844
|
+
)
|
|
845
|
+
end
|
|
686
846
|
|
|
687
847
|
{
|
|
688
848
|
"configured" => !entry.nil?,
|
|
689
|
-
"source" =>
|
|
849
|
+
"source" => is_custom ? "custom" : (entry ? "auto" : "off"),
|
|
690
850
|
"model" => entry && entry["model"],
|
|
691
851
|
"base_url" => entry && entry["base_url"],
|
|
692
852
|
"provider" => provider_id,
|
|
853
|
+
"primary" => !!(entry && entry["primary"]),
|
|
693
854
|
"available" => available
|
|
694
855
|
}
|
|
695
856
|
end
|
|
@@ -1006,7 +1167,7 @@ module Clacky
|
|
|
1006
1167
|
# Returns true if successful
|
|
1007
1168
|
def set_model_type(index, type)
|
|
1008
1169
|
return false if index < 0 || index >= @models.length
|
|
1009
|
-
return false unless ["default", "lite", "image", "video", "audio", nil].include?(type)
|
|
1170
|
+
return false unless ["default", "lite", "image", "video", "audio", "ocr", nil].include?(type)
|
|
1010
1171
|
|
|
1011
1172
|
if type
|
|
1012
1173
|
# Remove type from any other model that has it
|
data/lib/clacky/cli.rb
CHANGED
|
@@ -50,6 +50,7 @@ module Clacky
|
|
|
50
50
|
option :verbose, type: :boolean, aliases: "-v", default: false, desc: "Show detailed output"
|
|
51
51
|
option :path, type: :string, desc: "Project directory path (defaults to current directory)"
|
|
52
52
|
option :continue, type: :boolean, aliases: "-c", desc: "Continue most recent session"
|
|
53
|
+
option :fork, type: :string, desc: "Fork a session by number or session ID prefix (creates a copy)"
|
|
53
54
|
option :list, type: :boolean, aliases: "-l", desc: "List recent sessions"
|
|
54
55
|
option :attach, type: :string, aliases: "-a", desc: "Attach to session by number or keyword"
|
|
55
56
|
option :json, type: :boolean, default: false, desc: "Output NDJSON to stdout (for scripting/piping)"
|
|
@@ -140,6 +141,9 @@ module Clacky
|
|
|
140
141
|
elsif options[:attach]
|
|
141
142
|
agent = load_session_by_number(client_factory.call, agent_config, session_manager, working_dir, options[:attach], profile: agent_profile)
|
|
142
143
|
is_session_load = !agent.nil?
|
|
144
|
+
elsif options[:fork]
|
|
145
|
+
agent = fork_session(client_factory.call, agent_config, session_manager, working_dir, options[:fork], profile: agent_profile)
|
|
146
|
+
is_session_load = !agent.nil?
|
|
143
147
|
end
|
|
144
148
|
|
|
145
149
|
# Create new agent if no session loaded
|
|
@@ -549,8 +553,59 @@ module Clacky
|
|
|
549
553
|
Clacky::Agent.from_session(client, agent_config, session_data, profile: resolved_profile)
|
|
550
554
|
end
|
|
551
555
|
|
|
556
|
+
# Fork an existing session and build an agent on top of the copy.
#
# +identifier+ is resolved against up to 100 sessions returned by
# session_manager.all_sessions:
#   * a purely numeric value up to 99 is a 1-based index into that list;
#   * anything else is a session ID prefix, which must match exactly one
#     session.
#
# @param client [Object] LLM client handed to Clacky::Agent.from_session
# @param agent_config [Object] agent configuration handed through unchanged
# @param session_manager [#all_sessions, #fork] session store
# @param working_dir [String] directory used to scope the session list
# @param identifier [String] session number or session ID prefix
# @param profile [Object] fallback agent profile, used when the forked
#   session data carries no :agent_profile
# @return [Object, nil] the agent built from the forked session; nil when
#   there are no sessions or the fork itself fails
# @note Exits the process with status 1 (via +exit+) when the number is out
#   of range or the prefix matches no session or more than one.
def fork_session(client, agent_config, session_manager, working_dir, identifier, profile:)
  # Get a larger list to search through (for ID prefix matching)
  sessions = session_manager.all_sessions(current_dir: working_dir, limit: 100)

  if sessions.empty?
    say "No sessions found.", :yellow
    return nil
  end

  identifier = identifier.to_s

  # \A…\z anchor the whole string; ^…$ would accept "1\nanything" as a number.
  session_data =
    if identifier.match?(/\A\d+\z/) && identifier.to_i <= 99
      index = identifier.to_i - 1
      if index < 0 || index >= sessions.size
        say "Invalid session number. Use -l to list available sessions.", :red
        exit 1
      end
      sessions[index]
    else
      matching_sessions = sessions.select { |s| s[:session_id].to_s.start_with?(identifier) }
      if matching_sessions.empty?
        say "No session found matching ID prefix: #{identifier}", :red
        say "Use -l to list available sessions.", :yellow
        exit 1
      elsif matching_sessions.size > 1
        say "Multiple sessions found matching '#{identifier}':", :yellow
        matching_sessions.each_with_index do |session, idx|
          # A missing or garbled timestamp must not crash the listing.
          created_at =
            begin
              Time.parse(session[:created_at].to_s).strftime("%Y-%m-%d %H:%M")
            rescue ArgumentError
              "unknown time"
            end
          s_id = session[:session_id].to_s[0..7]
          name = session[:name].to_s.empty? ? "Unnamed session" : session[:name]
          say "  #{idx + 1}. [#{s_id}] #{created_at} - #{name}", :cyan
        end
        say "\nPlease use a more specific prefix.", :yellow
        exit 1
      end
      matching_sessions.first
    end

  fork_data = session_manager.fork(session_data[:session_id])
  unless fork_data
    # Say why: the caller treats nil as "no session loaded" and starts fresh.
    say "Failed to fork session #{session_data[:session_id]}.", :red
    return nil
  end

  # Fall back to CLI --agent flag for sessions that predate agent_profile
  restored_profile = fork_data[:agent_profile].to_s
  resolved_profile = restored_profile.empty? ? profile : restored_profile

  Clacky::Agent.from_session(client, agent_config, fork_data, profile: resolved_profile)
end
|
|
605
|
+
|
|
552
606
|
# Handle agent error/interrupt with cleanup
|
|
553
607
|
def handle_agent_exception(ui_controller, agent, session_manager, exception)
|
|
608
|
+
Clacky::Logger.warn("[ph_debug] handle_agent_exception", klass: exception.class.name, msg: exception.message.to_s[0, 200])
|
|
554
609
|
ui_controller.show_progress(phase: "done")
|
|
555
610
|
ui_controller.set_idle_status
|
|
556
611
|
|
data/lib/clacky/client.rb
CHANGED
|
@@ -398,7 +398,17 @@ module Clacky
|
|
|
398
398
|
def parse_simple_openai_response(response)
|
|
399
399
|
raise_error(response) unless response.status == 200
|
|
400
400
|
parsed_body = safe_json_parse(response.body, context: "LLM response")
|
|
401
|
-
parsed_body
|
|
401
|
+
content = parsed_body.dig("choices", 0, "message", "content")
|
|
402
|
+
if content.nil?
|
|
403
|
+
snippet = response.body.to_s[0, 1200]
|
|
404
|
+
if defined?(Clacky::Logger)
|
|
405
|
+
Clacky::Logger.warn("[parse_simple_openai_response] no content. status=#{response.status} body=#{snippet}")
|
|
406
|
+
end
|
|
407
|
+
raise Clacky::Error,
|
|
408
|
+
"Upstream OpenAI-compatible response missing choices[0].message.content. " \
|
|
409
|
+
"Body snippet: #{snippet}"
|
|
410
|
+
end
|
|
411
|
+
content
|
|
402
412
|
end
|
|
403
413
|
|
|
404
414
|
# ── Prompt caching helpers ────────────────────────────────────────────────
|