openclacky 1.2.12 → 1.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.clacky/skills/gem-release/SKILL.md +5 -1
  3. data/.clacky/skills/gem-release/scripts/release.sh +4 -1
  4. data/CHANGELOG.md +39 -0
  5. data/lib/clacky/agent/llm_caller.rb +40 -25
  6. data/lib/clacky/agent/memory_updater.rb +12 -0
  7. data/lib/clacky/agent/session_serializer.rb +1 -0
  8. data/lib/clacky/agent/skill_auto_creator.rb +7 -4
  9. data/lib/clacky/agent/skill_evolution.rb +23 -5
  10. data/lib/clacky/agent/skill_manager.rb +86 -1
  11. data/lib/clacky/agent/skill_reflector.rb +18 -23
  12. data/lib/clacky/agent.rb +132 -15
  13. data/lib/clacky/agent_config.rb +183 -22
  14. data/lib/clacky/cli.rb +55 -0
  15. data/lib/clacky/client.rb +11 -1
  16. data/lib/clacky/default_parsers/pdf_parser.rb +70 -86
  17. data/lib/clacky/default_parsers/pdf_parser_vlm.py +136 -0
  18. data/lib/clacky/default_skills/persist-memory/SKILL.md +4 -3
  19. data/lib/clacky/default_skills/search-skills/SKILL.md +61 -0
  20. data/lib/clacky/idle_compression_timer.rb +1 -1
  21. data/lib/clacky/message_format/open_ai.rb +7 -1
  22. data/lib/clacky/openai_stream_aggregator.rb +4 -1
  23. data/lib/clacky/providers.rb +77 -12
  24. data/lib/clacky/server/http_server.rb +296 -7
  25. data/lib/clacky/server/session_registry.rb +30 -8
  26. data/lib/clacky/server/web_ui_controller.rb +24 -1
  27. data/lib/clacky/session_manager.rb +120 -0
  28. data/lib/clacky/tools/web_search.rb +59 -8
  29. data/lib/clacky/ui2/layout_manager.rb +15 -5
  30. data/lib/clacky/ui2/progress_handle.rb +18 -8
  31. data/lib/clacky/ui2/ui_controller.rb +27 -0
  32. data/lib/clacky/ui_interface.rb +22 -0
  33. data/lib/clacky/utils/model_pricing.rb +96 -0
  34. data/lib/clacky/version.rb +1 -1
  35. data/lib/clacky/vision/resolver.rb +157 -0
  36. data/lib/clacky/web/app.css +209 -4
  37. data/lib/clacky/web/app.js +6 -5
  38. data/lib/clacky/web/i18n.js +22 -6
  39. data/lib/clacky/web/index.html +2 -1
  40. data/lib/clacky/web/sessions.js +408 -80
  41. data/lib/clacky/web/settings.js +241 -60
  42. data/lib/clacky/web/skills.js +5 -14
  43. data/lib/clacky/web/utils.js +57 -0
  44. data/lib/clacky/web/ws-dispatcher.js +136 -0
  45. data/lib/clacky.rb +1 -0
  46. metadata +6 -2
@@ -5,6 +5,7 @@ require "websocket"
5
5
  require "socket"
6
6
  require "json"
7
7
  require "net/http"
8
+ require "faraday"
8
9
  require "thread"
9
10
  require "fileutils"
10
11
  require "tmpdir"
@@ -39,15 +40,13 @@ module Clacky
39
40
  url = f[:data_url] || f["data_url"]
40
41
  name = f[:name] || f["name"]
41
42
  path = f[:path] || f["path"]
43
+ type = f[:type] || f["type"] || ""
42
44
 
43
45
  if url
44
46
  url
45
- elsif path && File.exist?(path.to_s)
46
- # Reconstruct data_url from the tmp file (still present on disk)
47
+ elsif type.to_s == "image" && path && File.exist?(path.to_s)
47
48
  Utils::FileProcessor.image_path_to_data_url(path) rescue "expired:#{name}"
48
49
  elsif name
49
- # File badge for non-image disk files, or image whose tmp file is gone
50
- type = f[:type] || f["type"] || ""
51
50
  type.to_s == "image" ? "expired:#{name}" : "pdf:#{name}"
52
51
  end
53
52
  end
@@ -437,7 +436,12 @@ module Clacky
437
436
  when ["PATCH", "/api/config/settings"] then api_update_settings(req, res)
438
437
  when ["POST", "/api/config/models"] then api_add_model(req, res)
439
438
  when ["POST", "/api/config/test"] then api_test_config(req, res)
439
+ when ["POST", "/api/config/media/test"] then api_test_media_config(req, res)
440
440
  when ["GET", "/api/config/media"] then api_get_media_config(res)
441
+ when ["GET", "/api/config/ocr"] then api_get_ocr_config(res)
442
+ when ["PATCH", "/api/config/ocr"] then api_update_ocr_config(req, res)
443
+ when ["POST", "/api/config/ocr/test"] then api_test_ocr_config(req, res)
444
+ when ["POST", "/api/internal/ocr-image"] then api_internal_ocr_image(req, res)
441
445
  when ["GET", "/api/providers"] then api_list_providers(res)
442
446
  when ["GET", "/api/onboard/status"] then api_onboard_status(res)
443
447
  when ["GET", "/api/browser/status"] then api_browser_status(res)
@@ -552,6 +556,9 @@ module Clacky
552
556
  elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/working_dir$})
553
557
  session_id = path.sub("/api/sessions/", "").sub("/working_dir", "")
554
558
  api_change_session_working_dir(session_id, req, res)
559
+ elsif method == "POST" && path.match?(%r{^/api/sessions/[^/]+/fork$})
560
+ session_id = path.sub("/api/sessions/", "").sub("/fork", "")
561
+ api_fork_session(session_id, req, res)
555
562
  elsif method == "DELETE" && path.start_with?("/api/sessions/")
556
563
  session_id = path.sub("/api/sessions/", "")
557
564
  api_delete_session(session_id, res)
@@ -610,6 +617,7 @@ module Clacky
610
617
  limit = [query["limit"].to_i.then { |n| n > 0 ? n : 20 }, 50].min
611
618
  before = query["before"].to_s.strip.then { |v| v.empty? ? nil : v }
612
619
  q = query["q"].to_s.strip.then { |v| v.empty? ? nil : v }
620
+ q_scope = query["q_scope"].to_s.strip.then { |v| %w[name content].include?(v) ? v : "name" }
613
621
  date = query["date"].to_s.strip.then { |v| v.empty? ? nil : v }
614
622
  type = query["type"].to_s.strip.then { |v| v.empty? ? nil : v }
615
623
  # Backward-compat: ?source=<x> and ?profile=coding → type
@@ -620,7 +628,7 @@ module Clacky
620
628
  # `registry.list` always returns ALL matching pinned rows first (on the
621
629
  # first page; `before` == nil), followed by non-pinned rows up to `limit+1`.
622
630
  # So has_more is determined by whether the non-pinned section overflowed.
623
- sessions = @registry.list(limit: limit + 1, before: before, q: q, date: date, type: type)
631
+ sessions = @registry.list(limit: limit + 1, before: before, q: q, q_scope: q_scope, date: date, type: type)
624
632
 
625
633
  # Split pinned vs non-pinned to apply has_more only to the non-pinned tail.
626
634
  pinned_part, non_pinned_part = sessions.partition { |s| s[:pinned] }
@@ -935,6 +943,9 @@ module Clacky
935
943
  api_key_masked: entry ? mask_api_key(entry["api_key"]) : nil,
936
944
  provider: state["provider"],
937
945
  available: state["available"],
946
+ aliases: state["aliases"] || {},
947
+ stale: state["stale"] || false,
948
+ requested_model: state["requested_model"],
938
949
  configured: state["configured"]
939
950
  }
940
951
  end
@@ -952,7 +963,8 @@ module Clacky
952
963
  defaults[t] = {
953
964
  provider: provider_id,
954
965
  model: provider_id ? Clacky::Providers.default_media_model(provider_id, t) : nil,
955
- available: provider_id ? Clacky::Providers.media_models(provider_id, t) : []
966
+ available: provider_id ? Clacky::Providers.media_models(provider_id, t) : [],
967
+ aliases: provider_id ? Clacky::Providers.media_model_aliases(provider_id, t) : {}
956
968
  }
957
969
  end
958
970
 
@@ -965,6 +977,85 @@ module Clacky
965
977
  # off / auto — remove any custom entry; "auto" lets the virtual
966
978
  # derivation in AgentConfig#find_model_by_type take over.
967
979
  # custom — replace any existing custom entry with the supplied fields.
980
+ # POST /api/config/media/test
981
+ # Body: { kind, source, model, base_url, api_key }
982
+ # Lightweight preflight: GET <base_url>/models to verify connectivity,
983
+ # auth, and that the requested model is exposed by the endpoint.
984
+ # No image is generated — zero cost, sub-second.
985
+ def api_test_media_config(req, res)
986
+ body = parse_json_body(req) || {}
987
+ kind = body["kind"].to_s
988
+ return json_response(res, 422, { error: "invalid kind" }) unless %w[image video audio].include?(kind)
989
+ return json_response(res, 422, { error: "only image kind supported" }) unless kind == "image"
990
+
991
+ api_key = body["api_key"].to_s
992
+ if api_key.empty? || api_key.include?("****")
993
+ existing = @agent_config.find_model_by_type(kind) || {}
994
+ api_key = existing["api_key"].to_s
995
+ end
996
+
997
+ model = body["model"].to_s.strip
998
+ base_url = body["base_url"].to_s.strip
999
+
1000
+ if model.empty? || base_url.empty? || api_key.empty?
1001
+ return json_response(res, 200, { ok: false, message: "model, base_url, api_key are required" })
1002
+ end
1003
+
1004
+ result = preflight_media_endpoint(base_url: base_url, api_key: api_key, model: model)
1005
+ json_response(res, 200, result)
1006
+ rescue => e
1007
+ json_response(res, 200, { ok: false, message: e.message })
1008
+ end
1009
+
1010
+ private def preflight_media_endpoint(base_url:, api_key:, model:)
1011
+ url = "#{base_url.chomp("/")}/models"
1012
+ conn = Faraday.new(url: url) do |f|
1013
+ f.options.timeout = 10
1014
+ f.options.open_timeout = 5
1015
+ end
1016
+
1017
+ response =
1018
+ begin
1019
+ conn.get do |req|
1020
+ req.headers["Authorization"] = "Bearer #{api_key}"
1021
+ req.headers["Accept"] = "application/json"
1022
+ end
1023
+ rescue Faraday::Error => e
1024
+ return { ok: false, message: "Network error: #{e.message}" }
1025
+ end
1026
+
1027
+ case response.status
1028
+ when 401, 403
1029
+ return { ok: false, message: "Authentication failed (HTTP #{response.status}). Check API key." }
1030
+ when 404
1031
+ return { ok: false, message: "Endpoint not found at #{url}. Check Base URL." }
1032
+ end
1033
+
1034
+ unless response.success?
1035
+ return { ok: false, message: "HTTP #{response.status}: #{response.body.to_s[0, 200]}" }
1036
+ end
1037
+
1038
+ body = JSON.parse(response.body) rescue nil
1039
+ ids =
1040
+ if body.is_a?(Hash) && body["data"].is_a?(Array)
1041
+ body["data"].map { |m| m["id"].to_s }
1042
+ elsif body.is_a?(Array)
1043
+ body.map { |m| m["id"].to_s }
1044
+ else
1045
+ []
1046
+ end
1047
+
1048
+ if ids.empty?
1049
+ return { ok: true, message: "Connected (model list unavailable; cannot verify model id)" }
1050
+ end
1051
+
1052
+ if ids.include?(model)
1053
+ { ok: true, message: "Connected. Model '#{model}' is available." }
1054
+ else
1055
+ { ok: false, message: "Connected, but model '#{model}' not found on this endpoint." }
1056
+ end
1057
+ end
1058
+
968
1059
  def api_update_media_config(kind, req, res)
969
1060
  body = parse_json_body(req) || {}
970
1061
  source = body["source"].to_s
@@ -974,7 +1065,23 @@ module Clacky
974
1065
 
975
1066
  @agent_config.models.reject! { |m| m["type"] == kind }
976
1067
 
977
- if source == "custom"
1068
+ case source
1069
+ when "off"
1070
+ @agent_config.models << {
1071
+ "id" => SecureRandom.uuid,
1072
+ "type" => kind,
1073
+ "disabled" => true
1074
+ }
1075
+ when "auto"
1076
+ override = body["model"].to_s.strip
1077
+ unless override.empty?
1078
+ @agent_config.models << {
1079
+ "id" => SecureRandom.uuid,
1080
+ "type" => kind,
1081
+ "model" => override
1082
+ }
1083
+ end
1084
+ when "custom"
978
1085
  model = body["model"].to_s.strip
979
1086
  base_url = body["base_url"].to_s.strip
980
1087
  api_key = body["api_key"].to_s
@@ -998,6 +1105,179 @@ module Clacky
998
1105
  json_response(res, 422, { error: e.message })
999
1106
  end
1000
1107
 
1108
+ # GET /api/config/ocr
1109
+ # Returns the OCR sidecar state for the Settings UI. Mirrors media_state
1110
+ # in shape so the UI can render OCR with the same row component.
1111
+ def api_get_ocr_config(res)
1112
+ state = @agent_config.ocr_state
1113
+ entry = @agent_config.find_model_by_type("ocr")
1114
+
1115
+ out = {
1116
+ source: state["source"],
1117
+ model: state["model"],
1118
+ base_url: state["base_url"],
1119
+ api_key_masked: entry ? mask_api_key(entry["api_key"]) : nil,
1120
+ provider: state["provider"],
1121
+ available: state["available"],
1122
+ stale: state["stale"] || false,
1123
+ requested_model: state["requested_model"],
1124
+ configured: state["configured"],
1125
+ primary: state["primary"] || false
1126
+ }
1127
+
1128
+ # Auto-mode preview: surface what the OCR sidecar *would* be if the
1129
+ # user flipped to "auto" — derived from the same provider as the
1130
+ # current default model.
1131
+ default = @agent_config.find_model_by_type("default")
1132
+ provider_id = default && Clacky::Providers.resolve_provider(
1133
+ base_url: default["base_url"],
1134
+ api_key: default["api_key"]
1135
+ )
1136
+ default_preview = {
1137
+ provider: provider_id,
1138
+ model: provider_id ? Clacky::Providers.default_ocr_model(provider_id) : nil,
1139
+ available: provider_id ? Clacky::Providers.ocr_models(provider_id) : []
1140
+ }
1141
+
1142
+ json_response(res, 200, { ocr: out, default_provider: default_preview })
1143
+ end
1144
+
1145
+ # PATCH /api/config/ocr
1146
+ # Body: { source: "off"|"auto"|"custom", model?, base_url?, api_key?,
1147
+ # anthropic_format? }
1148
+ # Mirrors api_update_media_config but for the single "ocr" type.
1149
+ def api_update_ocr_config(req, res)
1150
+ body = parse_json_body(req) || {}
1151
+ source = body["source"].to_s
1152
+ unless %w[off auto custom].include?(source)
1153
+ return json_response(res, 422, { error: "invalid source" })
1154
+ end
1155
+
1156
+ @agent_config.models.reject! { |m| m["type"] == "ocr" }
1157
+
1158
+ case source
1159
+ when "off"
1160
+ @agent_config.models << {
1161
+ "id" => SecureRandom.uuid,
1162
+ "type" => "ocr",
1163
+ "disabled" => true
1164
+ }
1165
+ when "auto"
1166
+ override = body["model"].to_s.strip
1167
+ unless override.empty?
1168
+ @agent_config.models << {
1169
+ "id" => SecureRandom.uuid,
1170
+ "type" => "ocr",
1171
+ "model" => override
1172
+ }
1173
+ end
1174
+ when "custom"
1175
+ model = body["model"].to_s.strip
1176
+ base_url = body["base_url"].to_s.strip
1177
+ api_key = body["api_key"].to_s
1178
+ if api_key.include?("****")
1179
+ existing = @agent_config.models.find { |m| m["type"] == "ocr" && m["api_key"] }
1180
+ api_key = existing ? existing["api_key"].to_s : ""
1181
+ end
1182
+ if model.empty? || base_url.empty? || api_key.empty?
1183
+ return json_response(res, 422, { error: "model, base_url, api_key are required" })
1184
+ end
1185
+
1186
+ @agent_config.models << {
1187
+ "id" => SecureRandom.uuid,
1188
+ "model" => model,
1189
+ "base_url" => base_url,
1190
+ "api_key" => api_key,
1191
+ "anthropic_format" => body["anthropic_format"] || false,
1192
+ "type" => "ocr"
1193
+ }
1194
+ end
1195
+
1196
+ @agent_config.save
1197
+ json_response(res, 200, { ok: true, state: @agent_config.ocr_state })
1198
+ rescue => e
1199
+ json_response(res, 422, { error: e.message })
1200
+ end
1201
+
1202
+ # POST /api/config/ocr/test
1203
+ # Reuses the media preflight (GET /models) — same connectivity check.
1204
+ def api_test_ocr_config(req, res)
1205
+ body = parse_json_body(req) || {}
1206
+ api_key = body["api_key"].to_s
1207
+ if api_key.empty? || api_key.include?("****")
1208
+ existing = @agent_config.find_model_by_type("ocr") || {}
1209
+ api_key = existing["api_key"].to_s
1210
+ end
1211
+
1212
+ model = body["model"].to_s.strip
1213
+ base_url = body["base_url"].to_s.strip
1214
+
1215
+ if model.empty? || base_url.empty? || api_key.empty?
1216
+ return json_response(res, 200, { ok: false, message: "model, base_url, api_key are required" })
1217
+ end
1218
+
1219
+ result = preflight_media_endpoint(base_url: base_url, api_key: api_key, model: model)
1220
+ json_response(res, 200, result)
1221
+ rescue => e
1222
+ json_response(res, 200, { ok: false, message: e.message })
1223
+ end
1224
+
1225
+ # POST /api/internal/ocr-image
1226
+ # Internal endpoint used by parser scripts (e.g. pdf_parser_vlm.py) to
1227
+ # transcribe a single image via the configured OCR sidecar. Localhost-
1228
+ # only by virtue of the standard auth path: when the server binds to
1229
+ # 127.0.0.1 (@localhost_only), check_access_key returns true without
1230
+ # requiring a token, so parsers running on the same host can call this
1231
+ # endpoint with no extra wiring.
1232
+ #
1233
+ # Request: multipart/form-data with field "image" (binary), optional "prompt"
1234
+ # OR JSON body { "data_url": "data:image/png;base64,...", "prompt": "..." }
1235
+ # Response: { ok: true, text: "..." } or { ok: false, message: "..." }
1236
+ def api_internal_ocr_image(req, res)
1237
+ entry = @agent_config.find_model_by_type("ocr")
1238
+ unless entry
1239
+ return json_response(res, 503, { ok: false, message: "OCR sidecar not configured" })
1240
+ end
1241
+
1242
+ prompt = nil
1243
+ data_url = nil
1244
+ bytes = nil
1245
+ mime = "image/png"
1246
+
1247
+ ctype = req.content_type.to_s
1248
+ if ctype.start_with?("multipart/form-data")
1249
+ parts = req.query
1250
+ if (img = parts["image"])
1251
+ bytes = img.respond_to?(:read) ? img.read : img.to_s
1252
+ mime = (img.respond_to?(:[]) ? img["content-type"].to_s : nil)
1253
+ mime = "image/png" if mime.nil? || mime.empty?
1254
+ end
1255
+ prompt = parts["prompt"].to_s if parts["prompt"]
1256
+ else
1257
+ body = parse_json_body(req) || {}
1258
+ data_url = body["data_url"].to_s
1259
+ prompt = body["prompt"].to_s if body["prompt"]
1260
+ end
1261
+
1262
+ image =
1263
+ if bytes && !bytes.empty?
1264
+ { bytes: bytes, mime_type: mime }
1265
+ elsif data_url && !data_url.empty?
1266
+ { data_url: data_url }
1267
+ else
1268
+ return json_response(res, 400, { ok: false, message: "image or data_url required" })
1269
+ end
1270
+
1271
+ text = Clacky::Vision::Resolver.new(entry).describe(image, prompt: prompt)
1272
+ if text && !text.strip.empty?
1273
+ json_response(res, 200, { ok: true, text: text })
1274
+ else
1275
+ json_response(res, 200, { ok: false, message: "OCR returned empty result" })
1276
+ end
1277
+ rescue => e
1278
+ json_response(res, 500, { ok: false, message: e.message })
1279
+ end
1280
+
1001
1281
  # POST /api/onboard/complete
1002
1282
  # Called after key setup is done (soul_setup is optional/skipped).
1003
1283
  # Creates the default session if none exists yet, returns it.
@@ -4201,6 +4481,15 @@ module Clacky
4201
4481
  json_response(res, 500, { error: e.message })
4202
4482
  end
4203
4483
 
4484
+ def api_fork_session(session_id, req, res)
4485
+ fork_data = @session_manager.fork(session_id)
4486
+ return json_response(res, 404, { error: "Session not found" }) unless fork_data
4487
+
4488
+ fork_id = fork_data[:session_id]
4489
+ broadcast_session_update(fork_id)
4490
+ json_response(res, 201, { session: @registry.snapshot(fork_id) })
4491
+ end
4492
+
4204
4493
  def api_delete_session(session_id, res)
4205
4494
  # A session exists if it's either in the runtime registry OR on disk.
4206
4495
  # Old sessions that were never restored into memory this server run
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  module Clacky
4
6
  module Server
5
7
  # SessionRegistry is the single authoritative source for session state.
@@ -158,7 +160,7 @@ module Clacky
158
160
  # [ ...all_pinned_matching (newest-first), ...non_pinned (newest-first, limited) ]
159
161
  #
160
162
  # source and profile are orthogonal — either can be nil independently.
161
- def list(limit: nil, before: nil, q: nil, date: nil, type: nil, include_pinned: true)
163
+ def list(limit: nil, before: nil, q: nil, q_scope: "name", date: nil, type: nil, include_pinned: true)
162
164
  return [] unless @session_manager
163
165
 
164
166
  live = @mutex.synchronize do
@@ -195,13 +197,25 @@ module Clacky
195
197
  # ── date filter (YYYY-MM-DD, matches created_at prefix) ──────────────
196
198
  all = all.select { |s| s[:created_at].to_s.start_with?(date) } if date
197
199
 
198
- # ── name / id search ─────────────────────────────────────────────────
200
+ # ── name / id / content search ───────────────────────────────────────
201
+ content_snippets = nil
199
202
  if q && !q.empty?
200
- q_down = q.downcase
201
- all = all.select { |s|
202
- (s[:name] || "").downcase.include?(q_down) ||
203
- (s[:session_id] || "").downcase.include?(q_down)
204
- }
203
+ if q_scope == "content"
204
+ content_snippets = @session_manager.search_content(q)
205
+ if content_snippets.empty?
206
+ all = []
207
+ else
208
+ prefix_set = content_snippets.keys.to_set
209
+ all = all.select { |s| prefix_set.include?((s[:session_id] || "")[0, 8]) }
210
+ end
211
+ else
212
+ q_down = q.downcase
213
+ id_match_eligible = q_down.match?(/\A[0-9a-f]{6,}\z/)
214
+ all = all.select { |s|
215
+ (s[:name] || "").downcase.include?(q_down) ||
216
+ (id_match_eligible && (s[:session_id] || "").downcase.include?(q_down))
217
+ }
218
+ end
205
219
  end
206
220
 
207
221
  # ── Split pinned vs non-pinned BEFORE applying `before`/`limit`.
@@ -223,7 +237,15 @@ module Clacky
223
237
 
224
238
  ordered = pinned_section + non_pinned
225
239
 
226
- ordered.map { |s| build_enriched_row(s, live[s[:session_id]]) }
240
+ ordered.map do |s|
241
+ row = build_enriched_row(s, live[s[:session_id]])
242
+ if content_snippets
243
+ short = (s[:session_id] || "")[0, 8]
244
+ snip = content_snippets[short]
245
+ row[:search_snippet] = snip if snip
246
+ end
247
+ row
248
+ end
227
249
  end
228
250
 
229
251
  # Return the same enriched hash that a `list` row would produce, for a
@@ -171,7 +171,6 @@ module Clacky
171
171
 
172
172
  def show_diff(old_content, new_content, max_lines: 50)
173
173
  emit("diff", old_size: old_content.bytesize, new_size: new_content.bytesize)
174
- # Diffs are too verbose for IM — intentionally not forwarded
175
174
  end
176
175
 
177
176
  def show_token_usage(token_data)
@@ -210,6 +209,27 @@ module Clacky
210
209
  forward_to_subscribers { |sub| sub.show_warning(message) }
211
210
  end
212
211
 
212
+ # === Phase grouping ===
213
+
214
+ def phase_start(kind:, label: nil)
215
+ pid = SecureRandom.uuid
216
+ Thread.current[:clacky_phase_id] = pid
217
+ # Emit without auto-injection (the start event itself defines the phase)
218
+ event = { type: "phase_start", session_id: @session_id, phase_id: pid, kind: kind.to_s }
219
+ event[:label] = label if label
220
+ @broadcaster.call(@session_id, event)
221
+ pid
222
+ end
223
+
224
+ def phase_end(phase_id, summary: nil)
225
+ # Clear thread-local before emitting end so the end event itself
226
+ # doesn't get tagged with the phase_id it's closing.
227
+ Thread.current[:clacky_phase_id] = nil if Thread.current[:clacky_phase_id] == phase_id
228
+ event = { type: "phase_end", session_id: @session_id, phase_id: phase_id }
229
+ event[:summary] = summary if summary
230
+ @broadcaster.call(@session_id, event)
231
+ end
232
+
213
233
  def show_error(message, code: nil, top_up_url: nil)
214
234
  payload = { message: message }
215
235
  payload[:code] = code if code
@@ -414,6 +434,9 @@ module Clacky
414
434
 
415
435
  def emit(type, **data)
416
436
  event = { type: type, session_id: @session_id }.merge(data)
437
+ if (pid = Thread.current[:clacky_phase_id]) && !data.key?(:phase_id)
438
+ event[:phase_id] = pid
439
+ end
417
440
  @broadcaster.call(@session_id, event)
418
441
  end
419
442
 
@@ -3,6 +3,7 @@
3
3
  require "json"
4
4
  require "fileutils"
5
5
  require "securerandom"
6
+ require "open3"
6
7
 
7
8
  module Clacky
8
9
  class SessionManager
@@ -51,6 +52,27 @@ module Clacky
51
52
  all_sessions.find { |s| s[:session_id].to_s.start_with?(session_id.to_s) }
52
53
  end
53
54
 
55
+ # Fork a session: create a copy with new id, "(copy)" name suffix, and reset stats.
56
+ # Returns the forked session data hash, or nil if the original is not found.
57
+ def fork(session_id)
58
+ original = load(session_id)
59
+ return nil unless original
60
+
61
+ forked = original.dup
62
+ forked[:session_id] = self.class.generate_id
63
+ forked[:created_at] = Time.now.iso8601
64
+ forked[:updated_at] = Time.now.iso8601
65
+ forked[:pinned] = false
66
+ forked[:name] = "#{original[:name] || "Unnamed session"} (copy)"
67
+ forked[:stats] = (original[:stats] || {}).merge(
68
+ total_tasks: 0, total_iterations: 0, total_cost_usd: 0.0,
69
+ last_status: nil, last_error: nil
70
+ )
71
+
72
+ save(forked)
73
+ forked
74
+ end
75
+
54
76
  # Soft-delete: move session JSON + chunks to the session trash directory.
55
77
  # Returns true if found and moved, false if not found.
56
78
  def delete(session_id)
@@ -158,6 +180,104 @@ module Clacky
158
180
  limit ? sessions.first(limit) : sessions
159
181
  end
160
182
 
183
+ # Full-text grep over session JSON + chunk MD files.
184
+ # Case-sensitive: BSD grep -i is ~30x slower; Chinese has no case.
185
+ # Returns Hash<short_id String => snippet String> (snippet around the first match).
186
+ def search_content(query, timeout: 5)
187
+ q = query.to_s
188
+ return {} if q.strip.length < 2
189
+
190
+ files = Dir.glob(File.join(@sessions_dir, "*.json")) +
191
+ Dir.glob(File.join(@sessions_dir, "*-chunk-*.md"))
192
+ return {} if files.empty?
193
+
194
+ result = {}
195
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
196
+ each_grep_batch(files) do |batch|
197
+ remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
198
+ break if remaining <= 0
199
+ out = run_with_timeout({ "LC_ALL" => "C" },
200
+ "grep", "-H", "-F", "-m", "1", "--",
201
+ q, *batch,
202
+ timeout: remaining)
203
+ next unless out
204
+ out.each_line do |line|
205
+ path, _, rest = line.chomp.partition(":")
206
+ next if path.empty? || rest.empty?
207
+ sid = extract_short_id(File.basename(path))
208
+ next unless sid
209
+ next if result.key?(sid)
210
+ result[sid] = build_snippet(rest, q)
211
+ end
212
+ end
213
+ result
214
+ end
215
+
216
+ # Yield file batches whose joined argv length stays well under ARG_MAX.
217
+ # macOS ARG_MAX is ~256 KiB; we cap at 96 KiB to leave room for env.
218
+ private def each_grep_batch(files, max_bytes: 96 * 1024)
219
+ batch = []
220
+ size = 0
221
+ files.each do |f|
222
+ len = f.bytesize + 1
223
+ if size + len > max_bytes && !batch.empty?
224
+ yield batch
225
+ batch = []
226
+ size = 0
227
+ end
228
+ batch << f
229
+ size += len
230
+ end
231
+ yield batch unless batch.empty?
232
+ end
233
+
234
+ private def build_snippet(line, query, radius: 80)
235
+ bytes = line.b
236
+ q = query.b
237
+ idx = bytes.index(q)
238
+ if idx.nil?
239
+ head = bytes.byteslice(0, radius * 2).to_s
240
+ return head.force_encoding("UTF-8").scrub("?").gsub(/\s+/, " ").strip
241
+ end
242
+
243
+ start_byte = [idx - radius, 0].max
244
+ stop_byte = [idx + q.bytesize + radius, bytes.bytesize].min
245
+ snippet = bytes.byteslice(start_byte, stop_byte - start_byte).to_s
246
+ snippet = snippet.force_encoding("UTF-8").scrub("?")
247
+ snippet = "…" + snippet if start_byte > 0
248
+ snippet = snippet + "…" if stop_byte < bytes.bytesize
249
+ snippet.gsub(/\s+/, " ").strip
250
+ end
251
+
252
+ private def run_with_timeout(env, *cmd, timeout:)
253
+ Open3.popen3(env, *cmd) do |stdin, stdout, stderr, wait_thr|
254
+ stdin.close
255
+ out = +""
256
+ reader = Thread.new { out << stdout.read }
257
+ drain = Thread.new { stderr.read }
258
+ deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
259
+ loop do
260
+ remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
261
+ break if remaining <= 0
262
+ break if wait_thr.join(remaining)
263
+ end
264
+ if wait_thr.alive?
265
+ Process.kill("TERM", wait_thr.pid) rescue nil
266
+ wait_thr.join(0.5)
267
+ Process.kill("KILL", wait_thr.pid) rescue nil if wait_thr.alive?
268
+ reader.kill; drain.kill
269
+ return nil
270
+ end
271
+ reader.join; drain.join
272
+ out
273
+ end
274
+ end
275
+
276
+ private def extract_short_id(basename)
277
+ m = basename.match(/-([0-9a-f]{8})(?:-chunk-\d+)?\.(?:json|md)\z/)
278
+ m && m[1]
279
+ end
280
+
161
281
  # Return the most recent session for a given working directory, or nil.
162
282
  def latest_for_directory(working_dir)
163
283
  all_sessions(current_dir: working_dir).first