openclacky 0.9.35 → 0.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -468,10 +468,19 @@ module Clacky
468
468
  # Backward-compat: ?source=<x> and ?profile=coding → type
469
469
  type ||= query["profile"].to_s.strip.then { |v| v.empty? ? nil : v }
470
470
  type ||= query["source"].to_s.strip.then { |v| v.empty? ? nil : v }
471
- # Fetch one extra to detect has_more without a separate count query
471
+
472
+ # Fetch one extra NON-PINNED row to detect has_more without a separate count query.
473
+ # `registry.list` always returns ALL matching pinned rows first (on the
474
+ # first page; `before` == nil), followed by non-pinned rows up to `limit+1`.
475
+ # So has_more is determined by whether the non-pinned section overflowed.
472
476
  sessions = @registry.list(limit: limit + 1, before: before, q: q, date: date, type: type)
473
- has_more = sessions.size > limit
474
- sessions = sessions.first(limit)
477
+
478
+ # Split pinned vs non-pinned to apply has_more only to the non-pinned tail.
479
+ pinned_part, non_pinned_part = sessions.partition { |s| s[:pinned] }
480
+ has_more = non_pinned_part.size > limit
481
+ non_pinned_part = non_pinned_part.first(limit)
482
+ sessions = pinned_part + non_pinned_part
483
+
475
484
  json_response(res, 200, { sessions: sessions, has_more: has_more })
476
485
  end
477
486
 
@@ -606,6 +615,52 @@ module Clacky
606
615
 
607
616
  # ── Brand API ─────────────────────────────────────────────────────────────
608
617
 
618
+ # Process-wide mutex guarding heartbeat trigger state.
619
+ # Used by #trigger_async_heartbeat! to ensure only one heartbeat Thread is
620
+ # in flight at a time, no matter how many concurrent /api/brand/status
621
+ # requests arrive from the Web UI poller.
622
+ BRAND_HEARTBEAT_MUTEX = Mutex.new
623
+ # Tracks whether a heartbeat Thread is currently running.
624
+ @@brand_heartbeat_inflight = false
625
+
626
+ # Fire a heartbeat in a background Thread without blocking the caller.
627
+ #
628
+ # Contract:
629
+ # * Only one heartbeat Thread may be running at any moment across the
630
+ # whole process. If one is already in flight, this call is a no-op.
631
+ # * The caller never waits: it returns immediately after (at most)
632
+ # spawning the Thread.
633
+ # * The Thread rescues everything so a network failure cannot kill the
634
+ # server or leak an exception through the web stack.
635
+ def trigger_async_heartbeat!
636
+ BRAND_HEARTBEAT_MUTEX.synchronize do
637
+ if @@brand_heartbeat_inflight
638
+ Clacky::Logger.debug("[Brand] heartbeat already in flight, skipping")
639
+ return
640
+ end
641
+ @@brand_heartbeat_inflight = true
642
+ end
643
+
644
+ Thread.new do
645
+ Clacky::Logger.info("[Brand] async heartbeat starting...")
646
+ begin
647
+ brand = Clacky::BrandConfig.load
648
+ result = brand.heartbeat!
649
+ if result[:success]
650
+ Clacky::Logger.info("[Brand] async heartbeat OK")
651
+ else
652
+ Clacky::Logger.warn("[Brand] async heartbeat failed — #{result[:message]}")
653
+ end
654
+ rescue StandardError => e
655
+ Clacky::Logger.warn("[Brand] async heartbeat raised: #{e.class}: #{e.message}")
656
+ ensure
657
+ BRAND_HEARTBEAT_MUTEX.synchronize do
658
+ @@brand_heartbeat_inflight = false
659
+ end
660
+ end
661
+ end
662
+ end
663
+
609
664
  # GET /api/brand/status
610
665
  # Returns whether brand activation is needed.
611
666
  # Mirrors the onboard/status pattern so the frontend can gate on it.
@@ -634,17 +689,15 @@ module Clacky
634
689
  return
635
690
  end
636
691
 
637
- # Send heartbeat if interval has elapsed (once per day)
692
+ # Send heartbeat asynchronously if interval has elapsed (once per day).
693
+ #
694
+ # We must NOT block this HTTP response on the heartbeat call: a slow or
695
+ # unreachable license server would otherwise stall the Web UI's first
696
+ # paint for up to ~92s (2 hosts × 2 attempts × 23s timeout). The fresh
697
+ # expires_at / last_heartbeat will be picked up on the next /api/brand/status
698
+ # poll, which is sufficient for a once-per-day check.
638
699
  if brand.heartbeat_due?
639
- Clacky::Logger.info("[Brand] api_brand_status: heartbeat due, sending...")
640
- result = brand.heartbeat!
641
- if result[:success]
642
- Clacky::Logger.info("[Brand] api_brand_status: heartbeat OK")
643
- else
644
- Clacky::Logger.warn("[Brand] api_brand_status: heartbeat failed — #{result[:message]}")
645
- end
646
- # Reload after heartbeat to pick up updated expires_at / last_heartbeat
647
- brand = Clacky::BrandConfig.load
700
+ trigger_async_heartbeat!
648
701
  else
649
702
  Clacky::Logger.debug("[Brand] api_brand_status: heartbeat not due yet")
650
703
  end
@@ -886,8 +939,12 @@ module Clacky
886
939
  key.empty? ? nil : key
887
940
  end
888
941
 
889
- # Extract bearer token / query param / cookie from a WEBrick request.
890
- # Priority: Authorization: Bearer > ?access_key= > Cookie clacky_access_key
942
+ # Extract bearer token or query param from a WEBrick request.
943
+ # Priority: Authorization: Bearer > ?access_key=
944
+ # The query string form is only used by WebSocket connections, which
945
+ # cannot set custom headers from the browser. All HTTP clients —
946
+ # including the web UI (via a fetch interceptor in auth.js) — use the
947
+ # Authorization header.
891
948
  private def extract_key(req)
892
949
  auth = req["Authorization"].to_s.strip
893
950
  if auth.start_with?("Bearer ")
@@ -899,10 +956,6 @@ module Clacky
899
956
  token = query["access_key"].to_s.strip
900
957
  return token unless token.empty?
901
958
 
902
- req.cookies.each do |c|
903
- return c.value if c.name == "clacky_access_key" && !c.value.to_s.empty?
904
- end
905
-
906
959
  nil
907
960
  end
908
961
 
@@ -1075,7 +1128,18 @@ module Clacky
1075
1128
  end
1076
1129
 
1077
1130
  # Broadcast final upgrade result with appropriate log message.
1131
+ #
1132
+ # Defensive post-check: if `run_shell` reported failure but the gem
1133
+ # is in fact now installed at the latest version, reverse the verdict.
1134
+ # This guards against false negatives from the Terminal idle-poll
1135
+ # mechanism (see: 0.9.36 upgrade failure bug).
1078
1136
  private def finish_upgrade(success, fallback_hint: "gem update openclacky")
1137
+ if !success && gem_actually_upgraded?
1138
+ Clacky::Logger.warn("[Upgrade] run_shell reported failure, but installed version matches latest — treating as success.")
1139
+ broadcast_all(type: "upgrade_log", line: "\n(Verified: the new version is installed — reclassifying as success.)\n")
1140
+ success = true
1141
+ end
1142
+
1079
1143
  if success
1080
1144
  Clacky::Logger.info("[Upgrade] Success!")
1081
1145
  broadcast_all(type: "upgrade_log", line: "\n✓ Upgrade successful! Please restart the server to apply the new version.\n")
@@ -1087,6 +1151,22 @@ module Clacky
1087
1151
  end
1088
1152
  end
1089
1153
 
1154
+ # Check whether the latest published version of openclacky is already
1155
+ # installed locally. Used as a post-upgrade sanity check so a flaky
1156
+ # run_shell result doesn't mask a successful install.
1157
+ # Returns false on any error (conservative — don't fabricate success).
1158
+ private def gem_actually_upgraded?
1159
+ latest = fetch_latest_version_from_rubygems_api
1160
+ return false unless latest
1161
+
1162
+ out, exit_code = run_shell("gem list openclacky -i -v #{latest}", timeout: 30)
1163
+ return false unless exit_code&.zero?
1164
+ out.to_s.strip.downcase == "true"
1165
+ rescue StandardError => e
1166
+ Clacky::Logger.warn("[Upgrade] gem_actually_upgraded? error: #{e.message}")
1167
+ false
1168
+ end
1169
+
1090
1170
  # POST /api/restart
1091
1171
  # Re-execs the current process so the newly installed gem version is loaded.
1092
1172
  # Uses the absolute script path captured at startup to avoid relative-path issues.
@@ -1198,18 +1278,11 @@ module Clacky
1198
1278
  # Run a shell command via the unified Terminal tool and return
1199
1279
  # [output, exit_code] — drop-in replacement for Open3.capture2e.
1200
1280
  #
1201
- # Uses Terminal#execute so the command inherits the user's real
1202
- # login shell (rbenv/mise shims, configured gem mirrors, etc.).
1203
- # On timeout / still-running, returns [output_so_far, nil].
1204
- #
1205
- # The command is routed through the Security layer like any other
1206
- # Terminal call; server-side commands (`gem ...`, `curl -fsSL ... -o ...`)
1207
- # pass through unchanged.
1281
+ # Delegates to Terminal.run_sync which handles the idle-poll loop
1282
+ # internally (see its docs for why that's needed — this wrapper used
1283
+ # to re-implement it wrong and caused the 0.9.36 upgrade bug).
1208
1284
  private def run_shell(command, timeout: 120)
1209
- result = Clacky::Tools::Terminal.new.execute(command: command, timeout: timeout)
1210
- output = result[:output].to_s
1211
- exit_code = result[:exit_code] # nil when the session is still running
1212
- [output, exit_code]
1285
+ Clacky::Tools::Terminal.run_sync(command, timeout: timeout)
1213
1286
  end
1214
1287
 
1215
1288
  # ── Channel API ───────────────────────────────────────────────────────────
@@ -1828,6 +1901,7 @@ module Clacky
1828
1901
  def api_get_config(res)
1829
1902
  models = @agent_config.models.map.with_index do |m, i|
1830
1903
  {
1904
+ id: m["id"], # Stable runtime id — use this for switching
1831
1905
  index: i,
1832
1906
  model: m["model"],
1833
1907
  base_url: m["base_url"],
@@ -1838,12 +1912,19 @@ module Clacky
1838
1912
  end
1839
1913
  # Filter out auto-injected models (like lite) from UI display
1840
1914
  models.reject! { |m| @agent_config.models[m[:index]]["auto_injected"] }
1841
- json_response(res, 200, { models: models, current_index: @agent_config.current_model_index })
1915
+ json_response(res, 200, {
1916
+ models: models,
1917
+ current_index: @agent_config.current_model_index,
1918
+ current_id: @agent_config.current_model&.dig("id")
1919
+ })
1842
1920
  end
1843
1921
 
1844
1922
  # POST /api/config — save updated model list
1845
- # Body: { models: [ { index, model, base_url, api_key, anthropic_format, type } ] }
1846
- # api_key may be masked ("sk-ab12****...5678") — keep existing key in that case
1923
+ # Body: { models: [ { id?, index, model, base_url, api_key, anthropic_format, type } ] }
1924
+ # - id may be present for existing models (preserved) or absent for newly added
1925
+ # models (a new id is generated). Ids are runtime-only and stripped before
1926
+ # writing to config.yml (see AgentConfig#to_yaml).
1927
+ # - api_key may be masked ("sk-ab12****...5678") — keep existing key in that case
1847
1928
  def api_save_config(req, res)
1848
1929
  body = parse_json_body(req)
1849
1930
  return json_response(res, 400, { error: "Invalid JSON" }) unless body
@@ -1851,35 +1932,59 @@ module Clacky
1851
1932
  incoming = body["models"]
1852
1933
  return json_response(res, 400, { error: "models array required" }) unless incoming.is_a?(Array)
1853
1934
 
1854
- incoming.each_with_index do |m, i|
1855
- existing = @agent_config.models[i]
1856
- # Resolve api_key: if masked placeholder, keep the stored key
1857
- api_key = if m["api_key"].to_s.include?("****")
1858
- existing&.dig("api_key")
1935
+ # Build a quick id→existing-model lookup. Ids are the single source
1936
+ # of identity for models across save/reload cycles — no index-based
1937
+ # fallback (ids are stable; indexes are not). Live sessions' stored
1938
+ # @current_model_id stays valid as long as the id is still present
1939
+ # in the list after save.
1940
+ existing_by_id = {}
1941
+ @agent_config.models.each { |m| existing_by_id[m["id"]] = m if m["id"] }
1942
+
1943
+ new_models = incoming.map do |m|
1944
+ # Lookup by id only. No id means a brand-new model — we mint one.
1945
+ existing = m["id"] && existing_by_id[m["id"]]
1946
+
1947
+ # Resolve api_key with THREE cases (ordered, fail-safe):
1948
+ # 1. Incoming key is the masked placeholder ("sk-ab12****...5678")
1949
+ # → user didn't retype; keep the stored key.
1950
+ # 2. Incoming key is missing/blank AND we have an existing model
1951
+ # → the browser omitted api_key for non-edited rows; keep the
1952
+ # stored key. This is the critical 0.9.37 fix — without it,
1953
+ # saving one model silently wiped api_keys of all others,
1954
+ # because the frontend only ever hydrates api_key for the
1955
+ # row currently being edited (/api/config returns only
1956
+ # api_key_masked, never api_key).
1957
+ # 3. Otherwise: user typed a new key (or this is a brand-new
1958
+ # model); use the incoming value.
1959
+ incoming_key = m["api_key"].to_s
1960
+ api_key = if incoming_key.include?("****")
1961
+ existing&.dig("api_key").to_s
1962
+ elsif incoming_key.empty? && existing
1963
+ existing["api_key"].to_s
1859
1964
  else
1860
- m["api_key"]
1965
+ incoming_key
1861
1966
  end
1862
1967
 
1863
- if existing
1864
- existing["model"] = m["model"] if m.key?("model")
1865
- existing["base_url"] = m["base_url"] if m.key?("base_url")
1866
- existing["api_key"] = api_key if api_key
1867
- existing["anthropic_format"] = m["anthropic_format"] if m.key?("anthropic_format")
1868
- existing["type"] = m["type"] if m.key?("type")
1869
- else
1870
- @agent_config.add_model(
1871
- model: m["model"].to_s,
1872
- api_key: api_key.to_s,
1873
- base_url: m["base_url"].to_s,
1874
- anthropic_format: m["anthropic_format"] || false,
1875
- type: m["type"]
1876
- )
1877
- end
1968
+ {
1969
+ "id" => (existing && existing["id"]) || SecureRandom.uuid,
1970
+ "model" => m["model"].to_s,
1971
+ "base_url" => m["base_url"].to_s,
1972
+ "api_key" => api_key,
1973
+ "anthropic_format" => m["anthropic_format"] || false,
1974
+ "type" => m["type"]
1975
+ }.tap { |h| h.delete("type") if h["type"].nil? || h["type"].to_s.empty? }
1878
1976
  end
1879
1977
 
1880
- # Remove models that are no longer present (trim to incoming length)
1881
- while @agent_config.models.length > incoming.length
1882
- @agent_config.models.pop
1978
+ # Replace @models in place — do NOT reassign the array, because every
1979
+ # live session holds a reference to the same array (Plan B shared
1980
+ # models). `replace` mutates in place so all sessions see the new list.
1981
+ @agent_config.models.replace(new_models)
1982
+
1983
+ # Re-anchor current_model_index to the model still holding type: default
1984
+ if (new_default_idx = new_models.find_index { |m| m["type"] == "default" })
1985
+ @agent_config.current_model_index = new_default_idx
1986
+ elsif @agent_config.current_model_index >= new_models.length
1987
+ @agent_config.current_model_index = [new_models.length - 1, 0].max
1883
1988
  end
1884
1989
 
1885
1990
  @agent_config.save
@@ -2004,36 +2109,38 @@ module Clacky
2004
2109
 
2005
2110
  def api_switch_session_model(session_id, req, res)
2006
2111
  body = parse_json_body(req)
2007
- new_model_name = body["model"].to_s.strip
2112
+ model_id = body["model_id"].to_s.strip
2008
2113
 
2009
- return json_response(res, 400, { error: "model is required" }) if new_model_name.empty?
2114
+ return json_response(res, 400, { error: "model_id is required" }) if model_id.empty?
2010
2115
  return json_response(res, 404, { error: "Session not found" }) unless @registry.ensure(session_id)
2011
2116
 
2012
2117
  agent = nil
2013
2118
  @registry.with_session(session_id) { |s| agent = s[:agent] }
2014
-
2015
- # Find the model configuration index by model name (use global config)
2016
- model_index = @agent_config.models.find_index { |m| m["model"] == new_model_name }
2017
-
2018
- if model_index.nil?
2019
- return json_response(res, 400, { error: "Model '#{new_model_name}' not found in configuration" })
2119
+
2120
+ # With Plan B (shared @models reference), every session's AgentConfig
2121
+ # points at the same @models array as the global @agent_config. So
2122
+ # resolving the model by stable id here and in agent.switch_model_by_id
2123
+ # will always agree — no more index divergence after add/delete.
2124
+ target_model = @agent_config.models.find { |m| m["id"] == model_id }
2125
+ if target_model.nil?
2126
+ return json_response(res, 400, { error: "Model not found in configuration" })
2020
2127
  end
2021
-
2022
- # Switch to the model by index (unified interface with CLI)
2023
- # This handles: config.switch_model + client rebuild + message_compressor rebuild
2024
- success = agent.switch_model(model_index)
2025
-
2128
+
2129
+ # Switch to the model by id (unified interface with CLI)
2130
+ # Handles: config.switch_model_by_id + client rebuild + message_compressor rebuild
2131
+ success = agent.switch_model_by_id(model_id)
2132
+
2026
2133
  unless success
2027
2134
  return json_response(res, 500, { error: "Failed to switch model" })
2028
2135
  end
2029
-
2136
+
2030
2137
  # Persist the change (saves to session file, NOT global config.yml)
2031
2138
  @session_manager.save(agent.to_session_data)
2032
-
2139
+
2033
2140
  # Broadcast update to all clients
2034
2141
  broadcast_session_update(session_id)
2035
-
2036
- json_response(res, 200, { ok: true, model: new_model_name })
2142
+
2143
+ json_response(res, 200, { ok: true, model_id: model_id, model: target_model["model"] })
2037
2144
  rescue => e
2038
2145
  json_response(res, 500, { error: e.message })
2039
2146
  end
@@ -2071,16 +2178,34 @@ module Clacky
2071
2178
  end
2072
2179
 
2073
2180
  def api_delete_session(session_id, res)
2074
- if @registry.delete(session_id)
2075
- # Also remove the persisted session file from disk
2076
- @session_manager.delete(session_id)
2077
- # Notify connected clients the session is gone
2078
- broadcast(session_id, { type: "session_deleted", session_id: session_id })
2079
- unsubscribe_all(session_id)
2080
- json_response(res, 200, { ok: true })
2081
- else
2082
- json_response(res, 404, { error: "Session not found" })
2181
+ # A session exists if it's either in the runtime registry OR on disk.
2182
+ # Old sessions that were never restored into memory this server run
2183
+ # (e.g. shown via "load more" in the WebUI list) are disk-only — we
2184
+ # must still be able to delete them. Previously this endpoint only
2185
+ # consulted @registry and returned 404 for disk-only sessions,
2186
+ # causing the "can't delete old sessions" bug.
2187
+ in_registry = @registry.exist?(session_id)
2188
+ on_disk = !@session_manager.load(session_id).nil?
2189
+
2190
+ unless in_registry || on_disk
2191
+ return json_response(res, 404, { error: "Session not found" })
2083
2192
  end
2193
+
2194
+ # Registry delete is best-effort — only meaningful when the session
2195
+ # is actually live (cancels idle timer, interrupts the agent thread).
2196
+ # For disk-only sessions this is a no-op and returns false, which is
2197
+ # fine and no longer blocks the disk cleanup below.
2198
+ @registry.delete(session_id) if in_registry
2199
+
2200
+ # Always physically remove the persisted session file (+ chunks).
2201
+ @session_manager.delete(session_id) if on_disk
2202
+
2203
+ # Notify any still-connected clients (mainly matters when the
2204
+ # session was live, but harmless otherwise).
2205
+ broadcast(session_id, { type: "session_deleted", session_id: session_id })
2206
+ unsubscribe_all(session_id)
2207
+
2208
+ json_response(res, 200, { ok: true })
2084
2209
  end
2085
2210
 
2086
2211
  # Export a session bundle as a .zip download containing:
@@ -143,11 +143,22 @@ module Clacky
143
143
  # nil = no source filter (all sessions)
144
144
  # profile: "general"|"coding"|nil
145
145
  # nil = no agent_profile filter
146
- # limit: max sessions to return
146
+ # limit: max sessions to return (applies to NON-PINNED only; see below)
147
147
  # before: ISO8601 cursor — only sessions with created_at < before
148
+ # (also applies to NON-PINNED only; pinned items are a separate
149
+ # logical section, they should never be paginated away)
150
+ # include_pinned: when true (default), all matching pinned sessions are
151
+ # always returned on the FIRST page (before == nil) regardless
152
+ # of limit. Subsequent pages (before set) contain only
153
+ # non-pinned sessions. This guarantees that users who pinned
154
+ # an old session always see it at the top of the sidebar,
155
+ # even if many newer sessions exist.
156
+ #
157
+ # Ordering of the returned array:
158
+ # [ ...all_pinned_matching (newest-first), ...non_pinned (newest-first, limited) ]
148
159
  #
149
160
  # source and profile are orthogonal — either can be nil independently.
150
- def list(limit: nil, before: nil, q: nil, date: nil, type: nil)
161
+ def list(limit: nil, before: nil, q: nil, date: nil, type: nil, include_pinned: true)
151
162
  return [] unless @session_manager
152
163
 
153
164
  live = @mutex.synchronize do
@@ -185,10 +196,26 @@ module Clacky
185
196
  }
186
197
  end
187
198
 
188
- all = all.select { |s| (s[:created_at] || "") < before } if before
189
- all = all.first(limit) if limit
199
+ # ── Split pinned vs non-pinned BEFORE applying `before`/`limit`.
200
+ # Pinned sessions bypass pagination entirely so an old pinned session
201
+ # never falls off the first page just because newer sessions exist.
202
+ # (Regression fix for 0.9.37: previously `all_sessions` was only
203
+ # sorted by created_at and `limit` cut off old pinned rows, making
204
+ # them invisible until the user clicked "load more".)
205
+ pinned, non_pinned = all.partition { |s| s[:pinned] }
206
+
207
+ # `before` cursor ONLY applies to non-pinned (paginated) sessions.
208
+ non_pinned = non_pinned.select { |s| (s[:created_at] || "") < before } if before
209
+ non_pinned = non_pinned.first(limit) if limit
210
+
211
+ # Pinned section: only included on the first page (before == nil) so
212
+ # "load more" responses don't re-send them. On first page, return ALL
213
+ # matching pinned sessions regardless of limit.
214
+ pinned_section = (include_pinned && before.nil?) ? pinned : []
215
+
216
+ ordered = pinned_section + non_pinned
190
217
 
191
- all.map do |s|
218
+ ordered.map do |s|
192
219
  id = s[:session_id]
193
220
  ls = live[id]
194
221
  {
@@ -44,7 +44,10 @@ module Clacky
44
44
  end
45
45
 
46
46
  begin
47
- content = File.read(path)
47
+ # Scrub invalid UTF-8 bytes at read time — otherwise editing a file
48
+ # that contains non-UTF-8 bytes would poison history / error messages
49
+ # and cause JSON.generate to fail during replay.
50
+ content = safe_utf8(File.read(path))
48
51
 
49
52
  # Find matching string using layered strategy (shared with preview)
50
53
  match_result = Utils::StringMatcher.find_match(content, old_string)
@@ -127,6 +130,13 @@ module Clacky
127
130
  replacements = result[:replacements] || result["replacements"] || 1
128
131
  "Modified #{replacements} occurrence#{replacements > 1 ? "s" : ""}"
129
132
  end
133
+
134
+ # Scrub invalid UTF-8 byte sequences (see file_reader.rb for rationale).
135
+ private def safe_utf8(str)
136
+ return str if str.nil?
137
+ return str if str.encoding == Encoding::UTF_8 && str.valid_encoding?
138
+ str.encode("UTF-8", invalid: :replace, undef: :replace, replace: "\u{FFFD}")
139
+ end
130
140
  end
131
141
  end
132
142
  end
@@ -86,8 +86,10 @@ module Clacky
86
86
  }
87
87
  end
88
88
 
89
- # Read text file with optional line range
90
- all_lines = File.readlines(expanded_path)
89
+ # Read text file with optional line range.
90
+ # Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
91
+ # JSON.generate / history persistence won't blow up later.
92
+ all_lines = File.readlines(expanded_path).map! { |line| safe_utf8(line) }
91
93
  total_lines = all_lines.size
92
94
 
93
95
  # Calculate start index (convert 1-indexed to 0-indexed)
@@ -313,7 +315,11 @@ module Clacky
313
315
  # List first-level directory contents (files and directories)
314
316
  private def list_directory_contents(path)
315
317
  begin
316
- entries = Dir.entries(path).reject { |entry| entry == "." || entry == ".." }
318
+ # Scrub entry names — filenames on disk may contain non-UTF-8 bytes
319
+ # (e.g. GBK/Shift-JIS names on macOS/Linux) which would poison history.
320
+ entries = Dir.entries(path)
321
+ .map { |entry| safe_utf8(entry) }
322
+ .reject { |entry| entry == "." || entry == ".." }
317
323
 
318
324
  # Separate files and directories
319
325
  files = []
@@ -353,6 +359,16 @@ module Clacky
353
359
  }
354
360
  end
355
361
  end
362
+
363
+ # Scrub invalid UTF-8 byte sequences so the result survives
364
+ # JSON.generate (session replay, API responses).
365
+ # Invalid bytes are replaced with U+FFFD (�). Valid UTF-8 is
366
+ # returned untouched via the fast path.
367
+ private def safe_utf8(str)
368
+ return str if str.nil?
369
+ return str if str.encoding == Encoding::UTF_8 && str.valid_encoding?
370
+ str.encode("UTF-8", invalid: :replace, undef: :replace, replace: "\u{FFFD}")
371
+ end
356
372
  end
357
373
  end
358
374
  end
@@ -84,6 +84,7 @@ module Clacky
84
84
  always_ignored_dirs = Clacky::Utils::FileIgnoreHelper::ALWAYS_IGNORED_DIRS
85
85
 
86
86
  all_matches = Dir.glob(full_pattern, File::FNM_DOTMATCH)
87
+ .map { |p| p.encoding == Encoding::UTF_8 && p.valid_encoding? ? p : p.encode("UTF-8", invalid: :replace, undef: :replace, replace: "\u{FFFD}") }
87
88
  .reject { |path| File.directory?(path) }
88
89
  .reject { |path| path.end_with?(".", "..") }
89
90
  .reject do |path|
@@ -271,8 +271,10 @@ module Clacky
271
271
  def search_file(file, regex, context_lines, max_matches)
272
272
  matches = []
273
273
 
274
- # Use File.foreach for memory-efficient line-by-line reading
275
- File.foreach(file, chomp: true).with_index do |line, index|
274
+ # Use File.foreach for memory-efficient line-by-line reading.
275
+ # Scrub invalid UTF-8 bytes so results survive JSON encoding.
276
+ File.foreach(file, chomp: true).with_index do |raw_line, index|
277
+ line = safe_utf8(raw_line)
276
278
  # Stop if we have enough matches for this file
277
279
  break if matches.length >= max_matches
278
280
 
@@ -302,7 +304,7 @@ module Clacky
302
304
 
303
305
  # Get context lines around a match
304
306
  def get_line_context(file, match_index, context_lines)
305
- lines = File.readlines(file, chomp: true)
307
+ lines = File.readlines(file, chomp: true).map! { |l| safe_utf8(l) }
306
308
  start_line = [0, match_index - context_lines].max
307
309
  end_line = [lines.length - 1, match_index + context_lines].min
308
310
 
@@ -325,6 +327,13 @@ module Clacky
325
327
  rescue StandardError
326
328
  nil
327
329
  end
330
+
331
+ # Scrub invalid UTF-8 byte sequences (see file_reader.rb for rationale).
332
+ private def safe_utf8(str)
333
+ return str if str.nil?
334
+ return str if str.encoding == Encoding::UTF_8 && str.valid_encoding?
335
+ str.encode("UTF-8", invalid: :replace, undef: :replace, replace: "\u{FFFD}")
336
+ end
328
337
  end
329
338
  end
330
339
  end