openclacky 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -0
  3. data/README.md +87 -53
  4. data/lib/clacky/agent/cost_tracker.rb +19 -2
  5. data/lib/clacky/agent/llm_caller.rb +218 -0
  6. data/lib/clacky/agent/message_compressor_helper.rb +32 -2
  7. data/lib/clacky/agent.rb +54 -22
  8. data/lib/clacky/client.rb +44 -5
  9. data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
  10. data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
  11. data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
  12. data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
  13. data/lib/clacky/default_skills/new/SKILL.md +3 -114
  14. data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
  15. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
  16. data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
  17. data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
  18. data/lib/clacky/message_format/anthropic.rb +72 -8
  19. data/lib/clacky/message_format/bedrock.rb +6 -3
  20. data/lib/clacky/providers.rb +146 -3
  21. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
  22. data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
  23. data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
  24. data/lib/clacky/server/channel/channel_manager.rb +12 -4
  25. data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
  26. data/lib/clacky/server/http_server.rb +746 -13
  27. data/lib/clacky/server/session_registry.rb +55 -24
  28. data/lib/clacky/skill.rb +10 -9
  29. data/lib/clacky/skill_loader.rb +23 -11
  30. data/lib/clacky/tools/file_reader.rb +232 -127
  31. data/lib/clacky/tools/security.rb +42 -64
  32. data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
  33. data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
  34. data/lib/clacky/tools/terminal/session_manager.rb +8 -3
  35. data/lib/clacky/tools/terminal.rb +263 -16
  36. data/lib/clacky/ui2/layout_manager.rb +8 -1
  37. data/lib/clacky/ui2/output_buffer.rb +83 -23
  38. data/lib/clacky/ui2/ui_controller.rb +74 -7
  39. data/lib/clacky/utils/file_processor.rb +14 -40
  40. data/lib/clacky/utils/model_pricing.rb +215 -0
  41. data/lib/clacky/utils/parser_manager.rb +70 -6
  42. data/lib/clacky/utils/string_matcher.rb +23 -1
  43. data/lib/clacky/version.rb +1 -1
  44. data/lib/clacky/web/app.css +673 -9
  45. data/lib/clacky/web/app.js +40 -1608
  46. data/lib/clacky/web/i18n.js +209 -0
  47. data/lib/clacky/web/index.html +166 -2
  48. data/lib/clacky/web/onboard.js +77 -1
  49. data/lib/clacky/web/profile.js +442 -0
  50. data/lib/clacky/web/sessions.js +1034 -2
  51. data/lib/clacky/web/settings.js +127 -6
  52. data/lib/clacky/web/sidebar.js +39 -0
  53. data/lib/clacky/web/skills.js +460 -0
  54. data/lib/clacky/web/trash.js +343 -0
  55. data/lib/clacky/web/ws-dispatcher.js +255 -0
  56. data/lib/clacky.rb +5 -3
  57. metadata +16 -17
  58. data/lib/clacky/clacky_auth_client.rb +0 -152
  59. data/lib/clacky/clacky_cloud_config.rb +0 -123
  60. data/lib/clacky/cloud_project_client.rb +0 -169
  61. data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
  62. data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
  63. data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
  64. data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
  65. data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
  66. data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
  67. data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
  68. data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
  69. data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
  70. data/lib/clacky/deploy_api_client.rb +0 -484
@@ -598,7 +598,7 @@ module Clacky
598
598
 
599
599
  @progress_stack.push(handle)
600
600
  entry_id = append_output(render_for(handle))
601
- update_sessionbar(status: 'working') if handle.style == :primary
601
+ recompute_sessionbar_status
602
602
  entry_id
603
603
  end
604
604
  end
@@ -625,13 +625,12 @@ module Clacky
625
625
  if (restored = @progress_stack.last)
626
626
  new_id = append_output(render_for(restored))
627
627
  restored.__reattach_entry!(new_id)
628
- else
629
- # No more progress handles — clear the "working" sessionbar.
630
- # We only flip to idle if the handle that just finished was
631
- # the one that brought us to working (style :primary). A quiet
632
- # handle finishing never touches the sessionbar.
633
- update_sessionbar(status: 'idle') if handle.style == :primary
634
628
  end
629
+
630
+ # Recompute sessionbar status from whatever remains on the stack.
631
+ # This handles: (a) empty stack → idle, (b) mixed stack (e.g. a
632
+ # long-running quiet tool still active underneath) → working.
633
+ recompute_sessionbar_status
635
634
  end
636
635
  end
637
636
 
@@ -654,6 +653,11 @@ module Clacky
654
653
  @renderer.render_working(decorated) :
655
654
  @renderer.render_progress(decorated)
656
655
  update_entry(handle.entry_id, painted)
656
+
657
+ # Re-evaluate sessionbar: a quiet handle that crosses the fast-finish
658
+ # threshold should upgrade the status bar to "working" so long-running
659
+ # tools (terminal running a build, web_fetch) visibly reflect activity.
660
+ recompute_sessionbar_status
657
661
  end
658
662
  end
659
663
 
@@ -672,6 +676,41 @@ module Clacky
672
676
  @renderer.render_progress(decorated)
673
677
  end
674
678
 
679
+ # Derive the sessionbar workspace status from the live progress stack.
680
+ #
681
+ # Rules:
682
+ # - Any :primary handle alive → "working" (fast path for LLM thinking)
683
+ # - Any :quiet handle that has been alive longer than
684
+ # FAST_FINISH_THRESHOLD_SECONDS → "working" (so long tools like
685
+ # `terminal` running a build or test suite correctly flip the bar
686
+ # to working instead of staying on "idle" for minutes)
687
+ # - Otherwise → "idle"
688
+ #
689
+ # Must be called with @progress_mutex held. Emits update_sessionbar
690
+ # only when the computed status differs from the last one we wrote,
691
+ # avoiding pointless re-renders on every tick.
692
+ private def recompute_sessionbar_status
693
+ new_status = compute_sessionbar_status
694
+ return if @last_sessionbar_status == new_status
695
+ @last_sessionbar_status = new_status
696
+ update_sessionbar(status: new_status)
697
+ end
698
+
699
+ private def compute_sessionbar_status
700
+ return 'idle' if @progress_stack.empty?
701
+
702
+ threshold = ProgressHandle::FAST_FINISH_THRESHOLD_SECONDS
703
+ now = Time.now
704
+ @progress_stack.each do |h|
705
+ return 'working' if h.style == :primary
706
+ # Quiet handles only "count" once they've been alive long enough
707
+ # that a user would naturally expect a busy indicator.
708
+ start = h.start_time
709
+ return 'working' if start && (now - start) >= threshold
710
+ end
711
+ 'idle'
712
+ end
713
+
675
714
  # ---------------------------------------------------------------------
676
715
  # Legacy shim: show_progress(message, phase:, progress_type:, ...)
677
716
  #
@@ -771,12 +810,40 @@ module Clacky
771
810
 
772
811
  # Set workspace status to idle (called when agent stops working)
773
812
  def set_idle_status
813
+ # Safety net: close any legacy progress slots that were opened via
814
+ # show_progress(progress_type: X, phase: "active") but never paired
815
+ # with a corresponding phase: "done" call. Historically the
816
+ # "retrying" slot in LlmCaller was leaked on every successful
817
+ # recovery, leaving the user with a stale "Network failed ... (NNN s)"
818
+ # line ticking forever. LlmCaller now closes its own slot (see the
819
+ # ensure in call_llm), but we mirror that defense here so any
820
+ # future code path that forgets to close a slot still gets cleaned
821
+ # up at the well-defined idle boundary.
822
+ close_leaked_legacy_progress_handles
823
+
774
824
  update_sessionbar(status: 'idle')
825
+ @last_sessionbar_status = 'idle'
775
826
  # Clear user tip when agent stops working
776
827
  @input_area.clear_user_tip
777
828
  @layout.render_input
778
829
  end
779
830
 
831
+ # Finish every ProgressHandle still registered in the legacy
832
+ # (show_progress) handle map. Called from set_idle_status as a
833
+ # defense-in-depth against unpaired active/done calls.
834
+ private def close_leaked_legacy_progress_handles
835
+ return unless @legacy_progress_handles
836
+
837
+ leaked = @legacy_progress_handles.reject { |_type, h| h.nil? || !h.running? }
838
+ return if leaked.empty?
839
+
840
+ # Finish top-down so each handle is the one currently rendering
841
+ # when it closes (matches the invariant in interrupt_all_progress).
842
+ leaked.values.reverse_each(&:finish)
843
+
844
+ @legacy_progress_handles.clear
845
+ end
846
+
780
847
  # Set workspace status to working (called when agent starts working)
781
848
  def set_working_status
782
849
  update_sessionbar(status: 'working')
@@ -141,26 +141,14 @@ module Clacky
141
141
  FileRef.new(name: name, type: :image, original_path: path)
142
142
 
143
143
  when ".csv"
144
- # CSV is plain text — read directly, no external parser needed.
145
- # Try UTF-8 first, then GBK (common in Chinese-origin CSV), then binary with replacement.
146
- begin
147
- text = read_text_with_encoding_fallback(path)
148
- preview_path = save_preview(text, path)
149
- FileRef.new(name: name, type: :csv, original_path: path, preview_path: preview_path)
150
- rescue => e
151
- FileRef.new(name: name, type: :csv, original_path: path, parse_error: e.message)
152
- end
144
+ # CSV is plain text — the file itself IS the preview. No parser, no copy.
145
+ # FileReader handles encoding fallback via safe_utf8 when it reads the file.
146
+ FileRef.new(name: name, type: :csv, original_path: path, preview_path: path)
153
147
 
154
148
  when *TEXT_PREVIEW_EXTENSIONS
155
- # Markdown / plain text: the file itself IS the preview.
156
- # No parser needed — just copy through (with encoding normalisation).
157
- begin
158
- text = read_text_with_encoding_fallback(path)
159
- preview_path = save_preview(text, path)
160
- FileRef.new(name: name, type: :text, original_path: path, preview_path: preview_path)
161
- rescue => e
162
- FileRef.new(name: name, type: :text, original_path: path, parse_error: e.message)
163
- end
149
+ # Markdown / plain text / log: the file itself IS the preview.
150
+ # No parser needed, no tmpdir copy — just point preview_path at the original.
151
+ FileRef.new(name: name, type: :text, original_path: path, preview_path: path)
164
152
 
165
153
  else
166
154
  result = Utils::ParserManager.parse(path)
@@ -397,7 +385,14 @@ module Clacky
397
385
  end
398
386
 
399
387
  def self.save_preview(content, original_path)
400
- dest = "#{original_path}.preview.md"
388
+ # Always write previews to a tmpdir-based path to avoid polluting the
389
+ # user's working directory with .preview.md sidecar files.
390
+ # Use the same UPLOAD_DIR that uploaded files live in; for on-disk files
391
+ # outside that dir (e.g. project files opened by file_reader), we still
392
+ # land in UPLOAD_DIR so the user's tree stays clean.
393
+ FileUtils.mkdir_p(UPLOAD_DIR)
394
+ safe_name = File.basename(original_path.to_s).gsub(/[\/\:\*?"<>|\x00]/, "_")
395
+ dest = File.join(UPLOAD_DIR, "#{SecureRandom.hex(8)}_#{safe_name}.preview.md")
401
396
  File.write(dest, content)
402
397
  dest
403
398
  end
@@ -413,26 +408,6 @@ module Clacky
413
408
  base.empty? ? 'upload' : base
414
409
  end
415
410
 
416
- # Read a text file with automatic encoding detection.
417
- # Tries UTF-8, then GBK (common for Chinese-origin CSV/text files), then
418
- # falls back to binary read with invalid byte replacement.
419
- def self.read_text_with_encoding_fallback(path)
420
- # Try UTF-8 first (most common, fastest path)
421
- raw = File.binread(path)
422
- utf8 = raw.dup.force_encoding("UTF-8")
423
- return utf8.encode("UTF-8") if utf8.valid_encoding?
424
-
425
- # Try GBK (GB2312 superset — common in Chinese Windows/Excel exports)
426
- begin
427
- return raw.encode("UTF-8", "GBK", invalid: :replace, undef: :replace, replace: "?")
428
- rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
429
- # fall through
430
- end
431
-
432
- # Last resort: binary read with replacement characters
433
- raw.encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "?")
434
- end
435
-
436
411
  # Detect the actual image MIME type from raw binary data by inspecting
437
412
  # magic bytes, ignoring the file extension. Falls back to extension-based
438
413
  # detection when magic bytes don't match any known format.
@@ -549,7 +524,6 @@ module Clacky
549
524
  end
550
525
 
551
526
  private_class_method :parse_zip_listing, :parse_tar_listing, :save_preview, :sanitize_filename,
552
- :read_text_with_encoding_fallback,
553
527
  :downscale_png_chunky, :downscale_via_cli
554
528
  end
555
529
  end
@@ -177,9 +177,186 @@ module Clacky
177
177
  }
178
178
  },
179
179
 
180
+ # OpenAI GPT-5.5 / GPT-5.4 — breakpoint at 272K input tokens
181
+ # Source: https://openai.com/api/pricing/ (USD / 1M tokens)
182
+ # Note: OpenAI's actual tiered-pricing threshold is 272K, not the
183
+ # global 200K below. Prompts between 200K–272K will slightly
184
+ # over-estimate costs until a per-model threshold is implemented.
185
+ "gpt-5.5" => {
186
+ input: {
187
+ default: 5.00, # $5/MTok for prompts ≤ 272K tokens
188
+ over_200k: 10.00 # $10/MTok for prompts > 272K tokens
189
+ },
190
+ output: {
191
+ default: 30.00, # $30/MTok for prompts ≤ 272K tokens
192
+ over_200k: 45.00 # $45/MTok for prompts > 272K tokens
193
+ },
194
+ cache: {
195
+ write_default: 5.00, # $5/MTok cache write (≤ 272K)
196
+ write_over_200k: 10.00, # $10/MTok cache write (> 272K)
197
+ read_default: 0.50, # $0.50/MTok cache read (≤ 272K)
198
+ read_over_200k: 1.00 # $1.00/MTok cache read (> 272K)
199
+ }
200
+ },
201
+
202
+ "gpt-5.4" => {
203
+ input: {
204
+ default: 2.50, # $2.50/MTok for prompts ≤ 272K tokens
205
+ over_200k: 5.00 # $5/MTok for prompts > 272K tokens
206
+ },
207
+ output: {
208
+ default: 15.00, # $15/MTok for prompts ≤ 272K tokens
209
+ over_200k: 22.50 # $22.50/MTok for prompts > 272K tokens
210
+ },
211
+ cache: {
212
+ write_default: 2.50, # $2.50/MTok cache write (≤ 272K)
213
+ write_over_200k: 5.00, # $5/MTok cache write (> 272K)
214
+ read_default: 0.25, # $0.25/MTok cache read (≤ 272K)
215
+ read_over_200k: 0.50 # $0.50/MTok cache read (> 272K)
216
+ }
217
+ },
218
+
219
+ # GPT-5.4 flat-rate models (no breakpoint, single rate regardless of context)
220
+ "gpt-5.4-mini" => {
221
+ input: {
222
+ default: 0.75, # $0.75/MTok
223
+ over_200k: 0.75
224
+ },
225
+ output: {
226
+ default: 4.50, # $4.50/MTok
227
+ over_200k: 4.50
228
+ },
229
+ cache: {
230
+ write: 0.75, # $0.75/MTok cache write
231
+ read: 0.075 # $0.075/MTok cache read (10% of input)
232
+ }
233
+ },
234
+
235
+ "gpt-5.4-nano" => {
236
+ input: {
237
+ default: 0.20, # $0.20/MTok
238
+ over_200k: 0.20
239
+ },
240
+ output: {
241
+ default: 1.25, # $1.25/MTok
242
+ over_200k: 1.25
243
+ },
244
+ cache: {
245
+ write: 0.20, # $0.20/MTok cache write
246
+ read: 0.02 # $0.02/MTok cache read (10% of input)
247
+ }
248
+ },
249
+
250
+ # O-series reasoning models — flat-rate (200K context window)
251
+ # Source: https://openai.com/api/pricing/
252
+ "o3" => {
253
+ input: {
254
+ default: 2.00, # $2/MTok
255
+ over_200k: 2.00 # flat rate
256
+ },
257
+ output: {
258
+ default: 8.00, # $8/MTok
259
+ over_200k: 8.00
260
+ },
261
+ cache: {
262
+ write: 2.00, # $2/MTok cache write (same as input)
263
+ read: 0.50 # $0.50/MTok cache read (25% of input)
264
+ }
265
+ },
266
+
267
+ "o4-mini" => {
268
+ input: {
269
+ default: 1.10, # $1.10/MTok
270
+ over_200k: 1.10 # flat rate
271
+ },
272
+ output: {
273
+ default: 4.40, # $4.40/MTok
274
+ over_200k: 4.40
275
+ },
276
+ cache: {
277
+ write: 1.10, # $1.10/MTok cache write (same as input)
278
+ read: 0.275 # $0.275/MTok cache read (25% of input)
279
+ }
280
+ },
281
+
282
+ # GLM (Zhipu / Z.ai) — USD per 1M tokens.
283
+ # Source: https://docs.z.ai/guides/overview/pricing (Z.ai international).
284
+ # Pricing policy: we always bill at the Z.ai international flat rate,
285
+ # regardless of which endpoint (mainland bigmodel.cn vs intl z.ai) the
286
+ # user configured. Rationale:
287
+ # 1. Mainland GLM uses tiered pricing (≤32K / >32K / >128K) where the
288
+ # >32K tier is hit by the vast majority of real requests, and is
289
+ # actually a few RMB cheaper than Z.ai's flat rate — displaying the
290
+ # (slightly higher) Z.ai rate gives users a "displayed ≤ actual"
291
+ # experience which is psychologically safer than the reverse.
292
+ # 2. Single flat rate keeps the table shape consistent with every
293
+ # other provider here (no special-case tier logic for just GLM).
294
+ # Cache-write: same convention as DeepSeek/Kimi — OpenAI-compatible
295
+ # endpoints don't charge separately for cache writes (Z.ai's page lists
296
+ # "Cached Input Storage: Limited-time Free"), so bill writes at the
297
+ # regular input miss rate for safe "displayed ≤ actual" behaviour.
298
+ "glm-5.1" => {
299
+ input: { default: 1.40, over_200k: 1.40 },
300
+ output: { default: 4.40, over_200k: 4.40 },
301
+ cache: { write: 1.40, read: 0.26 }
302
+ },
303
+
304
+ "glm-5" => {
305
+ input: { default: 1.00, over_200k: 1.00 },
306
+ output: { default: 3.20, over_200k: 3.20 },
307
+ cache: { write: 1.00, read: 0.20 }
308
+ },
309
+
310
+ "glm-5-turbo" => {
311
+ input: { default: 1.20, over_200k: 1.20 },
312
+ output: { default: 4.00, over_200k: 4.00 },
313
+ cache: { write: 1.20, read: 0.24 }
314
+ },
315
+
316
+ # GLM-5V-Turbo is the multimodal sibling of GLM-5-Turbo (vision capable,
317
+ # see providers.rb model_capabilities override). Same input/output rate
318
+ # as 5-Turbo per Z.ai's Vision Models table.
319
+ "glm-5v-turbo" => {
320
+ input: { default: 1.20, over_200k: 1.20 },
321
+ output: { default: 4.00, over_200k: 4.00 },
322
+ cache: { write: 1.20, read: 0.24 }
323
+ },
324
+
325
+ "glm-4.7" => {
326
+ input: { default: 0.60, over_200k: 0.60 },
327
+ output: { default: 2.20, over_200k: 2.20 },
328
+ cache: { write: 0.60, read: 0.11 }
329
+ },
330
+
331
+ # MiniMax — USD per 1M tokens.
332
+ # Source: https://platform.minimaxi.com (Pay-as-You-Go).
333
+ # MiniMax pricing is identical across mainland (.com) and international
334
+ # (.io) endpoints, verified by the team. Same cache-write convention as
335
+ # DeepSeek/Kimi/GLM: bill writes at the input miss rate (OpenAI-compatible
336
+ # usage responses from MiniMax don't reliably carry a separate
337
+ # cache_creation_input_tokens field, so a distinct write rate would be
338
+ # dead code in practice).
339
+ # Note: providers.rb uses the capitalised "MiniMax-M2.x" model id, but
340
+ # the pricing table keys are lowercased to stay consistent with the
341
+ # rest of this file; normalize_model_name() lowercases incoming model
342
+ # names before lookup.
343
+ "minimax-m2.5" => {
344
+ input: { default: 0.30, over_200k: 0.30 },
345
+ output: { default: 1.20, over_200k: 1.20 },
346
+ cache: { write: 0.30, read: 0.03 }
347
+ },
348
+
349
+ "minimax-m2.7" => {
350
+ input: { default: 0.30, over_200k: 0.30 },
351
+ output: { default: 1.20, over_200k: 1.20 },
352
+ cache: { write: 0.30, read: 0.06 }
353
+ },
354
+
180
355
  }.freeze
181
356
 
182
357
  # Threshold for tiered pricing (200K tokens)
358
+ # NOTE: OpenAI GPT-5.5/GPT-5.4 use a 272K breakpoint, not 200K.
359
+ # Costs for prompts between 200K–272K will be slightly over-estimated.
183
360
  TIERED_PRICING_THRESHOLD = 200_000
184
361
 
185
362
  class << self
@@ -314,6 +491,44 @@ module Clacky
314
491
  "kimi-k2.5"
315
492
  when /^kimi-k2\.?6$/i
316
493
  "kimi-k2.6"
494
+ # GLM (Zhipu / Z.ai) — the five models registered in providers.rb.
495
+ # GLM-5V-Turbo is the vision variant; all five share the same Z.ai
496
+ # international flat-rate pricing regardless of which endpoint
497
+ # (mainland bigmodel.cn vs intl z.ai) the user configured.
498
+ # Strict anchored match so unrelated strings like "glm-5-x-foo"
499
+ # don't silently borrow a nearby model's rate.
500
+ when /^glm-5\.1$/i
501
+ "glm-5.1"
502
+ when /^glm-5v-turbo$/i
503
+ "glm-5v-turbo"
504
+ when /^glm-5-turbo$/i
505
+ "glm-5-turbo"
506
+ when /^glm-5$/i
507
+ "glm-5"
508
+ when /^glm-4\.7$/i
509
+ "glm-4.7"
510
+ # MiniMax — model ids in providers.rb use capitalised "MiniMax-M2.x"
511
+ # but we match case-insensitively and map to the lowercased table key.
512
+ when /^minimax-m2\.5$/i
513
+ "minimax-m2.5"
514
+ when /^minimax-m2\.7$/i
515
+ "minimax-m2.7"
516
+
517
+ # OpenAI GPT-5.x models — match various dashed/dotted/compact forms
518
+ # (e.g. "gpt-5.5", "gpt-5-5", "gpt5.5", "gpt55")
519
+ when /^gpt-?5\.?5$/i, /^gpt-?5[\.-]?5$/i
520
+ "gpt-5.5"
521
+ when /^gpt-?5\.?4[^.]*mini$/i, /^gpt-?5\.?4[\.-]?mini$/i
522
+ "gpt-5.4-mini"
523
+ when /^gpt-?5\.?4[^.]*nano$/i, /^gpt-?5\.?4[\.-]?nano$/i
524
+ "gpt-5.4-nano"
525
+ when /^gpt-?5\.?4$/i, /^gpt-?5[\.-]?4$/i
526
+ "gpt-5.4"
527
+ # O-series reasoning models
528
+ when /^o4[\.-]?mini$/i
529
+ "o4-mini"
530
+ when /^o3$/i
531
+ "o3"
317
532
  else
318
533
  nil # No pricing available for this model — cost will show as N/A
319
534
  end
@@ -33,19 +33,83 @@ module Clacky
33
33
  }.freeze
34
34
 
35
35
  # Ensure ~/.clacky/parsers/ exists and all default parsers are present.
36
- # Called once at startup.
36
+ # Called at Agent startup (idempotent — safe to run every time).
37
+ #
38
+ # Copies every file from default_parsers/ (not just the entry-point .rb
39
+ # scripts listed in PARSER_FOR). A parser may ship companion helper
40
+ # scripts — e.g. pdf_parser_ocr.py sits next to pdf_parser.rb and is
41
+ # invoked by relative path — so those helpers must be distributed too.
42
+ #
43
+ # Version upgrade policy:
44
+ # Each bundled parser declares `VERSION: <n>` in a header comment
45
+ # (works for Ruby `# VERSION: 2` and Python `# VERSION: 2` alike,
46
+ # scanned in the first 40 lines of the file).
47
+ #
48
+ # On startup, per-file:
49
+ # - If the file does NOT exist in ~/.clacky/parsers/ → copy it.
50
+ # - If it exists:
51
+ # * bundled has no VERSION → never touch (bundled file
52
+ # is opting out of managed upgrades).
53
+ # * installed has no VERSION → treat it as legacy v0 and
54
+ # upgrade (lenient mode — covers users who installed before
55
+ # the VERSION scheme existed). The old file is backed up.
56
+ # * both have VERSION, bundled > installed → upgrade, backing
57
+ # up the old copy as `<script>.v<old>.bak`.
58
+ # * bundled ≤ installed → leave the user's copy alone
59
+ # (preserves LLM/user modifications).
60
+ #
61
+ # Backups live alongside the parser so the user can inspect
62
+ # their own edits after an upgrade. They are never removed
63
+ # automatically.
37
64
  def self.setup!
38
65
  FileUtils.mkdir_p(PARSERS_DIR)
39
66
 
40
- PARSER_FOR.values.uniq.each do |script|
41
- dest = File.join(PARSERS_DIR, script)
42
- next if File.exist?(dest)
67
+ Dir.glob(File.join(DEFAULT_PARSERS_DIR, "**", "*")).each do |src|
68
+ next unless File.file?(src)
69
+ basename = File.basename(src)
70
+ next if basename.start_with?(".") || basename.end_with?(".bak")
71
+
72
+ rel = src.sub(/^#{Regexp.escape(DEFAULT_PARSERS_DIR)}\/?/, "")
73
+ dest = File.join(PARSERS_DIR, rel)
74
+
75
+ if !File.exist?(dest)
76
+ FileUtils.mkdir_p(File.dirname(dest))
77
+ FileUtils.cp(src, dest)
78
+ # Preserve executable bit so sibling scripts can be run directly.
79
+ FileUtils.chmod(File.stat(src).mode, dest)
80
+ next
81
+ end
82
+
83
+ bundled_version = extract_version(src)
84
+ # Bundled file opts out of managed upgrades — never touch user copy.
85
+ next unless bundled_version
43
86
 
44
- src = File.join(DEFAULT_PARSERS_DIR, script)
45
- if File.exist?(src)
87
+ installed_version = extract_version(dest) || 0
88
+
89
+ if bundled_version > installed_version
90
+ backup = "#{dest}.v#{installed_version}.bak"
91
+ FileUtils.cp(dest, backup) unless File.exist?(backup)
46
92
  FileUtils.cp(src, dest)
93
+ FileUtils.chmod(File.stat(src).mode, dest)
94
+ end
95
+ end
96
+ end
97
+
98
+ # Read the VERSION marker from a parser script (e.g. "# VERSION: 2").
99
+ # Works for any script language that uses `#` for comments
100
+ # (Ruby, Python, shell). Returns Integer or nil.
101
+ def self.extract_version(path)
102
+ return nil unless File.exist?(path)
103
+ # Only scan the first 40 lines — the marker lives in the header.
104
+ File.foreach(path).with_index do |line, i|
105
+ break if i >= 40
106
+ if (m = line.match(/^\s*#\s*VERSION:\s*(\d+)/i))
107
+ return m[1].to_i
47
108
  end
48
109
  end
110
+ nil
111
+ rescue StandardError
112
+ nil
49
113
  end
50
114
 
51
115
  # Run the appropriate parser for the given file path.
@@ -20,6 +20,14 @@ module Clacky
20
20
  # @return [Hash, nil] { matched_string: String, occurrences: Integer }
21
21
  # or nil when nothing matches
22
22
  def self.find_match(content, old_string)
23
+ # Defensive: if either side contains invalid UTF-8 bytes (binary files,
24
+ # mixed-encoding content, etc.), Regexp#scan / String#include? with a
25
+ # UTF-8-tagged candidate can raise `ArgumentError: invalid byte sequence
26
+ # in UTF-8`. Scrub once at the entry point so every matching layer —
27
+ # including callers like the edit preview — is safe.
28
+ content = Clacky::Utils::Encoding.to_utf8(content) unless content.nil?
29
+ old_string = Clacky::Utils::Encoding.to_utf8(old_string) unless old_string.nil?
30
+
23
31
  candidates = generate_candidates(old_string)
24
32
 
25
33
  # Simple string matching for each candidate
@@ -29,7 +37,7 @@ module Clacky
29
37
  if content.include?(candidate)
30
38
  return {
31
39
  matched_string: candidate,
32
- occurrences: content.scan(Regexp.quote(candidate)).length
40
+ occurrences: count_occurrences(content, candidate)
33
41
  }
34
42
  end
35
43
  end
@@ -38,6 +46,20 @@ module Clacky
38
46
  try_smart_match(content, old_string)
39
47
  end
40
48
 
49
+ # Count non-overlapping occurrences of `needle` in `haystack` without
50
+ # going through Regexp (safer on mixed-encoding strings and avoids an
51
+ # extra escape step).
52
+ def self.count_occurrences(haystack, needle)
53
+ return 0 if needle.empty?
54
+ count = 0
55
+ offset = 0
56
+ while (idx = haystack.index(needle, offset))
57
+ count += 1
58
+ offset = idx + needle.length
59
+ end
60
+ count
61
+ end
62
+
41
63
  # Generate candidate strings by applying different transformations.
42
64
  #
43
65
  # @param old_string [String]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Clacky
4
- VERSION = "1.0.0"
4
+ VERSION = "1.0.2"
5
5
  end