openclacky 1.0.0.beta.3 → 1.0.0.beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +36 -4
  3. data/lib/clacky/agent/message_compressor.rb +46 -8
  4. data/lib/clacky/agent/message_compressor_helper.rb +18 -2
  5. data/lib/clacky/agent/session_serializer.rb +23 -1
  6. data/lib/clacky/agent/tool_executor.rb +14 -4
  7. data/lib/clacky/agent.rb +31 -0
  8. data/lib/clacky/agent_config.rb +16 -1
  9. data/lib/clacky/brand_config.rb +16 -8
  10. data/lib/clacky/client.rb +10 -1
  11. data/lib/clacky/default_skills/new/SKILL.md +13 -5
  12. data/lib/clacky/default_skills/recall-memory/SKILL.md +0 -1
  13. data/lib/clacky/message_format/open_ai.rb +80 -3
  14. data/lib/clacky/providers.rb +7 -18
  15. data/lib/clacky/server/browser_manager.rb +25 -2
  16. data/lib/clacky/server/channel/adapters/feishu/bot.rb +43 -3
  17. data/lib/clacky/server/channel/channel_ui_controller.rb +2 -2
  18. data/lib/clacky/server/web_ui_controller.rb +1 -1
  19. data/lib/clacky/tools/browser.rb +0 -57
  20. data/lib/clacky/tools/file_reader.rb +26 -10
  21. data/lib/clacky/tools/security.rb +67 -38
  22. data/lib/clacky/tools/terminal/persistent_session.rb +16 -6
  23. data/lib/clacky/tools/terminal.rb +117 -12
  24. data/lib/clacky/tools/todo_manager.rb +117 -30
  25. data/lib/clacky/utils/login_shell.rb +72 -0
  26. data/lib/clacky/utils/model_pricing.rb +44 -0
  27. data/lib/clacky/version.rb +1 -1
  28. data/lib/clacky/web/app.css +7 -0
  29. data/lib/clacky/web/index.html +7 -1
  30. data/lib/clacky/web/onboard.js +40 -4
  31. data/lib/clacky/web/sessions.js +2 -2
  32. data/lib/clacky.rb +1 -1
  33. data/scripts/install.ps1 +76 -68
  34. metadata +2 -2
  35. data/lib/clacky/tools/run_project.rb +0 -295
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b145934170a510f46e3263c3fbce94acf618cdd416c93e17a7758984361c02b7
4
- data.tar.gz: 618fdf4917ce68514e0332ad44c522afa061a21884d6e95511f564da985f8433
3
+ metadata.gz: 39e25cd04a3d01fdacbb0382c2c367a1e72e8d2be88408e7fb29f804b3af1ba6
4
+ data.tar.gz: 492ca66bcfb55a6cfc3f2cf38f171ce983f142a7a4b0f8655e5aafa317b79a69
5
5
  SHA512:
6
- metadata.gz: 50a63fc4087f97c9a3c3242e2e379da95de6e73cdac3bb75ab11ad67bc03eb151afaf47e3a229bd769a4790f57e3b116309c0908807f35618ae64222feb30575
7
- data.tar.gz: f492569e101a0b6af312c65cffa244b29a5c47d79201129214e66ac62db7181c29678d64c9ddec88e6dc61525cf2442b0f22647e8dcdb430e0bbc87ca9f1a370
6
+ metadata.gz: 014eeb8227bcc4cd94104a1da3bb2877083a1c70c4baaaf408233eec57ef684cbc2bcbac632ca52a771e2f1a8f436f2a09d89b697a165f1147891cabfe3708a0
7
+ data.tar.gz: cc54f77d960bfd2db73906b713a84d0da6465fc18c65d9ec3ceb75d250bf426adaf4d9ba42c71900beab889bb6acf6a6472fa3843420fec8bbd3460a13f00088
data/CHANGELOG.md CHANGED
@@ -7,10 +7,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.0.0.beta.5] - 2026-04-29
11
+
12
+ ### Added
13
+ - **WSL2 mirrored networking mode for localhost access.** Windows users running under WSL2 can now configure mirrored networking, allowing the Clacky server to be reached at `localhost` from the Windows host instead of needing to look up the WSL IP address.
14
+ - **Message compressor preserves chunk order.** Compression chunks are now consistently ordered with `chunk-nn` naming, making it easier to browse and understand compressed conversation history.
15
+ - **Session model is now saved.** The currently active model selection is persisted in session data, so it survives page refreshes and server restarts.
16
+ - **Feedback button styling in Web UI.** The feedback interface now has improved CSS styling for a better user experience.
17
+
18
+ ### Improved
19
+ - **Fewer LLM turns for common tool operations.** The file reader, security tool, and todo manager have been optimized to require fewer round-trips with the AI model, making tasks faster and cheaper.
20
+ - **Terminal now supports mise-based Node.js.** The terminal tool correctly resolves Node.js when it is installed through the `mise` version manager, not just `nvm` or system paths.
21
+
22
+ ### Fixed
23
+ - **Browser MCP connection recovers from crashes.** The browser tool's MCP daemon handles process restarts more gracefully, and stale Node.js detection code has been cleaned up.
24
+ - **Brand configuration no longer crashes on empty data.** When brand config data is empty or missing, the system now handles it gracefully instead of raising an error.
25
+ - **Kimi K2.5 and K2.6 models now show correct pricing.** These models are now in the pricing table, so cost tracking reflects actual usage costs.
26
+ - **Feishu messages with images no longer silently dropped.** Image markdown syntax in Feishu messages is now sanitized before sending, preventing the Feishu API from silently rejecting them.
27
+ - **Onboarding model selector and provider presets fixed.** The model combobox in the onboarding flow now works correctly, and provider presets are properly updated.
28
+ - **File reader now works correctly with OpenAI provider.** Files attached to sessions are now properly read and processed when using the OpenAI API format.
29
+ - **Image URLs with special tokens no longer mis-handled.** The message formatter no longer mis-handles image URLs containing special tokens (e.g., `bong`).
30
+
31
+ ### Changed
32
+ - **`run_project` tool removed.** This deprecated tool has been removed. Use the terminal tool to run commands in projects instead.
33
+
34
+ ### More
35
+ - Improved WSL2 detection in the Windows PowerShell installer
36
+ - Minor test and documentation fixes
37
+
38
+ ## [1.0.0.beta.4] - 2026-04-28
39
+
40
+ ### Fixed
41
+ - **Fix**: onboard.js was calling the defunct `POST /api/config` endpoint → now calls `POST /api/config/models`
42
+
10
43
  ## [1.0.0.beta.3] - 2026-04-28
11
44
 
12
45
  ### Added
13
- - **Gemini 2.5 Pro support.** The new `gemini2.5-pro` model is now available as a selectable option, giving you access to Google's latest flagship model.
14
46
  - **File attachments now support Markdown, plain text, and `.tar.gz` archives.** When you attach `.md`, `.txt`, or `.tar.gz` files to a session, the agent can read and reason over their contents directly.
15
47
  - **Image type auto-detection.** Image files are now correctly identified by their binary content (magic bytes), not just their file extension — preventing misclassified images from causing upload or vision errors.
16
48
 
@@ -28,7 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
28
60
  - **New session creation supports model & working-directory options.** The Web UI "new session" dialog now lets you pick the model and starting directory up front, instead of having to adjust them after the session opens.
29
61
 
30
62
  ### Fixed
31
- - **System prompt now refreshes when you switch models.** Previously the system prompt captured at session start stuck around even after `/model` or `/provider` switches, which could leave model-specific instructions out of sync. The agent now re-injects the correct system prompt on every model change.
63
+ - **System prompt now refreshes when you switch models.** Previously the system prompt captured at session start stuck around even after model switches, which could leave model-specific instructions out of sync. The agent now re-injects the correct system prompt on every model change.
32
64
  - **Port 7070 properly released when the terminal tool exits.** A lingering listener on port 7070 could block subsequent runs; the terminal tool now cleans it up on shutdown.
33
65
  - **Windows installer uses `[IO.Path]::GetTempPath()` for the temp directory** (#58) — more reliable than `$env:TEMP` on systems where the env var is unset or points to a non-ASCII path.
34
66
 
@@ -36,7 +68,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
36
68
 
37
69
  ### Added
38
70
  - **Vision support — agents can now "see" images.** When you attach image files (PNG, JPG, GIF, WebP), the agent can analyze them visually with vision-capable models. Non-vision models automatically fall back to disk references instead of breaking.
39
- - **DeepSeek V4 (Clacky-DS) provider.** New `deepseekv4` provider preset with native DeepSeek API endpoint, supporting `deepseek-v4-pro` and `deepseek-v4-flash` models with accurate pricing.
71
+ - **DeepSeek V4 (Clacky-DS) provider.** New `deepseekv4` provider preset with native DeepSeek API endpoint, supporting `dsk-deepseek-v4-pro` and `dsk-deepseek-v4-flash` models with accurate pricing.
40
72
  - **Memory subagent.** Long-term memory management now runs as a dedicated background subagent — writes memories when the task reaches meaningful completion, instead of on every turn.
41
73
  - **Usage telemetry.** Anonymous usage data collection helps us understand how the product is used and prioritize improvements. No personal or conversation data is collected.
42
74
  - **Brand configuration auto-refresh.** White-label brand settings now refresh automatically when the WebUI starts up, no manual restart needed.
@@ -44,7 +76,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
44
76
  ### Improved
45
77
  - **Progress handles revamped.** Nested progress handles now hide/show automatically, ticker threads keep animations smooth, and fast-completing tasks no longer flash a pointless "done" message.
46
78
  - **Todo manager tool upgrades.** Batch add/remove multiple todos at once, and completed todos auto-clear when you add new ones.
47
- - **Model switching more robust.** CLI slash commands (`/model`, `/provider`) now work seamlessly, server-side routing handles dynamic endpoints correctly, and switching between all provider types is more reliable.
79
+ - **Model switching more robust.** CLI slash commands (/config) now work seamlessly, server-side routing handles dynamic endpoints correctly, and switching between all provider types is more reliable.
48
80
 
49
81
  ### Fixed
50
82
  - **Access key now persists via cookies.** The WebUI login key was stored only in `localStorage`, causing WebSocket connections to lose authentication. Now also written to a `clacky_access_key` cookie for consistent auth across all connection types.
@@ -94,12 +94,18 @@ module Clacky
94
94
  # @param recent_messages [Array<Hash>] Recent messages to preserve
95
95
  # @param chunk_path [String, nil] Path to the archived chunk MD file (if saved)
96
96
  # @return [Array<Hash>] Rebuilt message list: system + compressed + recent
97
- def rebuild_with_compression(compressed_content, original_messages:, recent_messages:, chunk_path: nil)
97
+ def rebuild_with_compression(compressed_content, original_messages:, recent_messages:, chunk_path: nil, topics: nil, previous_chunks: [])
98
98
  # Find and preserve system message
99
99
  system_msg = original_messages.find { |m| m[:role] == "system" }
100
100
 
101
- # Parse the compressed result
102
- parsed_messages = parse_compressed_result(compressed_content, chunk_path: chunk_path)
101
+ # Parse the compressed result, embedding previous chunk references so the
102
+ # new summary carries a complete index of all older archives. This avoids
103
+ # keeping all prior compressed_summary messages in active history while
104
+ # still giving the AI a path to find old conversations via file_reader.
105
+ parsed_messages = parse_compressed_result(compressed_content,
106
+ chunk_path: chunk_path,
107
+ topics: topics,
108
+ previous_chunks: previous_chunks)
103
109
 
104
110
  # If parsing fails or returns empty, raise error
105
111
  if parsed_messages.nil? || parsed_messages.empty?
@@ -124,7 +130,7 @@ module Clacky
124
130
  m ? m[1].strip : nil
125
131
  end
126
132
 
127
- def parse_compressed_result(result, chunk_path: nil)
133
+ def parse_compressed_result(result, chunk_path: nil, topics: nil, previous_chunks: [])
128
134
  # Return the compressed result as a single user message (role: "user").
129
135
  #
130
136
  # Why role:"user" instead of "assistant":
@@ -144,6 +150,10 @@ module Clacky
144
150
  # The `compressed_summary: true` flag is preserved so that replay_history still
145
151
  # routes this message through the chunk-expansion path (which keys off that flag,
146
152
  # not the role).
153
+ #
154
+ # @param topics [String, nil] Short topic description extracted from <topics> tag
155
+ # @param previous_chunks [Array<Hash>] Info about older chunk files
156
+ # Each hash: { basename:, path:, topics: }
147
157
  content = result.to_s.strip
148
158
 
149
159
  if content.empty?
@@ -152,22 +162,50 @@ module Clacky
152
162
  # Strip out the <topics> block — it's metadata for the chunk file, not for AI context
153
163
  content_without_topics = content.gsub(/<topics>.*?<\/topics>\n*/m, "").strip
154
164
 
155
- # Inject chunk anchor so AI knows where to find original conversation
165
+ # Build a previous-chunks index section that links to older chunk files so the AI
166
+ # can find earlier conversations without keeping all prior compressed_summary
167
+ # messages in the active history. Shows newest chunks first (reverse order),
168
+ # capped at 10 to keep the message size bounded.
169
+ previous_chunks_section = ""
170
+ if previous_chunks.any?
171
+ max_visible = 10
172
+ visible = previous_chunks.last(max_visible).reverse
173
+ older_count = previous_chunks.size - visible.size
174
+
175
+ previous_chunks_section = "\n\n---\n📁 **Previous chunks (newest first):**\n"
176
+ visible.each do |pc|
177
+ topic_str = pc[:topics] ? " — #{pc[:topics]}" : ""
178
+ previous_chunks_section += "- `#{pc[:basename]}`#{topic_str}\n"
179
+ end
180
+
181
+ if older_count > 0
182
+ oldest = previous_chunks.first
183
+ previous_chunks_section += "- ... and #{older_count} older chunks back to `#{oldest[:basename]}`\n"
184
+ end
185
+
186
+ previous_chunks_section += "_Use `file_reader` to recall details from these chunks._"
187
+ end
188
+
189
+ # Inject chunk anchor so AI knows where to find original conversation for THIS chunk
190
+ anchor = ""
156
191
  if chunk_path
157
- anchor = "\n\n---\n📁 **Original conversation archived at:** `#{chunk_path}`\n" \
192
+ anchor = "\n\n---\n📁 **Current chunk archived at:** `#{chunk_path}`\n" \
158
193
  "_Use `file_reader` tool to recall details from this chunk._"
159
- content_without_topics = content_without_topics + anchor
160
194
  end
161
195
 
162
196
  # Prefix lets the model recognise this is injected context, not a user utterance.
197
+ # Order: summary → previous chunks → current anchor (chronological)
163
198
  framed_content = "[Compressed conversation summary — previous turns archived]\n\n" \
164
- "#{content_without_topics}"
199
+ "#{content_without_topics}" \
200
+ "#{previous_chunks_section}" \
201
+ "#{anchor}"
165
202
 
166
203
  [{
167
204
  role: "user",
168
205
  content: framed_content,
169
206
  compressed_summary: true,
170
207
  chunk_path: chunk_path,
208
+ topics: topics,
171
209
  system_injected: true
172
210
  }]
173
211
  end
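For orientation, here is a minimal sketch of the single message `parse_compressed_result` now returns, with the previous-chunks index and current-chunk anchor appended. The chunk names, paths, and topics are illustrative; the field names and framing text come from the hunk above.

```ruby
# Illustrative only: mirrors the message shape built in parse_compressed_result.
summary_message = {
  role: "user",
  content: <<~TEXT,
    [Compressed conversation summary — previous turns archived]

    ...compressed summary text from the LLM, <topics> block stripped...

    ---
    📁 **Previous chunks (newest first):**
    - `chunk-02.md` — CI pipeline fixes
    - `chunk-01.md` — auth refactor
    _Use `file_reader` to recall details from these chunks._

    ---
    📁 **Current chunk archived at:** `/tmp/session/chunk-03.md`
    _Use `file_reader` tool to recall details from this chunk._
  TEXT
  compressed_summary: true,
  chunk_path: "/tmp/session/chunk-03.md",
  topics: "payment flow rework",
  system_injected: true
}
```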
@@ -160,19 +160,35 @@ module Clacky
160
160
  # chunk files, creating circular chunk references. Counting from history is always accurate.
161
161
  existing_chunk_count = original_messages.count { |m| m[:compressed_summary] }
162
162
  chunk_index = existing_chunk_count + 1
163
+
164
+ # Extract topics from the LLM response to store in both the chunk MD front
165
+ # matter and the compressed_summary message hash (for future chunk indexing).
166
+ topics = @message_compressor.parse_topics(compressed_content)
167
+
163
168
  chunk_path = save_compressed_chunk(
164
169
  original_messages,
165
170
  compression_context[:recent_messages],
166
171
  chunk_index: chunk_index,
167
172
  compression_level: compression_context[:compression_level],
168
- topics: @message_compressor.parse_topics(compressed_content)
173
+ topics: topics
169
174
  )
170
175
 
176
+ # Collect previous chunk references so the new summary carries a complete
177
+ # index of all older archives. Without this, each new compression would
178
+ # lose all prior chunk references — leaving only the newest chunk reachable
179
+ # via replay_history. The AI can still access older chunks via file_reader
180
+ # using the embedded basenames and topics.
181
+ previous_chunks = original_messages
182
+ .select { |m| m[:compressed_summary] && m[:chunk_path] }
183
+ .map { |m| { basename: File.basename(m[:chunk_path]), path: m[:chunk_path], topics: m[:topics] } }
184
+
171
185
  @history.replace_all(@message_compressor.rebuild_with_compression(
172
186
  compressed_content,
173
187
  original_messages: original_messages,
174
188
  recent_messages: compression_context[:recent_messages],
175
- chunk_path: chunk_path
189
+ chunk_path: chunk_path,
190
+ topics: topics,
191
+ previous_chunks: previous_chunks
176
192
  ))
177
193
 
178
194
  # Reset to the estimated size of the rebuilt (small) history.
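As a concrete example of the index collected here, prior `compressed_summary` messages in history map to `previous_chunks` like this (message contents and paths are made up; the select/map mirrors the hunk above):

```ruby
original_messages = [
  { role: "system", content: "..." },
  { role: "user", content: "...", compressed_summary: true,
    chunk_path: "/tmp/session/chunk-01.md", topics: "auth refactor" },
  { role: "assistant", content: "..." }
]

previous_chunks = original_messages
  .select { |m| m[:compressed_summary] && m[:chunk_path] }
  .map { |m| { basename: File.basename(m[:chunk_path]), path: m[:chunk_path], topics: m[:topics] } }

# => [{ basename: "chunk-01.md", path: "/tmp/session/chunk-01.md", topics: "auth refactor" }]
```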
@@ -54,6 +54,20 @@ module Clacky
54
54
  @pending_error_rollback = true
55
55
  end
56
56
 
57
+ # Restore the session's original model if it still exists in the current
58
+ # config. This prevents all sessions from silently switching to the new
59
+ # default model when the user changes it and restarts. Falls back to the
60
+ # current default if the model was deleted/renamed since the session was
61
+ # last saved.
62
+ saved_model_name = session_data.dig(:config, :model_name)
63
+ if saved_model_name
64
+ saved_base_url = session_data.dig(:config, :model_base_url)
65
+ model_entry = @config.find_model_by_name_and_url(saved_model_name, saved_base_url)
66
+ if model_entry && model_entry["id"]
67
+ switch_model_by_id(model_entry["id"])
68
+ end
69
+ end
70
+
57
71
  # Rebuild and refresh the system prompt so any newly installed skills
58
72
  # (or other configuration changes since the session was saved) are
59
73
  # reflected immediately — without requiring the user to create a new session.
@@ -98,11 +112,19 @@ module Clacky
98
112
  config: {
99
113
  # NOTE: api_key and other sensitive credentials are intentionally excluded
100
114
  # to prevent leaking secrets into session files on disk.
115
+ # model_name is saved so the session can restore its original model on restart
116
+ # (falling back to the current default if the model no longer exists).
101
117
  permission_mode: @config.permission_mode.to_s,
102
118
  enable_compression: @config.enable_compression,
103
119
  enable_prompt_caching: @config.enable_prompt_caching,
104
120
  max_tokens: @config.max_tokens,
105
- verbose: @config.verbose
121
+ verbose: @config.verbose,
122
+ # Persist the current model identity so the session can restore its
123
+ # original model on restart. model_name + model_base_url form a
124
+ # composite key to avoid matching a different provider's model of
125
+ # the same name. Falls back to default if the model no longer exists.
126
+ model_name: @config.current_model&.dig("model"),
127
+ model_base_url: @config.current_model&.dig("base_url")
106
128
  },
107
129
  stats: stats_data,
108
130
  messages: @history.to_a
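For reference, a sketch of the non-sensitive config block this hunk now writes into the session file. The values are illustrative; the key names are the ones added above.

```ruby
# Illustrative session config payload; api_key is intentionally absent.
config_payload = {
  permission_mode: "default",
  enable_compression: true,
  enable_prompt_caching: true,
  max_tokens: 16384,
  verbose: false,
  # Composite key used to restore the session's original model after a restart
  # (see find_model_by_name_and_url in agent_config.rb):
  model_name: "dsk-deepseek-v4-pro",
  model_base_url: "https://api.example.com/v1"
}
puts config_payload.inspect
```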
@@ -169,6 +169,17 @@ module Clacky
169
169
  # Inject TODO reminder for non-todo_manager tools
170
170
  formatted_result = inject_todo_reminder(call[:name], formatted_result)
171
171
 
172
+ # Extract image_inject sidecar before building the tool content string.
173
+ # image_inject carries the base64 payload that must be delivered as a
174
+ # follow-up `role:"user"` message (OpenAI/OpenRouter/Gemini only accept
175
+ # image_url blocks in user messages, not in tool messages).
176
+ # Strip it from the content sent to the API so it isn't tokenised as text.
177
+ image_inject = nil
178
+ if formatted_result.is_a?(Hash) && formatted_result[:image_inject]
179
+ image_inject = formatted_result[:image_inject]
180
+ formatted_result = formatted_result.reject { |k, _| k == :image_inject }
181
+ end
182
+
172
183
  # If the tool returned a plain string, use it directly (avoids double-escaping).
173
184
  # If it returned an Array (e.g. multipart vision blocks with image + text),
174
185
  # pass it through as-is so format_tool_results can send it to the API.
@@ -182,10 +193,9 @@ module Clacky
182
193
  JSON.generate(formatted_result)
183
194
  end
184
195
 
185
- {
186
- id: call[:id],
187
- content: content
188
- }
196
+ result = { id: call[:id], content: content }
197
+ result[:image_inject] = image_inject if image_inject
198
+ result
189
199
  end
190
200
 
191
201
  # Build error result for tool execution
data/lib/clacky/agent.rb CHANGED
@@ -883,6 +883,36 @@ module Clacky
883
883
 
884
884
  formatted_messages = @client.format_tool_results(response, tool_results, model: current_model)
885
885
  formatted_messages.each { |msg| @history.append(msg.merge(task_id: @current_task_id)) }
886
+
887
+ # Append a follow-up `role:"user"` message for any image payloads that
888
+ # could not be delivered inside the tool message.
889
+ #
890
+ # Background: OpenAI-compatible APIs (OpenRouter, Gemini, GPT-4o, etc.)
891
+ # only accept image_url content blocks in `role:"user"` messages. Putting
892
+ # base64 data in a `role:"tool"` message causes it to be JSON-encoded as
893
+ # plain text, inflating token counts by 20-40x. The tool result carries a
894
+ # plain-text description for the LLM; the actual image is delivered here.
895
+ tool_results.each do |tr|
896
+ inject = tr[:image_inject]
897
+ next unless inject
898
+
899
+ mime_type = inject[:mime_type]
900
+ base64_data = inject[:base64_data]
901
+ path = inject[:path]
902
+ next unless mime_type && base64_data
903
+
904
+ data_url = "data:#{mime_type};base64,#{base64_data}"
905
+ image_content = [
906
+ { type: "text", text: "[Image from file_reader: #{File.basename(path.to_s)}]" },
907
+ { type: "image_url", image_url: { url: data_url } }
908
+ ]
909
+ @history.append({
910
+ role: "user",
911
+ content: image_content,
912
+ system_injected: true,
913
+ task_id: @current_task_id
914
+ })
915
+ end
886
916
  end
887
917
 
888
918
  # Interrupt the agent's current run
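To make the split concrete, here is a sketch of a tool result carrying the `image_inject` sidecar and the follow-up `role:"user"` message built from it. The file name, call id, and base64 payload are made up; the block types match the hunk above.

```ruby
# Illustrative tool result from file_reader: the text content describes the
# image, while image_inject carries the actual payload out-of-band.
tool_result = {
  id: "call_123",
  content: "Read screenshot.png (image/png, 42 KB); image delivered separately.",
  image_inject: {
    mime_type: "image/png",
    base64_data: "iVBORw0KGgoAAA", # truncated placeholder
    path: "screenshot.png"
  }
}

inject = tool_result[:image_inject]
data_url = "data:#{inject[:mime_type]};base64,#{inject[:base64_data]}"

# The image goes in a separate role:"user" message, because OpenAI-compatible
# APIs only accept image_url blocks in user messages.
follow_up = {
  role: "user",
  content: [
    { type: "text", text: "[Image from file_reader: #{File.basename(inject[:path])}]" },
    { type: "image_url", image_url: { url: data_url } }
  ],
  system_injected: true
}
```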
@@ -1397,6 +1427,7 @@ module Clacky
1397
1427
  ].compact.join(". ")
1398
1428
 
1399
1429
  content = "[Session context: #{parts}]"
1430
+
1400
1431
  @history.append({
1401
1432
  role: "user",
1402
1433
  content: content,
@@ -158,7 +158,7 @@ module Clacky
158
158
 
159
159
  def initialize(options = {})
160
160
  @permission_mode = validate_permission_mode(options[:permission_mode])
161
- @max_tokens = options[:max_tokens] || 8192
161
+ @max_tokens = options[:max_tokens] || 16384
162
162
  @verbose = options[:verbose] || false
163
163
  @enable_compression = options[:enable_compression].nil? ? true : options[:enable_compression]
164
164
  # Enable prompt caching by default for cost savings
@@ -549,6 +549,21 @@ module Clacky
549
549
  @models.find { |m| m["type"] == type }
550
550
  end
551
551
 
552
+ # Find model by composite key (model name + base_url).
553
+ # Used when restoring a session to match its original model without relying
554
+ # on the runtime-only id (which changes on every process restart).
555
+ # base_url is optional for backward compatibility with sessions saved
556
+ # before base_url was persisted.
557
+ # @param model_name [String] the model's "model" field (e.g. "dsk-deepseek-v4-pro")
558
+ # @param base_url [String, nil] the model's "base_url" field
559
+ # @return [Hash, nil] the matching model entry or nil
560
+ def find_model_by_name_and_url(model_name, base_url = nil)
561
+ @models.find do |m|
562
+ m["model"] == model_name &&
563
+ (base_url.nil? || m["base_url"] == base_url)
564
+ end
565
+ end
566
+
552
567
  # Get the default model (type: default)
553
568
  # Falls back to current_model for backward compatibility
554
569
  def default_model
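A self-contained sketch of the composite-key match this method performs. The model entries are made up; the second entry shows why `base_url` is needed to disambiguate same-named models from different providers.

```ruby
models = [
  { "id" => "m1", "model" => "kimi-k2.5", "base_url" => "https://api.moonshot.cn/v1" },
  { "id" => "m2", "model" => "kimi-k2.5", "base_url" => "https://proxy.example.com/v1" }
]

# Mirrors find_model_by_name_and_url: base_url is optional for sessions saved
# before it was persisted.
find = lambda do |model_name, base_url = nil|
  models.find do |m|
    m["model"] == model_name &&
      (base_url.nil? || m["base_url"] == base_url)
  end
end

p find.call("kimi-k2.5", "https://proxy.example.com/v1") # => the "m2" entry
p find.call("kimi-k2.5")                                 # => the "m1" entry (first match)
```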
@@ -964,16 +964,24 @@ module Clacky
964
964
  key = fetch_decryption_key(skill_id: skill_id, skill_version_id: skill_version_id)
965
965
 
966
966
  ciphertext = File.binread(enc_path)
967
- pt = aes_gcm_decrypt(key, ciphertext, file_meta["iv"], file_meta["tag"])
968
967
 
969
- # Integrity check
970
- actual = Digest::SHA256.hexdigest(pt)
971
- expected = file_meta["original_checksum"]
972
- if expected && actual != expected
973
- raise "Checksum mismatch for #{rel_plain}: expected #{expected}, got #{actual}"
974
- end
968
+ if ciphertext.nil? || ciphertext.empty?
969
+ # AES-GCM of empty data still produces 16+ bytes (auth tag + IV).
970
+ # A 0-byte file means the skill package is corrupted; skip
971
+ # decryption and produce an empty output so the skill can still run.
972
+ ""
973
+ else
974
+ pt = aes_gcm_decrypt(key, ciphertext, file_meta["iv"], file_meta["tag"])
975
+
976
+ # Integrity check
977
+ actual = Digest::SHA256.hexdigest(pt)
978
+ expected = file_meta["original_checksum"]
979
+ if expected && actual != expected
980
+ raise "Checksum mismatch for #{rel_plain}: expected #{expected}, got #{actual}"
981
+ end
975
982
 
976
- pt
983
+ pt
984
+ end
977
985
  else
978
986
  # Mock/plain skill: raw bytes
979
987
  File.binread(enc_path).force_encoding("UTF-8")
data/lib/clacky/client.rb CHANGED
@@ -15,6 +15,12 @@ module Clacky
15
15
  @use_anthropic_format = anthropic_format
16
16
  # Detect Bedrock: ABSK key prefix (native AWS) or abs- model prefix (Clacky AI proxy)
17
17
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)
18
+
19
+ # Determine vision support once at construction time.
20
+ # Non-vision models (DeepSeek, Kimi, MiniMax, etc.) reject image_url
21
+ # content blocks; the conversion layer strips them when this is false.
22
+ provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)
23
+ @vision_supported = Providers.supports?(provider_id, :vision, model_name: @model)
18
24
  end
19
25
 
20
26
  # Returns true when the client is using the AWS Bedrock Converse API.
@@ -185,7 +191,10 @@ module Clacky
185
191
  # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
186
192
  messages = apply_message_caching(messages) if caching_enabled
187
193
 
188
- body = MessageFormat::OpenAI.build_request_body(messages, model, tools, max_tokens, caching_enabled)
194
+ body = MessageFormat::OpenAI.build_request_body(
195
+ messages, model, tools, max_tokens, caching_enabled,
196
+ vision_supported: @vision_supported
197
+ )
189
198
  response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
190
199
 
191
200
  raise_error(response) unless response.status == 200
@@ -183,12 +183,20 @@ Print a success summary:
183
183
  ```
184
184
 
185
185
  ### 4. Start Development Server
186
- After the script completes, use the run_project tool to start the server:
186
+ After the script completes, read the `.1024` config file in the project root
187
+ to find the `run_command`, then start it in the background via the terminal tool:
188
+
187
189
  ```
188
- run_project(action: "start")
190
+ # First, read .1024 to get the run_command (usually `bin/dev` for Rails):
191
+ file_reader(path: ".1024")
192
+
193
+ # Then start the server in the background:
194
+ terminal(command: "<run_command from .1024>", background: true)
189
195
  ```
190
196
 
191
- **Important**: If run_project executes without errors, the server has started successfully.
197
+ **Important**: If the terminal call returns a session_id (and no error), the
198
+ server has started successfully. You can inspect logs later by polling the
199
+ same session_id with an empty input.
192
200
 
193
201
  Then inform the user and ask what to develop next:
194
202
  ```
@@ -210,7 +218,7 @@ What would you like to develop next?
210
218
  - bin/setup fails → Show error, suggest running `./bin/setup` manually
211
219
  - Cloud project creation fails → Soft-fail with warning, continue to start server
212
220
  - workspace_key missing → Ask user interactively; skip cloud init if user declines
213
- - run_project fails → Check logs with `run_project(action: "output")` and verify database status
221
+ - Dev server fails to start → Poll the terminal session (empty input) to check logs and verify database status
214
222
 
215
223
  ## Example Interaction
216
224
  User: "/new"
@@ -224,5 +232,5 @@ Response:
224
232
  6. Project setup complete!
225
233
  7. Initializing cloud project binding...
226
234
  8. ✅ Cloud project created and config injected into config/application.yml!
227
- 9. Starting development server with run_project...
235
+ 9. Starting development server via terminal (background)...
228
236
  10. ✨ Server running! Visit http://localhost:3000
@@ -7,7 +7,6 @@ auto_summarize: true
7
7
  forbidden_tools:
8
8
  - write
9
9
  - edit
10
- - run_project
11
10
  - web_search
12
11
  - web_fetch
13
12
  - browser
@@ -27,15 +27,27 @@ module Clacky
27
27
  # ── Request building ──────────────────────────────────────────────────────
28
28
 
29
29
  # Build an OpenAI-compatible request body.
30
- # Canonical messages are already in OpenAI format — no conversion needed.
30
+ #
31
+ # Messages go through the canonical→OpenAI conversion layer
32
+ # (normalize_message_content). For most models this is a no-op because
33
+ # the internal canonical format IS OpenAI format. The conversion
34
+ # handles one edge case: image_url content blocks are stripped
35
+ # when vision_supported is false (e.g. DeepSeek, Kimi, MiniMax),
36
+ # replacing them with a text placeholder so the API doesn't reject
37
+ # the request with "unknown variant 'image_url'".
38
+ #
31
39
  # @param messages [Array<Hash>] canonical messages
32
40
  # @param model [String]
33
41
  # @param tools [Array<Hash>] OpenAI-style tool definitions
34
42
  # @param max_tokens [Integer]
35
43
  # @param caching_enabled [Boolean] (only effective for Claude via OpenRouter)
44
+ # @param vision_supported [Boolean] whether the target model accepts
45
+ # image_url content blocks (default true, conservative)
36
46
  # @return [Hash]
37
- def build_request_body(messages, model, tools, max_tokens, caching_enabled)
38
- body = { model: model, max_tokens: max_tokens, messages: messages }
47
+ def build_request_body(messages, model, tools, max_tokens, caching_enabled, vision_supported: true)
48
+ api_messages = messages.map { |msg| normalize_message_content(msg, vision_supported: vision_supported) }
49
+
50
+ body = { model: model, max_tokens: max_tokens, messages: api_messages }
39
51
 
40
52
  if tools&.any?
41
53
  if caching_enabled
@@ -50,6 +62,71 @@ module Clacky
50
62
  body
51
63
  end
52
64
 
65
+ # ── Canonical → OpenAI conversion ─────────────────────────────────────────
66
+
67
+ # Process a single message's content through the canonical→OpenAI
68
+ # conversion layer. For String content this is a no-op; for Array
69
+ # content each block goes through normalize_block.
70
+ #
71
+ # @param msg [Hash] canonical message
72
+ # @param vision_supported [Boolean]
73
+ # @return [Hash] message with content normalised for OpenAI API
74
+ def normalize_message_content(msg, vision_supported:)
75
+ content = msg[:content]
76
+ return msg unless content.is_a?(Array)
77
+
78
+ blocks = content_to_blocks(content, vision_supported: vision_supported)
79
+ # Most APIs reject empty content arrays — use a placeholder text block.
80
+ blocks = [{ type: "text", text: "..." }] if blocks.empty?
81
+ msg.merge(content: blocks)
82
+ end
83
+
84
+ # Convert canonical content array to OpenAI-compatible block array.
85
+ # Each block goes through normalize_block; nil results are compacted.
86
+ #
87
+ # @param content [Array<Hash>] canonical content blocks
88
+ # @param vision_supported [Boolean]
89
+ # @return [Array<Hash>]
90
+ def content_to_blocks(content, vision_supported:)
91
+ content.map { |b| normalize_block(b, vision_supported: vision_supported) }.compact
92
+ end
93
+
94
+ # Normalize a single canonical content block to OpenAI API format.
95
+ #
96
+ # Canonical text blocks pass through (with cache_control preserved).
97
+ # image_url blocks are kept for vision-capable models and replaced
98
+ # with a text placeholder for non-vision models (DeepSeek, Kimi, etc.).
99
+ #
100
+ # @param block [Hash] canonical content block
101
+ # @param vision_supported [Boolean]
102
+ # @return [Hash, nil] nil for empty-text blocks (dropped)
103
+ def normalize_block(block, vision_supported:)
104
+ return block unless block.is_a?(Hash)
105
+
106
+ case block[:type]
107
+ when "text"
108
+ # Drop empty text blocks — most APIs (Anthropic, DeepSeek, etc.)
109
+ # reject { type: "text", text: "" }.
110
+ text = block[:text]
111
+ return nil if text.nil? || text.empty?
112
+
113
+ result = { type: "text", text: text }
114
+ result[:cache_control] = block[:cache_control] if block[:cache_control]
115
+ result
116
+ when "image_url"
117
+ if vision_supported
118
+ block # Pass through — GPT-4V, Gemini, etc. accept image_url
119
+ else
120
+ # Replace with text placeholder so the API doesn't reject the
121
+ # request. The model will still see the context that an image
122
+ # was present (from file_prompt / system_injected metadata).
123
+ { type: "text", text: "[Image content removed — current model does not support vision input]" }
124
+ end
125
+ else
126
+ block # Pass through unknown block types (tool_use, tool_result, etc.)
127
+ end
128
+ end
129
+
53
130
  # ── Response parsing ──────────────────────────────────────────────────────
54
131
 
55
132
  # Parse OpenAI-compatible API response into canonical internal format.
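A self-contained sketch of the observable effect of the image handling above. The message content is made up; the placeholder text is the one from the hunk.

```ruby
content = [
  { type: "text", text: "What does this screenshot show?" },
  { type: "image_url", image_url: { url: "data:image/png;base64,iVBOR" } }
]

# Simplified stand-in for the non-vision path of normalize_block: when
# vision_supported is false, image_url blocks become text placeholders.
def strip_images(blocks)
  blocks.map do |b|
    next b unless b.is_a?(Hash) && b[:type] == "image_url"
    { type: "text", text: "[Image content removed — current model does not support vision input]" }
  end
end

pp strip_images(content)
# The text block passes through unchanged; the image block is replaced by the
# placeholder. Vision-capable models skip this path and receive image_url as-is.
```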
@@ -114,10 +114,10 @@ module Clacky
114
114
  "name" => "Kimi (Moonshot)",
115
115
  "base_url" => "https://api.moonshot.cn/v1",
116
116
  "api" => "openai-completions",
117
- "default_model" => "kimi-k2.5",
118
- "models" => ["kimi-k2.5"],
119
- # Kimi k2.5 (text family) does not accept image inputs.
120
- "capabilities" => { "vision" => false }.freeze,
117
+ "default_model" => "kimi-k2.6",
118
+ "models" => ["kimi-k2.6", "kimi-k2.5"],
119
+ # k2.5 / k2.6 are multimodal; legacy text-only k2 models would need a model_capabilities override if added.
120
+ "capabilities" => { "vision" => true }.freeze,
121
121
  "website_url" => "https://platform.moonshot.cn/console/api-keys"
122
122
  }.freeze,
123
123
 
@@ -136,29 +136,18 @@ module Clacky
136
136
  "api" => "bedrock",
137
137
  "default_model" => "abs-claude-sonnet-4-5",
138
138
  "models" => [
139
- "abs-claude-opus-4-7",
140
139
  "abs-claude-opus-4-6",
141
140
  "abs-claude-sonnet-4-6",
142
141
  "abs-claude-sonnet-4-5",
143
- "abs-claude-haiku-4-5",
144
- "dsk-deepseek-v4-pro",
145
- "dsk-deepseek-v4-flash",
146
- "or-gemini-3-1-pro"
142
+ "abs-claude-haiku-4-5"
147
143
  ],
148
- # Same lineup as openclacky: Claude is vision, DeepSeek is text-only,
149
- # Gemini inherits the provider-default vision=true.
144
+ # Claude familyall vision-capable.
150
145
  "capabilities" => { "vision" => true }.freeze,
151
- "model_capabilities" => {
152
- "dsk-deepseek-v4-pro" => { "vision" => false }.freeze,
153
- "dsk-deepseek-v4-flash" => { "vision" => false }.freeze
154
- }.freeze,
155
146
  # Per-primary lite pairing — see openclacky preset for rationale.
156
147
  "lite_models" => {
157
- "abs-claude-opus-4-7" => "abs-claude-haiku-4-5",
158
148
  "abs-claude-opus-4-6" => "abs-claude-haiku-4-5",
159
149
  "abs-claude-sonnet-4-6" => "abs-claude-haiku-4-5",
160
- "abs-claude-sonnet-4-5" => "abs-claude-haiku-4-5",
161
- "dsk-deepseek-v4-pro" => "dsk-deepseek-v4-flash"
150
+ "abs-claude-sonnet-4-5" => "abs-claude-haiku-4-5"
162
151
  },
163
152
  # Fallback chain: if a model is unavailable, try the next one in order.
164
153
  # Keys are primary model names; values are the fallback model to use instead.