openclacky 0.8.5 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +52 -0
  3. data/docs/channel-architecture.md +235 -0
  4. data/lib/clacky/agent/memory_updater.rb +3 -2
  5. data/lib/clacky/agent/session_serializer.rb +48 -3
  6. data/lib/clacky/agent/skill_manager.rb +1 -1
  7. data/lib/clacky/agent.rb +34 -15
  8. data/lib/clacky/brand_config.rb +352 -43
  9. data/lib/clacky/cli.rb +5 -4
  10. data/lib/clacky/client.rb +2 -2
  11. data/lib/clacky/default_skills/channel-setup/SKILL.md +204 -0
  12. data/lib/clacky/default_skills/cron-task-creator/SKILL.md +250 -0
  13. data/lib/clacky/default_skills/cron-task-creator/evals/evals.json +38 -0
  14. data/lib/clacky/default_skills/cron-task-creator/scripts/list_tasks.rb +121 -0
  15. data/lib/clacky/default_skills/cron-task-creator/scripts/manage_schedule.rb +149 -0
  16. data/lib/clacky/default_skills/cron-task-creator/scripts/manage_task.rb +81 -0
  17. data/lib/clacky/default_skills/cron-task-creator/scripts/task_history.rb +137 -0
  18. data/lib/clacky/default_skills/pdf-reader/SKILL.md +90 -0
  19. data/lib/clacky/default_skills/skill-add/SKILL.md +29 -252
  20. data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +233 -0
  21. data/lib/clacky/default_skills/skill-creator/SKILL.md +547 -0
  22. data/lib/clacky/default_skills/skill-creator/agents/analyzer.md +274 -0
  23. data/lib/clacky/default_skills/skill-creator/agents/comparator.md +202 -0
  24. data/lib/clacky/default_skills/skill-creator/agents/grader.md +223 -0
  25. data/lib/clacky/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
  26. data/lib/clacky/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
  27. data/lib/clacky/default_skills/skill-creator/references/schemas.md +430 -0
  28. data/lib/clacky/default_skills/skill-creator/scripts/__init__.py +0 -0
  29. data/lib/clacky/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  30. data/lib/clacky/default_skills/skill-creator/scripts/generate_report.py +326 -0
  31. data/lib/clacky/default_skills/skill-creator/scripts/improve_description.py +310 -0
  32. data/lib/clacky/default_skills/skill-creator/scripts/quick_validate.py +103 -0
  33. data/lib/clacky/default_skills/skill-creator/scripts/run_eval.py +317 -0
  34. data/lib/clacky/default_skills/skill-creator/scripts/run_loop.py +331 -0
  35. data/lib/clacky/default_skills/skill-creator/scripts/utils.py +47 -0
  36. data/lib/clacky/server/channel/adapters/base.rb +82 -0
  37. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +172 -0
  38. data/lib/clacky/server/channel/adapters/feishu/bot.rb +191 -0
  39. data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +106 -0
  40. data/lib/clacky/server/channel/adapters/feishu/ws_client.rb +385 -0
  41. data/lib/clacky/server/channel/adapters/wecom/adapter.rb +106 -0
  42. data/lib/clacky/server/channel/adapters/wecom/ws_client.rb +188 -0
  43. data/lib/clacky/server/channel/channel_config.rb +146 -0
  44. data/lib/clacky/server/channel/channel_manager.rb +230 -0
  45. data/lib/clacky/server/channel/channel_ui_controller.rb +179 -0
  46. data/lib/clacky/server/channel.rb +29 -0
  47. data/lib/clacky/server/http_server.rb +401 -12
  48. data/lib/clacky/server/web_ui_controller.rb +73 -1
  49. data/lib/clacky/skill.rb +25 -11
  50. data/lib/clacky/skill_loader.rb +15 -7
  51. data/lib/clacky/tools/browser.rb +300 -43
  52. data/lib/clacky/tools/file_reader.rb +3 -3
  53. data/lib/clacky/tools/shell.rb +22 -0
  54. data/lib/clacky/utils/file_processor.rb +2 -2
  55. data/lib/clacky/utils/logger.rb +20 -0
  56. data/lib/clacky/version.rb +1 -1
  57. data/lib/clacky/web/app.css +509 -17
  58. data/lib/clacky/web/app.js +143 -34
  59. data/lib/clacky/web/channels.js +196 -0
  60. data/lib/clacky/web/icon-dark.svg +23 -0
  61. data/lib/clacky/web/icon.svg +26 -0
  62. data/lib/clacky/web/index.html +31 -7
  63. data/lib/clacky/web/sessions.js +14 -1
  64. data/lib/clacky/web/settings.js +2 -2
  65. data/lib/clacky/web/skills.js +353 -108
  66. data/lib/clacky/web/tasks.js +2 -2
  67. metadata +40 -3
  68. data/lib/clacky/default_skills/create-task/SKILL.md +0 -102
  69. data/lib/clacky/default_skills/skill-add/scripts/install_from_github.rb +0 -189
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eafcf68d56923cdd3aaacc446277756c77661eaf5da6348ac7f54c565a141bd3
4
- data.tar.gz: 5f09c5ffdfba608554be327b9e7bf6ea1e7df57ceedf2ead6402ae8f74ce8780
3
+ metadata.gz: 38f9805e951dec0f87bda1b64033e0ea7f0c5c6d1c4fd2427f57dfc13aec0835
4
+ data.tar.gz: f6f0d08206ead392ffbbc073bb92c5b8e5b4c9f4ecf37172153c4bf46f4963e0
5
5
  SHA512:
6
- metadata.gz: 223dc788074dc74f3e61f1980eb9bab3ef0fb1d15d1644bf500013ae058212d52b8184be51762c08adeeee3d856d2eab4c78d7ea142fa69b3273e81003c80d7c
7
- data.tar.gz: 58fbeba9eb4f17f02d61a61707c82e16f3a5d152d0ae95cc9ad745d356470a677ab471093271f54ca3e25ddffe9203060e8612698269c96d18015f24288f912a
6
+ metadata.gz: d7400735f1f2cbf9fa6b74e56aaa9264e881ab0618885e87b9757458b3b87bde01c5319db6d6f6833573792229c8aa635d5c09bab43cdde15e8cddfe2ce3e418
7
+ data.tar.gz: ef4dede49038208ff386f5b536ba4c64158e5b72f5599694f14ecf83bd3259b51be6af52bef10fbdea88fbc23f2b2b11c9316e1bdbb1f350c355a0fedeb23bd1
data/CHANGELOG.md CHANGED
@@ -7,6 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.8.7] - 2026-03-13
11
+
12
+ ### Added
13
+ - **PDF file upload and reading**: users can now upload PDF files directly in the WebUI chat; the agent reads and analyzes the content via the built-in `pdf-reader` skill
14
+ - **WebUI favicon and SVG icons**: browser tab now shows the Clacky icon
15
+ - **Public skill store install**: skills from the public store can be installed directly via the WebUI without a GitHub URL
16
+ - **Auto-kill previous server on startup**: launching `clacky serve` now automatically kills any previously running instance via pidfile, preventing port conflicts
17
+
18
+ ### Improved
19
+ - **Brand skill loading speed**: loading brand skills no longer triggers a network decryption request — name and description are now read from the local `brand_skills.json` cache, making New Session significantly faster
20
+ - **Memory update UX**: memory update step now shows a spinner and info-style message instead of a bare log line
21
+ - **Browser snapshot output**: snapshot output is compressed to reduce token cost when the agent uses browser tools
22
+ - **Subagent output**: subagent task completion now shows a brief info line instead of a full "Task Complete" block, reducing noise in the parent agent's context
23
+
24
+ ### Fixed
25
+ - **Subagent token delta on first iteration**: subagent now inherits `previous_total_tokens` correctly, fixing an inflated token count on the first tool iteration
26
+ - **Chrome DevTools inspect URL**: updated the remote debugging URL to include the `#remote-debugging` fragment for correct navigation
27
+ - **Shell output token explosion**: long lines in shell output are now truncated to prevent excessive token usage
28
+
29
+ ### More
30
+ - Binary file size limit lowered from 5 MB to 512 KB to reduce accidental token cost
31
+ - `kill_existing_server` logic moved from CLI into `HttpServer` for cleaner separation
32
+ - Browser tool prefers `snapshot -i` over `screenshot` for lower token cost
33
+ - Cross-platform PID file path using `Dir.tmpdir` instead of hardcoded `/tmp`
34
+
35
+ ## [0.8.6] - 2026-03-12
36
+
37
+ ### Added
38
+ - **Channel system with Feishu & WeCom support**: integrated IM platform adapters — agents can now receive and reply to messages via Feishu (WebSocket) and WeCom channels
39
+ - **Skill encryption (brand skills)**: brand skills can be distributed as encrypted `.enc` files, decrypted on-the-fly using license keys; includes a full key management and manifest system
40
+ - **Cron task creator & skill creator default skills**: two new built-in skills for creating scheduled tasks and new skills directly from chat
41
+ - **Image messages in session history restore**: session restore now correctly replays image-containing messages, including thumbnail display in the UI
42
+ - **Skill auto-upload to cloud**: skills can be uploaded to the cloud store from within the UI
43
+
44
+ ### Improved
45
+ - **WeCom setup flow**: improved step-by-step WeCom channel configuration UX (#11)
46
+ - **Skill autocomplete UI**: enhanced slash-command autocomplete interaction — better keyboard navigation, input behavior, and visual feedback (#6)
47
+ - **Chrome setup UX**: simplified Chrome installation flow with improved error messages and progress indicators (#8)
48
+ - **WebUI colors and layout**: polished light/dark mode colors, sidebar alignment, and badge styles for a more consistent look
49
+ - **Test suite speed**: `CLACKY_TEST` guard prevents brand skill network calls during tests — suite now runs ~60× faster per example
50
+
51
+ ### Fixed
52
+ - **Duplicate user bubble on skill install**: prevented an extra chat bubble appearing when installing a skill from the store
53
+ - **Image thumbnails in session replay**: restored missing image thumbnails when replaying historical sessions
54
+ - **WebUI permission mode**: Web UI sessions now correctly use `confirm_all` permission mode
55
+ - **Feishu WS log noise**: removed emoji characters from WebSocket connection log messages
56
+
57
+ ### More
58
+ - Subagent memory update disabled to reduce noise
59
+ - Ping request `max_tokens` bumped from 10 to 16
60
+ - WebUI updated to use new cron-task-creator and skill-creator skills
61
+
10
62
  ## [0.8.5] - 2026-03-11
11
63
 
12
64
  ### Fixed
data/docs/channel-architecture.md ADDED
@@ -0,0 +1,235 @@
1
+ # Channel Architecture
2
+
3
+ ## Overview
4
+
5
+ Channel is a feature that bridges Clacky's Server Sessions to IM platforms
6
+ (Feishu, WeCom, DingTalk, etc.). It reuses the existing Agent + SessionRegistry
7
+ infrastructure — the Agent knows nothing about IM; the Channel layer is purely
8
+ a transport adapter.
9
+
10
+ ## Design Principles
11
+
12
+ - **Zero Agent intrusion** — Agent only speaks `UIInterface`; swap the controller, get IM output
13
+ - **Reuse SessionRegistry** — IM chats resolve to the same `SessionRegistry` sessions as Web UI
14
+ - **WebSocket long connection** — No public domain required; adapters hold a persistent WSS connection to the IM platform
15
+ - **One platform = 2 threads** — read loop thread + ping/heartbeat thread (constant, small footprint)
16
+
17
+ ---
18
+
19
+ ## Layer Diagram
20
+
21
+ ```
22
+ IM Platforms (Feishu / WeCom / DingTalk)
23
+ │ WebSocket long connection (wss://)
24
+
25
+ ┌─────────────────────────────────────┐
26
+ │ Channel Adapter Layer │
27
+ │ Feishu::Adapter │
28
+ │ ├── WSClient (read loop + ping) │
29
+ │ ├── Bot (send API) │
30
+ │ └── MessageParser │
31
+ │ Wecom::Adapter │
32
+ │ └── WSClient (read loop + ping) │
33
+ │ (future) Dingtalk::Adapter │
34
+ └──────────────┬──────────────────────┘
35
+ │ standardized event Hash
36
+
37
+ ┌─────────────────────────────────────┐
38
+ │ ChannelManager │
39
+ │ • Owns adapter threads │
40
+ │ • Routes inbound event → │
41
+ │ ChannelBinding → session_id │
42
+ │ • Calls agent.run in Thread.new │
43
+ └──────────────┬──────────────────────┘
44
+
45
+ ┌───────┴────────┐
46
+ ▼ ▼
47
+ SessionRegistry ChannelUIController
48
+ (existing) (implements UIInterface)
49
+ │ │
50
+ ▼ ▼
51
+ Agent IM Platform reply
52
+ (unchanged) via adapter.send_text
53
+ ```
54
+
55
+ ---
56
+
57
+ ## File Structure
58
+
59
+ ```
60
+ lib/clacky/server/channel/
61
+ ├── adapters/
62
+ │ ├── base.rb # Adapter abstract base + registry
63
+ │ ├── feishu/
64
+ │ │ ├── adapter.rb # Feishu::Adapter < Base
65
+ │ │ ├── bot.rb # HTTP send API (token cache, Markdown/card)
66
+ │ │ ├── message_parser.rb # Raw WS event → standardized Hash
67
+ │ │ └── ws_client.rb # Feishu protobuf WS long connection
68
+ │ └── wecom/
69
+ │ ├── adapter.rb # Wecom::Adapter < Base
70
+ │ └── ws_client.rb # WeCom JSON WS long connection
71
+ ├── channel_message.rb # Struct: standardized inbound message
72
+ ├── channel_binding.rb # (platform, user_id) → session_id mapping
73
+ ├── channel_ui_controller.rb # UIInterface impl — pushes events to IM
74
+ └── channel_manager.rb # Lifecycle: start/stop adapters, route messages
75
+ lib/clacky/server/channel.rb # Top-level require entry point
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Standardized Inbound Event
81
+
82
+ All adapters yield the same Hash shape to `ChannelManager`:
83
+
84
+ ```ruby
85
+ {
86
+ platform: :feishu, # Symbol
87
+ chat_id: "oc_xxx", # String — IM chat/group identifier
88
+ user_id: "ou_xxx", # String — IM user identifier
89
+ text: "deploy now", # String — cleaned user text
90
+ message_id: "om_xxx", # String — for threading / update
91
+ timestamp: Time, # Time object
92
+ chat_type: :direct | :group, # Symbol
93
+ raw: { ... } # Original platform payload
94
+ }
95
+ ```
96
+
97
+ ---
98
+
99
+ ## Adapter Interface (Base)
100
+
101
+ ```ruby
102
+ class Adapters::Base
103
+ def self.platform_id → Symbol
104
+ def self.platform_config(raw_config) → Hash # symbol-keyed
105
+ def self.env_keys → Array<String> # for config serialization
106
+
107
+ def start(&on_message) # blocks; yields event Hash per inbound message
108
+ def stop # graceful shutdown
109
+ def send_text(chat_id, text, reply_to: nil) → Hash
110
+ def update_message(chat_id, message_id, text) → Boolean
111
+ def supports_message_updates? → Boolean
112
+ def validate_config(config) → Array<String> # error messages
113
+ end
114
+ ```
115
+
116
+ ---
117
+
118
+ ## ChannelManager
119
+
120
+ ```ruby
121
+ class ChannelManager
122
+ def initialize(session_registry:, session_builder:, channel_config:, agent_config:)
123
+
124
+ def start # Thread.new per enabled platform adapter
125
+ def stop # kills all adapter threads gracefully
126
+
127
+ private
128
+
129
+ def route_message(adapter, event)
130
+ session_id = @binding.resolve_or_create(event, session_builder: @session_builder)
131
+ ui = ChannelUIController.new(event, adapter)
132
+ Thread.new { run_agent(session_id, event[:text], ui) }
133
+ end
134
+ end
135
+ ```
136
+
137
+ ---
138
+
139
+ ## ChannelBinding
140
+
141
+ Maps `(platform, user_id)` → `session_id`. Persisted to `~/.clacky/channel_bindings.yml`.
142
+
143
+ Binding modes (configurable per platform):
144
+
145
+ | Mode | Key | Description |
146
+ |------|-----|-------------|
147
+ | `user` | `(platform, user_id)` | Each IM user gets their own session (default) |
148
+ | `chat` | `(platform, chat_id)` | Whole group shares one session |
149
+
150
+ ---
151
+
152
+ ## ChannelUIController
153
+
154
+ Implements `UIInterface`. Key behaviours:
155
+
156
+ - `show_assistant_message` → `adapter.send_text(chat_id, content)`
157
+ - `show_tool_call` → buffers as `⚙️ \`tool summary\`` (flushed on next message)
158
+ - `show_progress` → `adapter.update_message(...)` if `supports_message_updates?`
159
+ - `show_complete` → sends `✅ Complete • N iterations • $cost`
160
+ - `request_confirmation` → **not supported in IM** (returns auto-approved / raises)
161
+
162
+ ---
163
+
164
+ ## Thread Model
165
+
166
+ ```
167
+ Main thread (WEBrick server.start — blocks)
168
+ ├── WEBrick request threads (existing)
169
+ ├── Agent task threads (existing, per task)
170
+ ├── Scheduler thread (existing, clacky-scheduler)
171
+ └── ChannelManager
172
+ ├── feishu-adapter thread (WSClient read loop, constant)
173
+ │ └── feishu-ping thread (heartbeat, 90s)
174
+ └── wecom-adapter thread (WSClient read loop, constant)
175
+ └── wecom-ping thread (heartbeat, 30s)
176
+ ```
177
+
178
+ Per enabled platform: **2 constant threads**. Agent task threads are spawned
179
+ on demand (same as Web UI path) and exit when done.
180
+
181
+ ---
182
+
183
+ ## Configuration
184
+
185
+ Channel credentials live in `~/.clacky/channels.yml` (managed by `ChannelConfig`
186
+ which already exists in main branch):
187
+
188
+ ```yaml
189
+ channels:
190
+ feishu:
191
+ enabled: true
192
+ app_id: cli_xxx
193
+ app_secret: xxx
194
+ allowed_users:
195
+ - ou_xxx
196
+ wecom:
197
+ enabled: false
198
+ bot_id: xxx
199
+ secret: xxx
200
+ ```
201
+
202
+ `ChannelManager` reads this via `ChannelConfig#platform_config(platform)`.
203
+
204
+ ---
205
+
206
+ ## Integration with HttpServer
207
+
208
+ ```ruby
209
+ # HttpServer#initialize
210
+ @channel_manager = ChannelManager.new(
211
+ session_registry: @registry,
212
+ session_builder: method(:build_session),
213
+ channel_config: Clacky::ChannelConfig.load,
214
+ agent_config: @agent_config
215
+ )
216
+
217
+ # HttpServer#start (after scheduler.start)
218
+ @channel_manager.start
219
+ ```
220
+
221
+ `ChannelManager#start` is non-blocking (spawns threads internally),
222
+ mirroring `Scheduler#start` behaviour.
223
+
224
+ ---
225
+
226
+ ## Future: DingTalk
227
+
228
+ DingTalk also supports a WebSocket Stream mode. Adding it means:
229
+
230
+ 1. `lib/clacky/server/channel/adapters/dingtalk/adapter.rb` inheriting `Base`
231
+ 2. `lib/clacky/server/channel/adapters/dingtalk/ws_client.rb`
232
+ 3. Register: `Adapters.register(:dingtalk, Adapter)`
233
+ 4. Add credentials to `ChannelConfig`
234
+
235
+ No changes needed to `ChannelManager`, `ChannelUIController`, or `ChannelBinding`.
data/lib/clacky/agent/memory_updater.rb CHANGED
@@ -26,6 +26,7 @@ module Clacky
26
26
  # @return [Boolean]
27
27
  def should_update_memory?
28
28
  return false unless memory_update_enabled?
29
+ return false if @is_subagent # Subagents never update memory
29
30
 
30
31
  task_iterations = @iterations - (@task_start_iterations || 0)
31
32
  task_iterations >= MEMORY_UPDATE_MIN_ITERATIONS
@@ -41,7 +42,7 @@ module Clacky
41
42
 
42
43
  @memory_prompt_injected = true
43
44
  @memory_updating = true
44
- @ui&.show_info("Updating long-term memory...")
45
+ @ui&.show_progress("Updating long-term memory")
45
46
 
46
47
  @messages << {
47
48
  role: "user",
@@ -61,7 +62,7 @@ module Clacky
61
62
  @messages.reject! { |m| m[:memory_update] }
62
63
  @memory_prompt_injected = false
63
64
  @memory_updating = false
64
- @ui&.show_info("Memory updated.")
65
+ @ui&.clear_progress
65
66
  end
66
67
 
67
68
  private def memory_update_enabled?
data/lib/clacky/agent/session_serializer.rb CHANGED
@@ -153,8 +153,20 @@ module Clacky
153
153
  @messages.each do |msg|
154
154
  role = msg[:role].to_s
155
155
 
156
- if role == "user" && !msg[:system_injected] && msg[:content].is_a?(String) &&
157
- !msg[:content].to_s.start_with?("[SYSTEM]")
156
+ # A real user message can have either a String content or an Array content
157
+ # (Array = multipart: text + image blocks). Exclude system-injected messages
158
+ # and synthetic [SYSTEM] text messages.
159
+ is_real_user_msg = role == "user" && !msg[:system_injected] &&
160
+ if msg[:content].is_a?(String)
161
+ !msg[:content].start_with?("[SYSTEM]")
162
+ elsif msg[:content].is_a?(Array)
163
+ # Must contain at least one text or image block (not a tool_result array)
164
+ msg[:content].any? { |b| b.is_a?(Hash) && %w[text image].include?(b[:type].to_s) }
165
+ else
166
+ false
167
+ end
168
+
169
+ if is_real_user_msg
158
170
  # Start a new round at each real user message
159
171
  current_round = { user_msg: msg, events: [] }
160
172
  rounds << current_round
@@ -175,8 +187,10 @@ module Clacky
175
187
  page.each do |round|
176
188
  msg = round[:user_msg]
177
189
  display_text = extract_text_from_content(msg[:content])
190
+ # Extract image data URLs from multipart content (for history replay rendering)
191
+ images = extract_images_from_content(msg[:content])
178
192
  # Emit user message with its timestamp for dedup on the frontend
179
- ui.show_user_message(display_text, created_at: msg[:created_at])
193
+ ui.show_user_message(display_text, created_at: msg[:created_at], images: images)
180
194
 
181
195
  round[:events].each do |ev|
182
196
  # Skip system-injected messages (e.g. synthetic skill content, memory prompts)
@@ -241,6 +255,37 @@ module Clacky
241
255
  Clacky::Logger.warn("refresh_system_prompt failed during session restore: #{e.message}")
242
256
  end
243
257
 
258
+ # Extract base64 data URLs from multipart content (image blocks).
259
+ # Returns an empty array when there are no images or content is plain text.
260
+ # @param content [String, Array, Object] Message content
261
+ # @return [Array<String>] Array of data URLs (e.g. "data:image/png;base64,...")
262
+ def extract_images_from_content(content)
263
+ return [] unless content.is_a?(Array)
264
+
265
+ content.filter_map do |block|
266
+ next unless block.is_a?(Hash)
267
+
268
+ case block[:type].to_s
269
+ when "image_url"
270
+ # OpenAI format: { type: "image_url", image_url: { url: "data:image/png;base64,..." } }
271
+ block.dig(:image_url, :url)
272
+ when "image"
273
+ # Anthropic format: { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }
274
+ source = block[:source]
275
+ next unless source.is_a?(Hash) && source[:type].to_s == "base64"
276
+
277
+ "data:#{source[:media_type]};base64,#{source[:data]}"
278
+ when "document"
279
+ # Anthropic PDF document block — return a sentinel string for frontend display
280
+ source = block[:source]
281
+ next unless source.is_a?(Hash) && source[:media_type].to_s == "application/pdf"
282
+
283
+ # Return a special marker so the frontend can render a PDF badge instead of an <img>
284
+ "pdf:#{source[:data]&.then { |d| d[0, 32] }}" # prefix to identify without full payload
285
+ end
286
+ end
287
+ end
288
+
244
289
  # Extract text from message content (handles string and array formats)
245
290
  # @param content [String, Array, Object] Message content
246
291
  # @return [String] Extracted text
data/lib/clacky/agent/skill_manager.rb CHANGED
@@ -184,7 +184,7 @@ module Clacky
184
184
  system_injected: true
185
185
  }
186
186
 
187
- @ui&.log("Injected skill content for /#{skill.identifier}", level: :info)
187
+ @ui&.show_info("Injected skill content for /#{skill.identifier}")
188
188
  end
189
189
 
190
190
  private
data/lib/clacky/agent.rb CHANGED
@@ -141,7 +141,7 @@ module Clacky
141
141
  @config.model_name
142
142
  end
143
143
 
144
- def run(user_input, images: [])
144
+ def run(user_input, images: [], files: [])
145
145
  # Start new task for Time Machine
146
146
  task_id = start_new_task
147
147
 
@@ -172,8 +172,8 @@ module Clacky
172
172
  @messages << system_message
173
173
  end
174
174
 
175
- # Format user message with images if provided
176
- user_content = format_user_content(user_input, images)
175
+ # Format user message with images and files if provided
176
+ user_content = format_user_content(user_input, images, files)
177
177
  @messages << { role: "user", content: user_content, task_id: task_id, created_at: Time.now.to_f }
178
178
  @total_tasks += 1
179
179
 
@@ -208,7 +208,12 @@ module Clacky
208
208
 
209
209
  # Check if done (no more tool calls needed)
210
210
  if response[:finish_reason] == "stop" || response[:tool_calls].nil? || response[:tool_calls].empty?
211
- @ui&.show_assistant_message(response[:content]) if response[:content] && !response[:content].empty?
211
+ # During memory update phase, show LLM response as info (not a chat bubble)
212
+ if @memory_updating && response[:content] && !response[:content].empty?
213
+ @ui&.show_info("🧠 " + response[:content].strip)
214
+ elsif response[:content] && !response[:content].empty?
215
+ @ui&.show_assistant_message(response[:content])
216
+ end
212
217
 
213
218
  # Debug: log why we're stopping
214
219
  if @config.verbose && (response[:tool_calls].nil? || response[:tool_calls].empty?)
@@ -227,7 +232,8 @@ module Clacky
227
232
  end
228
233
 
229
234
  # Show assistant message if there's content before tool calls
230
- if response[:content] && !response[:content].empty?
235
+ # During memory update phase, suppress text output (only tool calls matter)
236
+ if response[:content] && !response[:content].empty? && !@memory_updating
231
237
  @ui&.show_assistant_message(response[:content])
232
238
  end
233
239
 
@@ -272,13 +278,17 @@ module Clacky
272
278
  @modified_files_in_task = [] # Reset for next task
273
279
  end
274
280
 
275
- @ui&.show_complete(
276
- iterations: result[:iterations],
277
- cost: result[:total_cost_usd],
278
- duration: result[:duration_seconds],
279
- cache_stats: result[:cache_stats],
280
- awaiting_user_feedback: awaiting_user_feedback
281
- )
281
+ if @is_subagent
282
+ @ui&.show_info("Subagent done (#{result[:iterations]} iterations, $#{result[:total_cost_usd].round(4)})")
283
+ else
284
+ @ui&.show_complete(
285
+ iterations: result[:iterations],
286
+ cost: result[:total_cost_usd],
287
+ duration: result[:duration_seconds],
288
+ cache_stats: result[:cache_stats],
289
+ awaiting_user_feedback: awaiting_user_feedback
290
+ )
291
+ end
282
292
  @hooks.trigger(:on_complete, result)
283
293
  result
284
294
  rescue Clacky::AgentInterrupted
@@ -714,6 +724,10 @@ module Clacky
714
724
  ui: @ui,
715
725
  profile: @agent_profile.name
716
726
  )
727
+ subagent.instance_variable_set(:@is_subagent, true)
728
+
729
+ # Inherit previous_total_tokens so the first iteration delta is calculated correctly
730
+ subagent.instance_variable_set(:@previous_total_tokens, @previous_total_tokens)
717
731
 
718
732
  # Deep clone messages to avoid cross-contamination
719
733
  subagent.instance_variable_set(:@messages, deep_clone(@messages))
@@ -809,11 +823,16 @@ module Clacky
809
823
  end
810
824
 
811
825
  # Format user content with optional images
826
+ # PDF files are handled upstream (server injects file path into message text),
827
+ # so this method only needs to handle images.
812
828
  # @param text [String] User's text input
813
829
  # @param images [Array<String>] Array of image file paths or data: URLs
814
- # @return [String|Array] String if no images, Array with text and image_url objects if images present
815
- private def format_user_content(text, images)
816
- return text if images.nil? || images.empty?
830
+ # @param files [Array] Unused; kept for signature compatibility
831
+ # @return [String|Array] String if no images, Array with content blocks otherwise
832
+ private def format_user_content(text, images, files = [])
833
+ images ||= []
834
+
835
+ return text if images.empty?
817
836
 
818
837
  content = []
819
838
  content << { type: "text", text: text } unless text.nil? || text.empty?