openclacky 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +52 -0
- data/docs/channel-architecture.md +235 -0
- data/lib/clacky/agent/memory_updater.rb +3 -2
- data/lib/clacky/agent/session_serializer.rb +48 -3
- data/lib/clacky/agent/skill_manager.rb +1 -1
- data/lib/clacky/agent.rb +34 -15
- data/lib/clacky/brand_config.rb +352 -43
- data/lib/clacky/cli.rb +5 -4
- data/lib/clacky/client.rb +2 -2
- data/lib/clacky/default_skills/channel-setup/SKILL.md +204 -0
- data/lib/clacky/default_skills/cron-task-creator/SKILL.md +250 -0
- data/lib/clacky/default_skills/cron-task-creator/evals/evals.json +38 -0
- data/lib/clacky/default_skills/cron-task-creator/scripts/list_tasks.rb +121 -0
- data/lib/clacky/default_skills/cron-task-creator/scripts/manage_schedule.rb +149 -0
- data/lib/clacky/default_skills/cron-task-creator/scripts/manage_task.rb +81 -0
- data/lib/clacky/default_skills/cron-task-creator/scripts/task_history.rb +137 -0
- data/lib/clacky/default_skills/pdf-reader/SKILL.md +90 -0
- data/lib/clacky/default_skills/skill-add/SKILL.md +29 -252
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +233 -0
- data/lib/clacky/default_skills/skill-creator/SKILL.md +547 -0
- data/lib/clacky/default_skills/skill-creator/agents/analyzer.md +274 -0
- data/lib/clacky/default_skills/skill-creator/agents/comparator.md +202 -0
- data/lib/clacky/default_skills/skill-creator/agents/grader.md +223 -0
- data/lib/clacky/default_skills/skill-creator/eval-viewer/generate_review.py +471 -0
- data/lib/clacky/default_skills/skill-creator/eval-viewer/viewer.html +1325 -0
- data/lib/clacky/default_skills/skill-creator/references/schemas.md +430 -0
- data/lib/clacky/default_skills/skill-creator/scripts/__init__.py +0 -0
- data/lib/clacky/default_skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- data/lib/clacky/default_skills/skill-creator/scripts/generate_report.py +326 -0
- data/lib/clacky/default_skills/skill-creator/scripts/improve_description.py +310 -0
- data/lib/clacky/default_skills/skill-creator/scripts/quick_validate.py +103 -0
- data/lib/clacky/default_skills/skill-creator/scripts/run_eval.py +317 -0
- data/lib/clacky/default_skills/skill-creator/scripts/run_loop.py +331 -0
- data/lib/clacky/default_skills/skill-creator/scripts/utils.py +47 -0
- data/lib/clacky/server/channel/adapters/base.rb +82 -0
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +172 -0
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +191 -0
- data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +106 -0
- data/lib/clacky/server/channel/adapters/feishu/ws_client.rb +385 -0
- data/lib/clacky/server/channel/adapters/wecom/adapter.rb +106 -0
- data/lib/clacky/server/channel/adapters/wecom/ws_client.rb +188 -0
- data/lib/clacky/server/channel/channel_config.rb +146 -0
- data/lib/clacky/server/channel/channel_manager.rb +230 -0
- data/lib/clacky/server/channel/channel_ui_controller.rb +179 -0
- data/lib/clacky/server/channel.rb +29 -0
- data/lib/clacky/server/http_server.rb +401 -12
- data/lib/clacky/server/web_ui_controller.rb +73 -1
- data/lib/clacky/skill.rb +25 -11
- data/lib/clacky/skill_loader.rb +15 -7
- data/lib/clacky/tools/browser.rb +300 -43
- data/lib/clacky/tools/file_reader.rb +3 -3
- data/lib/clacky/tools/shell.rb +22 -0
- data/lib/clacky/utils/file_processor.rb +2 -2
- data/lib/clacky/utils/logger.rb +20 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +509 -17
- data/lib/clacky/web/app.js +143 -34
- data/lib/clacky/web/channels.js +196 -0
- data/lib/clacky/web/icon-dark.svg +23 -0
- data/lib/clacky/web/icon.svg +26 -0
- data/lib/clacky/web/index.html +31 -7
- data/lib/clacky/web/sessions.js +14 -1
- data/lib/clacky/web/settings.js +2 -2
- data/lib/clacky/web/skills.js +353 -108
- data/lib/clacky/web/tasks.js +2 -2
- metadata +40 -3
- data/lib/clacky/default_skills/create-task/SKILL.md +0 -102
- data/lib/clacky/default_skills/skill-add/scripts/install_from_github.rb +0 -189
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 38f9805e951dec0f87bda1b64033e0ea7f0c5c6d1c4fd2427f57dfc13aec0835
|
|
4
|
+
data.tar.gz: f6f0d08206ead392ffbbc073bb92c5b8e5b4c9f4ecf37172153c4bf46f4963e0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d7400735f1f2cbf9fa6b74e56aaa9264e881ab0618885e87b9757458b3b87bde01c5319db6d6f6833573792229c8aa635d5c09bab43cdde15e8cddfe2ce3e418
|
|
7
|
+
data.tar.gz: ef4dede49038208ff386f5b536ba4c64158e5b72f5599694f14ecf83bd3259b51be6af52bef10fbdea88fbc23f2b2b11c9316e1bdbb1f350c355a0fedeb23bd1
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.8.7] - 2026-03-13
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **PDF file upload and reading**: users can now upload PDF files directly in the WebUI chat; the agent reads and analyzes the content via the built-in `pdf-reader` skill
|
|
14
|
+
- **WebUI favicon and SVG icons**: browser tab now shows the Clacky icon
|
|
15
|
+
- **Public skill store install**: skills from the public store can be installed directly via the WebUI without a GitHub URL
|
|
16
|
+
- **Auto-kill previous server on startup**: launching `clacky serve` now automatically kills any previously running instance via pidfile, preventing port conflicts
|
|
17
|
+
|
|
18
|
+
### Improved
|
|
19
|
+
- **Brand skill loading speed**: loading brand skills no longer triggers a network decryption request — name and description are now read from the local `brand_skills.json` cache, making New Session significantly faster
|
|
20
|
+
- **Memory update UX**: memory update step now shows a spinner and info-style message instead of a bare log line
|
|
21
|
+
- **Browser snapshot output**: snapshot output is compressed to reduce token cost when the agent uses browser tools
|
|
22
|
+
- **Subagent output**: subagent task completion now shows a brief info line instead of a full "Task Complete" block, reducing noise in the parent agent's context
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- **Subagent token delta on first iteration**: subagent now inherits `previous_total_tokens` correctly, fixing an inflated token count on the first tool iteration
|
|
26
|
+
- **Chrome DevTools inspect URL**: updated the remote debugging URL to include the `#remote-debugging` fragment for correct navigation
|
|
27
|
+
- **Shell output token explosion**: long lines in shell output are now truncated to prevent excessive token usage
|
|
28
|
+
|
|
29
|
+
### More
|
|
30
|
+
- Binary file size limit lowered from 5 MB to 512 KB to reduce accidental token cost
|
|
31
|
+
- `kill_existing_server` logic moved from CLI into `HttpServer` for cleaner separation
|
|
32
|
+
- Browser tool prefers `snapshot -i` over `screenshot` for lower token cost
|
|
33
|
+
- Cross-platform PID file path using `Dir.tmpdir` instead of hardcoded `/tmp`
|
|
34
|
+
|
|
35
|
+
## [0.8.6] - 2026-03-12
|
|
36
|
+
|
|
37
|
+
### Added
|
|
38
|
+
- **Channel system with Feishu & WeCom support**: integrated IM platform adapters — agents can now receive and reply to messages via Feishu (WebSocket) and WeCom channels
|
|
39
|
+
- **Skill encryption (brand skills)**: brand skills can be distributed as encrypted `.enc` files, decrypted on-the-fly using license keys; includes a full key management and manifest system
|
|
40
|
+
- **Cron task creator & skill creator default skills**: two new built-in skills for creating scheduled tasks and new skills directly from chat
|
|
41
|
+
- **Image messages in session history restore**: session restore now correctly replays image-containing messages, including thumbnail display in the UI
|
|
42
|
+
- **Skill auto-upload to cloud**: skills can be uploaded to the cloud store from within the UI
|
|
43
|
+
|
|
44
|
+
### Improved
|
|
45
|
+
- **WeCom setup flow**: improved step-by-step WeCom channel configuration UX (#11)
|
|
46
|
+
- **Skill autocomplete UI**: enhanced slash-command autocomplete interaction — better keyboard navigation, input behavior, and visual feedback (#6)
|
|
47
|
+
- **Chrome setup UX**: simplified Chrome installation flow with improved error messages and progress indicators (#8)
|
|
48
|
+
- **WebUI colors and layout**: polished light/dark mode colors, sidebar alignment, and badge styles for a more consistent look
|
|
49
|
+
- **Test suite speed**: `CLACKY_TEST` guard prevents brand skill network calls during tests — suite now runs ~60× faster per example
|
|
50
|
+
|
|
51
|
+
### Fixed
|
|
52
|
+
- **Duplicate user bubble on skill install**: prevented an extra chat bubble appearing when installing a skill from the store
|
|
53
|
+
- **Image thumbnails in session replay**: restored missing image thumbnails when replaying historical sessions
|
|
54
|
+
- **WebUI permission mode**: Web UI sessions now correctly use `confirm_all` permission mode
|
|
55
|
+
- **Feishu WS log noise**: removed emoji characters from WebSocket connection log messages
|
|
56
|
+
|
|
57
|
+
### More
|
|
58
|
+
- Subagent memory update disabled to reduce noise
|
|
59
|
+
- Ping request `max_tokens` bumped from 10 to 16
|
|
60
|
+
- WebUI updated to use new cron-task-creator and skill-creator skills
|
|
61
|
+
|
|
10
62
|
## [0.8.5] - 2026-03-11
|
|
11
63
|
|
|
12
64
|
### Fixed
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Channel Architecture
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Channel is a feature that bridges Clacky's Server Sessions to IM platforms
|
|
6
|
+
(Feishu, WeCom, DingTalk, etc.). It reuses the existing Agent + SessionRegistry
|
|
7
|
+
infrastructure — the Agent knows nothing about IM; the Channel layer is purely
|
|
8
|
+
a transport adapter.
|
|
9
|
+
|
|
10
|
+
## Design Principles
|
|
11
|
+
|
|
12
|
+
- **Zero Agent intrusion** — Agent only speaks `UIInterface`; swap the controller, get IM output
|
|
13
|
+
- **Reuse SessionRegistry** — IM chats resolve to the same `SessionRegistry` sessions as Web UI
|
|
14
|
+
- **WebSocket long connection** — No public domain required; adapters hold a persistent WSS connection to the IM platform
|
|
15
|
+
- **One platform = 2 threads** — read loop thread + ping/heartbeat thread (constant, small footprint)
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Layer Diagram
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
IM Platforms (Feishu / WeCom / DingTalk)
|
|
23
|
+
│ WebSocket long connection (wss://)
|
|
24
|
+
▼
|
|
25
|
+
┌─────────────────────────────────────┐
|
|
26
|
+
│ Channel Adapter Layer │
|
|
27
|
+
│ Feishu::Adapter │
|
|
28
|
+
│ ├── WSClient (read loop + ping) │
|
|
29
|
+
│ ├── Bot (send API) │
|
|
30
|
+
│ └── MessageParser │
|
|
31
|
+
│ Wecom::Adapter │
|
|
32
|
+
│ └── WSClient (read loop + ping) │
|
|
33
|
+
│ (future) Dingtalk::Adapter │
|
|
34
|
+
└──────────────┬──────────────────────┘
|
|
35
|
+
│ standardized event Hash
|
|
36
|
+
▼
|
|
37
|
+
┌─────────────────────────────────────┐
|
|
38
|
+
│ ChannelManager │
|
|
39
|
+
│ • Owns adapter threads │
|
|
40
|
+
│ • Routes inbound event → │
|
|
41
|
+
│ ChannelBinding → session_id │
|
|
42
|
+
│ • Calls agent.run in Thread.new │
|
|
43
|
+
└──────────────┬──────────────────────┘
|
|
44
|
+
│
|
|
45
|
+
┌───────┴────────┐
|
|
46
|
+
▼ ▼
|
|
47
|
+
SessionRegistry ChannelUIController
|
|
48
|
+
(existing) (implements UIInterface)
|
|
49
|
+
│ │
|
|
50
|
+
▼ ▼
|
|
51
|
+
Agent IM Platform reply
|
|
52
|
+
(unchanged) via adapter.send_text
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## File Structure
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
lib/clacky/server/channel/
|
|
61
|
+
├── adapters/
|
|
62
|
+
│ ├── base.rb # Adapter abstract base + registry
|
|
63
|
+
│ ├── feishu/
|
|
64
|
+
│ │ ├── adapter.rb # Feishu::Adapter < Base
|
|
65
|
+
│ │ ├── bot.rb # HTTP send API (token cache, Markdown/card)
|
|
66
|
+
│ │ ├── message_parser.rb # Raw WS event → standardized Hash
|
|
67
|
+
│ │ └── ws_client.rb # Feishu protobuf WS long connection
|
|
68
|
+
│ └── wecom/
|
|
69
|
+
│ ├── adapter.rb # Wecom::Adapter < Base
|
|
70
|
+
│ └── ws_client.rb # WeCom JSON WS long connection
|
|
71
|
+
├── channel_message.rb # Struct: standardized inbound message
|
|
72
|
+
├── channel_binding.rb # (platform, user_id) → session_id mapping
|
|
73
|
+
├── channel_ui_controller.rb # UIInterface impl — pushes events to IM
|
|
74
|
+
└── channel_manager.rb # Lifecycle: start/stop adapters, route messages
|
|
75
|
+
lib/clacky/server/channel.rb # Top-level require entry point
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Standardized Inbound Event
|
|
81
|
+
|
|
82
|
+
All adapters yield the same Hash shape to `ChannelManager`:
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
{
|
|
86
|
+
platform: :feishu, # Symbol
|
|
87
|
+
chat_id: "oc_xxx", # String — IM chat/group identifier
|
|
88
|
+
user_id: "ou_xxx", # String — IM user identifier
|
|
89
|
+
text: "deploy now", # String — cleaned user text
|
|
90
|
+
message_id: "om_xxx", # String — for threading / update
|
|
91
|
+
timestamp: Time, # Time object
|
|
92
|
+
chat_type: :direct | :group, # Symbol
|
|
93
|
+
raw: { ... } # Original platform payload
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Adapter Interface (Base)
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
class Adapters::Base
|
|
103
|
+
def self.platform_id → Symbol
|
|
104
|
+
def self.platform_config(raw_config) → Hash # symbol-keyed
|
|
105
|
+
def self.env_keys → Array<String> # for config serialization
|
|
106
|
+
|
|
107
|
+
def start(&on_message) # blocks; yields event Hash per inbound message
|
|
108
|
+
def stop # graceful shutdown
|
|
109
|
+
def send_text(chat_id, text, reply_to: nil) → Hash
|
|
110
|
+
def update_message(chat_id, message_id, text) → Boolean
|
|
111
|
+
def supports_message_updates? → Boolean
|
|
112
|
+
def validate_config(config) → Array<String> # error messages
|
|
113
|
+
end
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## ChannelManager
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
class ChannelManager
|
|
122
|
+
def initialize(session_registry:, session_builder:, channel_config:, agent_config:)
|
|
123
|
+
|
|
124
|
+
def start # Thread.new per enabled platform adapter
|
|
125
|
+
def stop # stops all adapter threads gracefully
|
|
126
|
+
|
|
127
|
+
private
|
|
128
|
+
|
|
129
|
+
def route_message(adapter, event)
|
|
130
|
+
session_id = @binding.resolve_or_create(event, session_builder: @session_builder)
|
|
131
|
+
ui = ChannelUIController.new(event, adapter)
|
|
132
|
+
Thread.new { run_agent(session_id, event[:text], ui) }
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## ChannelBinding
|
|
140
|
+
|
|
141
|
+
Maps a binding key — `(platform, user_id)` by default, or `(platform, chat_id)` in `chat` mode — to a `session_id`. Persisted to `~/.clacky/channel_bindings.yml`.
|
|
142
|
+
|
|
143
|
+
Binding modes (configurable per platform):
|
|
144
|
+
|
|
145
|
+
| Mode | Key | Description |
|
|
146
|
+
|------|-----|-------------|
|
|
147
|
+
| `user` | `(platform, user_id)` | Each IM user gets their own session (default) |
|
|
148
|
+
| `chat` | `(platform, chat_id)` | Whole group shares one session |
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## ChannelUIController
|
|
153
|
+
|
|
154
|
+
Implements `UIInterface`. Key behaviours:
|
|
155
|
+
|
|
156
|
+
- `show_assistant_message` → `adapter.send_text(chat_id, content)`
|
|
157
|
+
- `show_tool_call` → buffers as `⚙️ \`tool summary\`` (flushed on next message)
|
|
158
|
+
- `show_progress` → `adapter.update_message(...)` if `supports_message_updates?`
|
|
159
|
+
- `show_complete` → sends `✅ Complete • N iterations • $cost`
|
|
160
|
+
- `request_confirmation` → **not supported in IM** (returns auto-approved / raises)
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Thread Model
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
Main thread (WEBrick server.start — blocks)
|
|
168
|
+
├── WEBrick request threads (existing)
|
|
169
|
+
├── Agent task threads (existing, per task)
|
|
170
|
+
├── Scheduler thread (existing, clacky-scheduler)
|
|
171
|
+
└── ChannelManager
|
|
172
|
+
├── feishu-adapter thread (WSClient read loop, constant)
|
|
173
|
+
│ └── feishu-ping thread (heartbeat, 90s)
|
|
174
|
+
└── wecom-adapter thread (WSClient read loop, constant)
|
|
175
|
+
└── wecom-ping thread (heartbeat, 30s)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Per enabled platform: **2 constant threads**. Agent task threads are spawned
|
|
179
|
+
on demand (same as Web UI path) and exit when done.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Configuration
|
|
184
|
+
|
|
185
|
+
Channel credentials live in `~/.clacky/channels.yml` (managed by `ChannelConfig`
|
|
186
|
+
which already exists in the main branch):
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
channels:
|
|
190
|
+
feishu:
|
|
191
|
+
enabled: true
|
|
192
|
+
app_id: cli_xxx
|
|
193
|
+
app_secret: xxx
|
|
194
|
+
allowed_users:
|
|
195
|
+
- ou_xxx
|
|
196
|
+
wecom:
|
|
197
|
+
enabled: false
|
|
198
|
+
bot_id: xxx
|
|
199
|
+
secret: xxx
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
`ChannelManager` reads this via `ChannelConfig#platform_config(platform)`.
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Integration with HttpServer
|
|
207
|
+
|
|
208
|
+
```ruby
|
|
209
|
+
# HttpServer#initialize
|
|
210
|
+
@channel_manager = ChannelManager.new(
|
|
211
|
+
session_registry: @registry,
|
|
212
|
+
session_builder: method(:build_session),
|
|
213
|
+
channel_config: Clacky::ChannelConfig.load,
|
|
214
|
+
agent_config: @agent_config
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# HttpServer#start (after scheduler.start)
|
|
218
|
+
@channel_manager.start
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
`ChannelManager#start` is non-blocking (spawns threads internally),
|
|
222
|
+
mirroring `Scheduler#start` behaviour.
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Future: DingTalk
|
|
227
|
+
|
|
228
|
+
DingTalk also supports a WebSocket Stream mode. Adding it means:
|
|
229
|
+
|
|
230
|
+
1. `lib/clacky/server/channel/adapters/dingtalk/adapter.rb` inheriting `Base`
|
|
231
|
+
2. `lib/clacky/server/channel/adapters/dingtalk/ws_client.rb`
|
|
232
|
+
3. Register: `Adapters.register(:dingtalk, Adapter)`
|
|
233
|
+
4. Add credentials to `ChannelConfig`
|
|
234
|
+
|
|
235
|
+
No changes needed to `ChannelManager`, `ChannelUIController`, or `ChannelBinding`.
|
|
@@ -26,6 +26,7 @@ module Clacky
|
|
|
26
26
|
# @return [Boolean]
|
|
27
27
|
def should_update_memory?
|
|
28
28
|
return false unless memory_update_enabled?
|
|
29
|
+
return false if @is_subagent # Subagents never update memory
|
|
29
30
|
|
|
30
31
|
task_iterations = @iterations - (@task_start_iterations || 0)
|
|
31
32
|
task_iterations >= MEMORY_UPDATE_MIN_ITERATIONS
|
|
@@ -41,7 +42,7 @@ module Clacky
|
|
|
41
42
|
|
|
42
43
|
@memory_prompt_injected = true
|
|
43
44
|
@memory_updating = true
|
|
44
|
-
@ui&.
|
|
45
|
+
@ui&.show_progress("Updating long-term memory…")
|
|
45
46
|
|
|
46
47
|
@messages << {
|
|
47
48
|
role: "user",
|
|
@@ -61,7 +62,7 @@ module Clacky
|
|
|
61
62
|
@messages.reject! { |m| m[:memory_update] }
|
|
62
63
|
@memory_prompt_injected = false
|
|
63
64
|
@memory_updating = false
|
|
64
|
-
@ui&.
|
|
65
|
+
@ui&.clear_progress
|
|
65
66
|
end
|
|
66
67
|
|
|
67
68
|
private def memory_update_enabled?
|
|
@@ -153,8 +153,20 @@ module Clacky
|
|
|
153
153
|
@messages.each do |msg|
|
|
154
154
|
role = msg[:role].to_s
|
|
155
155
|
|
|
156
|
-
|
|
157
|
-
|
|
156
|
+
# A real user message can have either a String content or an Array content
|
|
157
|
+
# (Array = multipart: text + image blocks). Exclude system-injected messages
|
|
158
|
+
# and synthetic [SYSTEM] text messages.
|
|
159
|
+
is_real_user_msg = role == "user" && !msg[:system_injected] &&
|
|
160
|
+
if msg[:content].is_a?(String)
|
|
161
|
+
!msg[:content].start_with?("[SYSTEM]")
|
|
162
|
+
elsif msg[:content].is_a?(Array)
|
|
163
|
+
# Must contain at least one text or image block (not a tool_result array)
|
|
164
|
+
msg[:content].any? { |b| b.is_a?(Hash) && %w[text image].include?(b[:type].to_s) }
|
|
165
|
+
else
|
|
166
|
+
false
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
if is_real_user_msg
|
|
158
170
|
# Start a new round at each real user message
|
|
159
171
|
current_round = { user_msg: msg, events: [] }
|
|
160
172
|
rounds << current_round
|
|
@@ -175,8 +187,10 @@ module Clacky
|
|
|
175
187
|
page.each do |round|
|
|
176
188
|
msg = round[:user_msg]
|
|
177
189
|
display_text = extract_text_from_content(msg[:content])
|
|
190
|
+
# Extract image data URLs from multipart content (for history replay rendering)
|
|
191
|
+
images = extract_images_from_content(msg[:content])
|
|
178
192
|
# Emit user message with its timestamp for dedup on the frontend
|
|
179
|
-
ui.show_user_message(display_text, created_at: msg[:created_at])
|
|
193
|
+
ui.show_user_message(display_text, created_at: msg[:created_at], images: images)
|
|
180
194
|
|
|
181
195
|
round[:events].each do |ev|
|
|
182
196
|
# Skip system-injected messages (e.g. synthetic skill content, memory prompts)
|
|
@@ -241,6 +255,37 @@ module Clacky
|
|
|
241
255
|
Clacky::Logger.warn("refresh_system_prompt failed during session restore: #{e.message}")
|
|
242
256
|
end
|
|
243
257
|
|
|
258
|
+
# Extract base64 data URLs from multipart content (image blocks).
|
|
259
|
+
# Returns an empty array when there are no images or content is plain text.
|
|
260
|
+
# @param content [String, Array, Object] Message content
|
|
261
|
+
# @return [Array<String>] Array of data URLs (e.g. "data:image/png;base64,...")
|
|
262
|
+
def extract_images_from_content(content)
|
|
263
|
+
return [] unless content.is_a?(Array)
|
|
264
|
+
|
|
265
|
+
content.filter_map do |block|
|
|
266
|
+
next unless block.is_a?(Hash)
|
|
267
|
+
|
|
268
|
+
case block[:type].to_s
|
|
269
|
+
when "image_url"
|
|
270
|
+
# OpenAI format: { type: "image_url", image_url: { url: "data:image/png;base64,..." } }
|
|
271
|
+
block.dig(:image_url, :url)
|
|
272
|
+
when "image"
|
|
273
|
+
# Anthropic format: { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }
|
|
274
|
+
source = block[:source]
|
|
275
|
+
next unless source.is_a?(Hash) && source[:type].to_s == "base64"
|
|
276
|
+
|
|
277
|
+
"data:#{source[:media_type]};base64,#{source[:data]}"
|
|
278
|
+
when "document"
|
|
279
|
+
# Anthropic PDF document block — return a sentinel string for frontend display
|
|
280
|
+
source = block[:source]
|
|
281
|
+
next unless source.is_a?(Hash) && source[:media_type].to_s == "application/pdf"
|
|
282
|
+
|
|
283
|
+
# Return a special marker so the frontend can render a PDF badge instead of an <img>
|
|
284
|
+
"pdf:#{source[:data]&.then { |d| d[0, 32] }}" # prefix to identify without full payload
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
|
|
244
289
|
# Extract text from message content (handles string and array formats)
|
|
245
290
|
# @param content [String, Array, Object] Message content
|
|
246
291
|
# @return [String] Extracted text
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -141,7 +141,7 @@ module Clacky
|
|
|
141
141
|
@config.model_name
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
-
def run(user_input, images: [])
|
|
144
|
+
def run(user_input, images: [], files: [])
|
|
145
145
|
# Start new task for Time Machine
|
|
146
146
|
task_id = start_new_task
|
|
147
147
|
|
|
@@ -172,8 +172,8 @@ module Clacky
|
|
|
172
172
|
@messages << system_message
|
|
173
173
|
end
|
|
174
174
|
|
|
175
|
-
# Format user message with images if provided
|
|
176
|
-
user_content = format_user_content(user_input, images)
|
|
175
|
+
# Format user message with images and files if provided
|
|
176
|
+
user_content = format_user_content(user_input, images, files)
|
|
177
177
|
@messages << { role: "user", content: user_content, task_id: task_id, created_at: Time.now.to_f }
|
|
178
178
|
@total_tasks += 1
|
|
179
179
|
|
|
@@ -208,7 +208,12 @@ module Clacky
|
|
|
208
208
|
|
|
209
209
|
# Check if done (no more tool calls needed)
|
|
210
210
|
if response[:finish_reason] == "stop" || response[:tool_calls].nil? || response[:tool_calls].empty?
|
|
211
|
-
|
|
211
|
+
# During memory update phase, show LLM response as info (not a chat bubble)
|
|
212
|
+
if @memory_updating && response[:content] && !response[:content].empty?
|
|
213
|
+
@ui&.show_info("🧠 " + response[:content].strip)
|
|
214
|
+
elsif response[:content] && !response[:content].empty?
|
|
215
|
+
@ui&.show_assistant_message(response[:content])
|
|
216
|
+
end
|
|
212
217
|
|
|
213
218
|
# Debug: log why we're stopping
|
|
214
219
|
if @config.verbose && (response[:tool_calls].nil? || response[:tool_calls].empty?)
|
|
@@ -227,7 +232,8 @@ module Clacky
|
|
|
227
232
|
end
|
|
228
233
|
|
|
229
234
|
# Show assistant message if there's content before tool calls
|
|
230
|
-
|
|
235
|
+
# During memory update phase, suppress text output (only tool calls matter)
|
|
236
|
+
if response[:content] && !response[:content].empty? && !@memory_updating
|
|
231
237
|
@ui&.show_assistant_message(response[:content])
|
|
232
238
|
end
|
|
233
239
|
|
|
@@ -272,13 +278,17 @@ module Clacky
|
|
|
272
278
|
@modified_files_in_task = [] # Reset for next task
|
|
273
279
|
end
|
|
274
280
|
|
|
275
|
-
@
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
281
|
+
if @is_subagent
|
|
282
|
+
@ui&.show_info("Subagent done (#{result[:iterations]} iterations, $#{result[:total_cost_usd].round(4)})")
|
|
283
|
+
else
|
|
284
|
+
@ui&.show_complete(
|
|
285
|
+
iterations: result[:iterations],
|
|
286
|
+
cost: result[:total_cost_usd],
|
|
287
|
+
duration: result[:duration_seconds],
|
|
288
|
+
cache_stats: result[:cache_stats],
|
|
289
|
+
awaiting_user_feedback: awaiting_user_feedback
|
|
290
|
+
)
|
|
291
|
+
end
|
|
282
292
|
@hooks.trigger(:on_complete, result)
|
|
283
293
|
result
|
|
284
294
|
rescue Clacky::AgentInterrupted
|
|
@@ -714,6 +724,10 @@ module Clacky
|
|
|
714
724
|
ui: @ui,
|
|
715
725
|
profile: @agent_profile.name
|
|
716
726
|
)
|
|
727
|
+
subagent.instance_variable_set(:@is_subagent, true)
|
|
728
|
+
|
|
729
|
+
# Inherit previous_total_tokens so the first iteration delta is calculated correctly
|
|
730
|
+
subagent.instance_variable_set(:@previous_total_tokens, @previous_total_tokens)
|
|
717
731
|
|
|
718
732
|
# Deep clone messages to avoid cross-contamination
|
|
719
733
|
subagent.instance_variable_set(:@messages, deep_clone(@messages))
|
|
@@ -809,11 +823,16 @@ module Clacky
|
|
|
809
823
|
end
|
|
810
824
|
|
|
811
825
|
# Format user content with optional images
|
|
826
|
+
# PDF files are handled upstream (server injects file path into message text),
|
|
827
|
+
# so this method only needs to handle images.
|
|
812
828
|
# @param text [String] User's text input
|
|
813
829
|
# @param images [Array<String>] Array of image file paths or data: URLs
|
|
814
|
-
# @
|
|
815
|
-
|
|
816
|
-
|
|
830
|
+
# @param files [Array] Unused — kept for signature compatibility
|
|
831
|
+
# @return [String|Array] String if no images, Array with content blocks otherwise
|
|
832
|
+
private def format_user_content(text, images, files = [])
|
|
833
|
+
images ||= []
|
|
834
|
+
|
|
835
|
+
return text if images.empty?
|
|
817
836
|
|
|
818
837
|
content = []
|
|
819
838
|
content << { type: "text", text: text } unless text.nil? || text.empty?
|