openclacky 0.9.6 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/commit/SKILL.md +1 -0
- data/.clacky/skills/gem-release/SKILL.md +4 -1
- data/CHANGELOG.md +52 -0
- data/docs/browser-cdp-native-design.md +195 -0
- data/docs/session-management-redesign.md +202 -0
- data/docs/system-skill-authoring-guide.md +47 -0
- data/lib/clacky/agent/cost_tracker.rb +2 -1
- data/lib/clacky/agent/session_serializer.rb +50 -4
- data/lib/clacky/agent/skill_manager.rb +160 -75
- data/lib/clacky/agent/system_prompt_builder.rb +30 -13
- data/lib/clacky/agent/tool_executor.rb +7 -3
- data/lib/clacky/agent.rb +131 -30
- data/lib/clacky/agent_config.rb +5 -0
- data/lib/clacky/banner.rb +3 -3
- data/lib/clacky/brand_config.rb +106 -69
- data/lib/clacky/cli.rb +40 -11
- data/lib/clacky/client.rb +69 -12
- data/lib/clacky/default_parsers/doc_parser.rb +69 -0
- data/lib/clacky/default_parsers/docx_parser.rb +172 -0
- data/lib/clacky/default_parsers/pdf_parser.rb +79 -0
- data/lib/clacky/default_parsers/pptx_parser.rb +140 -0
- data/lib/clacky/default_parsers/xlsx_parser.rb +121 -0
- data/lib/clacky/default_skills/browser-setup/SKILL.md +238 -0
- data/lib/clacky/default_skills/channel-setup/SKILL.md +139 -42
- data/lib/clacky/default_skills/channel-setup/feishu_setup.rb +582 -0
- data/lib/clacky/default_skills/channel-setup/weixin_setup.rb +274 -0
- data/lib/clacky/default_skills/onboard/SKILL.md +132 -6
- data/lib/clacky/default_skills/personal-website/SKILL.md +113 -0
- data/lib/clacky/default_skills/personal-website/publish.rb +214 -0
- data/lib/clacky/default_skills/skill-add/SKILL.md +2 -3
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +1 -1
- data/lib/clacky/message_format/bedrock.rb +257 -0
- data/lib/clacky/message_format/open_ai.rb +7 -1
- data/lib/clacky/providers.rb +11 -0
- data/lib/clacky/server/browser_manager.rb +284 -0
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +80 -15
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +216 -7
- data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +12 -0
- data/lib/clacky/server/channel/adapters/wecom/adapter.rb +2 -8
- data/lib/clacky/server/channel/adapters/weixin/adapter.rb +391 -0
- data/lib/clacky/server/channel/adapters/weixin/api_client.rb +374 -0
- data/lib/clacky/server/channel/channel_config.rb +6 -0
- data/lib/clacky/server/channel/channel_manager.rb +50 -15
- data/lib/clacky/server/channel/channel_ui_controller.rb +18 -0
- data/lib/clacky/server/channel.rb +1 -0
- data/lib/clacky/server/http_server.rb +216 -76
- data/lib/clacky/server/scheduler.rb +1 -1
- data/lib/clacky/server/session_registry.rb +131 -40
- data/lib/clacky/server/web_ui_controller.rb +14 -2
- data/lib/clacky/session_manager.rb +43 -73
- data/lib/clacky/skill.rb +149 -60
- data/lib/clacky/skill_loader.rb +45 -52
- data/lib/clacky/tools/browser.rb +728 -183
- data/lib/clacky/tools/invoke_skill.rb +21 -18
- data/lib/clacky/tools/run_project.rb +3 -4
- data/lib/clacky/tools/safe_shell.rb +3 -2
- data/lib/clacky/tools/shell.rb +43 -9
- data/lib/clacky/tools/undo_task.rb +4 -1
- data/lib/clacky/tools/web_fetch.rb +5 -2
- data/lib/clacky/tools/web_search.rb +4 -3
- data/lib/clacky/ui2/README.md +1 -1
- data/lib/clacky/ui2/components/input_area.rb +9 -8
- data/lib/clacky/ui2/components/welcome_banner.rb +34 -11
- data/lib/clacky/ui2/layout_manager.rb +33 -6
- data/lib/clacky/ui2/screen_buffer.rb +2 -1
- data/lib/clacky/ui2/ui_controller.rb +66 -3
- data/lib/clacky/utils/encoding.rb +71 -0
- data/lib/clacky/utils/file_processor.rb +116 -139
- data/lib/clacky/utils/model_pricing.rb +5 -4
- data/lib/clacky/utils/parser_manager.rb +93 -0
- data/lib/clacky/utils/workspace_rules.rb +46 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +820 -69
- data/lib/clacky/web/app.js +131 -34
- data/lib/clacky/web/brand.js +35 -6
- data/lib/clacky/web/channels.js +10 -1
- data/lib/clacky/web/i18n.js +57 -7
- data/lib/clacky/web/index.html +70 -9
- data/lib/clacky/web/onboard.js +2 -1
- data/lib/clacky/web/sessions.js +229 -69
- data/lib/clacky/web/settings.js +104 -5
- data/lib/clacky/web/skills.js +109 -22
- data/lib/clacky/web/tasks.js +11 -6
- data/lib/clacky/web/weixin-qr.html +104 -0
- data/lib/clacky.rb +4 -0
- data/scripts/install.sh +43 -13
- metadata +50 -7
- data/lib/clacky/default_skills/pdf-reader/SKILL.md +0 -90
- data/lib/clacky/utils/file_parser/docx_parser.rb +0 -156
- data/lib/clacky/utils/file_parser/pptx_parser.rb +0 -116
- data/lib/clacky/utils/file_parser/xlsx_parser.rb +0 -95
- data/lib/clacky/utils/file_parser/zip_parser.rb +0 -60
- data/scripts/install_agent_browser.sh +0 -67
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bc91293707f008c4b110c563d2c03e295cc6c337d456c050ccfa79b761fb8993
|
|
4
|
+
data.tar.gz: 7cc5b5a1a98c59e8cb0a5779e6ce8a06ee64d41e814c0ba1e4ace2800b8f8670
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e2448da89cc3c21bee66d370f9641246d631ba477d8662cf72093439c52f0c91e9eac1d9584aa03f84aa1406257f723cf68b05f8c0d84a33c8eed67fde1e6e23
|
|
7
|
+
data.tar.gz: 0e88e8994f4d6a3ceae39e925831c9462c539726e1181c4624734117efc1444821df646609da19610655a6c395f45a007ba9f1851ac2aae04d37966fbdf32c83
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
---
|
|
2
|
+
---
|
|
2
3
|
name: gem-release
|
|
3
|
-
description:
|
|
4
|
+
description: >-
|
|
5
|
+
Automates the complete process of releasing a new version of the openclacky Ruby
|
|
6
|
+
gem
|
|
4
7
|
disable-model-invocation: false
|
|
5
8
|
user-invocable: true
|
|
6
9
|
---
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.9.8] - 2026-03-23
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **Real browser automation via CDP**: the browser tool now drives a real Chromium browser using the Chrome DevTools Protocol — structured action schemas, snapshots, screenshots, and full page interaction are all supported
|
|
14
|
+
- **Browser DevTools MCP integration**: the browser connects to Chrome's DevTools via MCP, enabling deeper inspection and control beyond standard WebDriver capabilities
|
|
15
|
+
- **Browser manager in Web UI**: a new browser management panel lets you start, stop, restart, and monitor the connected browser session directly from the Web UI
|
|
16
|
+
- **WeChat (Weixin) channel support**: the agent can now receive and reply to messages via WeChat, including sending and receiving images
|
|
17
|
+
- **Feishu Docs integration**: the agent can now read and process Feishu (Lark) documents directly as context
|
|
18
|
+
- **PDF preview in Web UI**: PDFs attached to a conversation now render inline in the chat interface
|
|
19
|
+
- **Session source tracking**: sessions now track where they originated (Web UI, Feishu, WeCom, WeChat, CLI) and display the source in the sessions list
|
|
20
|
+
- **Sessions list in Web UI**: a dedicated sessions UI shows all your recent conversations with source badges and load-more pagination
|
|
21
|
+
- **Setup session type**: a special onboarding session type is available to guide new users through initial configuration
|
|
22
|
+
- **Personal website skill**: a built-in skill that generates and publishes a personal profile page (linktree-style) from your user profile
|
|
23
|
+
- **Sub-directory `.clackyrules` loading**: project rules files in subdirectories are now discovered and merged automatically
|
|
24
|
+
- **Self-improving response parser**: the parser now repairs itself when it encounters malformed tool-call sequences, improving reliability with all models
|
|
25
|
+
- **UJK format support**: the agent can now handle UJK-encoded content in file and channel inputs
|
|
26
|
+
- **Browser toggle in Web UI**: a toggle in the settings sidebar lets you enable or disable browser control without restarting
|
|
27
|
+
- **Logo and QR code on homepage**: the Web UI homepage now displays the product logo and a shareable QR code
|
|
28
|
+
- **Clear thinking in channel**: channel messages now strip internal `<thinking>` blocks before sending the reply to the user
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
- **`invoke_skill` tool-call sequence**: skill invocations via tool call now correctly follow the expected request/response order, preventing out-of-sequence tool results
|
|
32
|
+
- **URI parsing for edge cases**: fixed a crash when parsing certain malformed or unusual URIs
|
|
33
|
+
- **Doc reader parsing**: fixed an issue where some document formats were not correctly parsed by the doc reader tool
|
|
34
|
+
- **Zip skill location discovery**: fixed skill loading from zip files installed in non-standard locations
|
|
35
|
+
- **Install script compatibility**: the install script now explicitly uses bash to avoid failures on systems where `/bin/sh` is not bash
|
|
36
|
+
|
|
37
|
+
### More
|
|
38
|
+
- Rename `working` → `thinking` in agent status display
|
|
39
|
+
- Channel and Web UI now sync session state in real time
|
|
40
|
+
- Cost usage display improvements
|
|
41
|
+
|
|
42
|
+
## [0.9.7] - 2026-03-20
|
|
43
|
+
|
|
44
|
+
### Added
|
|
45
|
+
- **AWS Bedrock support**: the agent can now use Claude models hosted on AWS Bedrock (including the Japan region `bedrock-jp` provider with `jp.anthropic.claude-sonnet-4-6` and `jp.anthropic.claude-haiku-4-6`)
|
|
46
|
+
- **Brand skill confidentiality protection**: when a brand skill is injected, the agent is now instructed to never reveal, quote, or paraphrase the skill's proprietary instructions — keeping white-label content secure
|
|
47
|
+
- **Slash command guard in skill injection**: skills invoked via `/skill-name` commands now include a system notice that prevents the agent from calling `invoke_skill` a second time for the same request
|
|
48
|
+
- **"Show system skills" toggle in Web UI**: the Skills settings page now has a checkbox to show or hide built-in system skills, making it easier to find your own custom skills in a long list
|
|
49
|
+
|
|
50
|
+
### Fixed
|
|
51
|
+
- **Shell commands with non-UTF-8 output no longer crash**: output from commands that produce GBK, Latin-1, or binary bytes (e.g. some `cat` or legacy tool output) is now safely transcoded to UTF-8 instead of raising an encoding error
|
|
52
|
+
- **Task interruption no longer duplicates or garbles output**: a non-blocking progress-clear path ensures the user's message appears immediately on screen when a task is interrupted, without leaving stale progress lines behind
|
|
53
|
+
- **Terminal inline content resize no longer overflows into the fixed toolbar area**: when an inline block grows past the available output rows, the terminal now scrolls correctly instead of writing into the status bar region
|
|
54
|
+
- **Brand skills always show the latest version**: the skills list in the Web UI now correctly reflects the most recent version of a brand skill after an update
|
|
55
|
+
|
|
56
|
+
### More
|
|
57
|
+
- Rename brand skill `slug` field to `name` for consistency across the codebase
|
|
58
|
+
- Rename `brandname` → `productname` in brand config internals
|
|
59
|
+
- Unify skill injection into a shared `inject_skill_as_assistant_message` method
|
|
60
|
+
- Update built-in skill definitions
|
|
61
|
+
|
|
10
62
|
## [0.9.6] - 2026-03-18
|
|
11
63
|
|
|
12
64
|
### Added
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Browser Tool: Native CDP Integration Design
|
|
2
|
+
|
|
3
|
+
## 背景与目标
|
|
4
|
+
|
|
5
|
+
现有的 browser tool 依赖 `agent-browser`(Rust 二进制,通过 npm 分发),每次使用都启动一个独立的 Chrome 实例,存在以下问题:
|
|
6
|
+
|
|
7
|
+
- 用户登录态、Cookie 无法复用
|
|
8
|
+
- 需要额外安装 npm / agent-browser
|
|
9
|
+
- 每次任务弹出新 Chrome 窗口,体验差
|
|
10
|
+
- 依赖链长:npm → agent-browser binary → Chrome for Testing
|
|
11
|
+
|
|
12
|
+
**核心目标**:Clacky 直接复用用户已打开的 Chrome,继承所有登录态和 Cookie,零额外依赖。
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Chrome 146 的关键变化
|
|
17
|
+
|
|
18
|
+
### 时间线
|
|
19
|
+
|
|
20
|
+
| Chrome 版本 | 行为 |
|
|
21
|
+
|------------|------|
|
|
22
|
+
| ≤ 135 | `--remote-debugging-port` 可连接 default profile(不推荐但能用)|
|
|
23
|
+
| 136 ~ 145 | Default profile 被封锁,必须用 `--user-data-dir` 开隔离 profile(空的,无登录态)|
|
|
24
|
+
| **146+** | 新增 **autoConnect toggle**,一次开关,直接连真实浏览器,Consent-based ✅ |
|
|
25
|
+
|
|
26
|
+
### 用户操作(一次性)
|
|
27
|
+
|
|
28
|
+
1. 打开 `chrome://inspect/#remote-debugging`
|
|
29
|
+
2. 勾选 **"Allow remote debugging for this browser instance"**
|
|
30
|
+
3. Chrome 在 `127.0.0.1:9222` 启动 CDP server
|
|
31
|
+
|
|
32
|
+
之后每次 Clacky 连接时,Chrome 会弹一次 **"Allow remote debugging?"** 权限确认框,用户点 Allow 即可。
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## 技术方案:纯 Ruby CDP Client
|
|
37
|
+
|
|
38
|
+
### 核心发现
|
|
39
|
+
|
|
40
|
+
Chrome 146 的 autoConnect 模式**不暴露标准 `/json` HTTP endpoint**(返回 404),而是通过一个文件告知连接信息:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
~/Library/Application Support/Google/Chrome/DevToolsActivePort
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
文件内容格式:
|
|
47
|
+
```
|
|
48
|
+
9222
|
|
49
|
+
/devtools/browser/98823857-17b3-48ec-8f24-5805e3012a05
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
第一行是端口,第二行是 WebSocket path,直接拼成:
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
ws://127.0.0.1:9222/devtools/browser/98823857-17b3-48ec-8f24-5805e3012a05
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 连接流程
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
1. 读 DevToolsActivePort 文件
|
|
62
|
+
↓
|
|
63
|
+
2. WebSocket 连接 Browser endpoint
|
|
64
|
+
↓
|
|
65
|
+
3. Target.getTargets → 列出所有真实 tab
|
|
66
|
+
↓
|
|
67
|
+
4. Target.attachToTarget(targetId, flatten: true) → 获得 sessionId
|
|
68
|
+
↓
|
|
69
|
+
5. 通过 sessionId 发送 CDP 命令操作指定 tab
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 依赖
|
|
73
|
+
|
|
74
|
+
**零新依赖**,只用已有的:
|
|
75
|
+
- `websocket-driver`(已在 gemspec)
|
|
76
|
+
- `socket`(Ruby 标准库)
|
|
77
|
+
- `net/http`(Ruby 标准库)
|
|
78
|
+
- `json`(Ruby 标准库)
|
|
79
|
+
|
|
80
|
+
### 已验证能力
|
|
81
|
+
|
|
82
|
+
实测(2026-03-20)通过脚本验证:
|
|
83
|
+
|
|
84
|
+
- ✅ 读取 DevToolsActivePort,发现 9222 端口
|
|
85
|
+
- ✅ WebSocket 连接 Browser endpoint
|
|
86
|
+
- ✅ `Target.getTargets` 列出用户所有真实 tab(含标题、URL)
|
|
87
|
+
- ✅ `Target.attachToTarget` attach 到指定 tab
|
|
88
|
+
- ✅ `Runtime.evaluate` 执行 JS(获取 URL、title 等)
|
|
89
|
+
- ✅ `Page.captureScreenshot` 截图
|
|
90
|
+
- ✅ `Target.createTarget` 开新 tab 并导航
|
|
91
|
+
- ✅ 复用用户登录态(访问 yafeilee.com/admin 直接进后台,无需重新登录)
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## 实施方案
|
|
96
|
+
|
|
97
|
+
### 第一层:Discovery(发现层)
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
# 检测 Chrome 是否开启了 remote debugging
|
|
101
|
+
def discover_chrome_cdp
|
|
102
|
+
port_file = File.expand_path(
|
|
103
|
+
"~/Library/Application Support/Google/Chrome/DevToolsActivePort"
|
|
104
|
+
)
|
|
105
|
+
return nil unless File.exist?(port_file)
|
|
106
|
+
|
|
107
|
+
lines = File.read(port_file).strip.split("\n")
|
|
108
|
+
port = lines[0].to_i
|
|
109
|
+
path = lines[1]
|
|
110
|
+
|
|
111
|
+
# 验证端口确实在监听
|
|
112
|
+
TCPSocket.new("127.0.0.1", port).close
|
|
113
|
+
{ port: port, path: path, ws_url: "ws://127.0.0.1:#{port}#{path}" }
|
|
114
|
+
rescue Errno::ECONNREFUSED
|
|
115
|
+
nil
|
|
116
|
+
end
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**没有发现时的引导**:
|
|
120
|
+
|
|
121
|
+
> "请在 Chrome 地址栏打开 `chrome://inspect/#remote-debugging`,
|
|
122
|
+
> 勾选 'Allow remote debugging for this browser instance',只需一次。"
|
|
123
|
+
|
|
124
|
+
### 第二层:CDP Client(通信层)
|
|
125
|
+
|
|
126
|
+
新建 `lib/clacky/tools/cdp_client.rb`,实现:
|
|
127
|
+
|
|
128
|
+
- WebSocket 连接管理
|
|
129
|
+
- 命令发送(带 id)/ 响应匹配
|
|
130
|
+
- Session 管理(Browser-level vs Tab-level)
|
|
131
|
+
- 事件监听(Page.loadEventFired 等)
|
|
132
|
+
|
|
133
|
+
### 第三层:Browser Tool 改造
|
|
134
|
+
|
|
135
|
+
`lib/clacky/tools/browser.rb` 改造策略:
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
优先级 1: 检测 DevToolsActivePort → 用户真实 Chrome(Native CDP)
|
|
139
|
+
优先级 2: Fallback → 现有 agent-browser(向后兼容)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### 各平台 DevToolsActivePort 路径(macOS 以外待验证)
|
|
143
|
+
|
|
144
|
+
| 平台 | DevToolsActivePort 路径 |
|
|
145
|
+
|------|------------------------|
|
|
146
|
+
| macOS | `~/Library/Application Support/Google/Chrome/DevToolsActivePort` |
|
|
147
|
+
| Linux | `~/.config/google-chrome/DevToolsActivePort` |
|
|
148
|
+
| Windows | `%LOCALAPPDATA%\Google\Chrome\User Data\DevToolsActivePort` |
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## 关键问题与结论
|
|
153
|
+
|
|
154
|
+
### Q: `/json` endpoint 返回 404,怎么办?
|
|
155
|
+
|
|
156
|
+
Chrome 146 autoConnect 模式不走 HTTP `/json`,改用 `DevToolsActivePort` 文件 + 直接 WebSocket 连接。
|
|
157
|
+
|
|
158
|
+
### Q: ferrum gem 是否适用?
|
|
159
|
+
|
|
160
|
+
**不适用**。`Ferrum::Browser.new(url: "http://localhost:9222")` 虽然能连接到已有 Chrome,但会创建新的 incognito browser context,不复用用户的 tab 和登录态。需要绕过 ferrum,直接操作原始 CDP。
|
|
161
|
+
|
|
162
|
+
### Q: 每次连接都要点 Allow?
|
|
163
|
+
|
|
164
|
+
是的,Chrome 146 每次新的 WebSocket 连接都会弹确认框。这是 Chrome 的安全 consent 机制,无法绕过,但体验上是可以接受的(用户清楚地知道浏览器被控制了)。
|
|
165
|
+
|
|
166
|
+
### Q: agent-browser 是否彻底废弃?
|
|
167
|
+
|
|
168
|
+
建议渐进迁移:先并行运行,Native CDP 作为优先路径,agent-browser 作为 fallback,稳定后再移除。
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## 参考资料
|
|
173
|
+
|
|
174
|
+
- [Chrome 146 autoConnect 介绍 - DEV Community](https://dev.to/minatoplanb/chrome-146-finally-lets-ai-control-your-real-browser-google-oauth-included-28b7)
|
|
175
|
+
- [One Toggle That Changed Browser Automation - LinkedIn](https://www.linkedin.com/posts/surajadsul_one-toggle-that-changed-the-browser-automation-activity-7439161929664864257-0v8z)
|
|
176
|
+
- [Chrome DevTools MCP 连接模式详解](https://www.heyuan110.com/posts/ai/2026-03-17-chrome-devtools-mcp-guide/)
|
|
177
|
+
- [agent-browser #412: Support --auto-connect](https://github.com/vercel-labs/agent-browser/issues/412)
|
|
178
|
+
- [Chrome DevTools Protocol 官方文档](https://chromedevtools.github.io/devtools-protocol/)
|
|
179
|
+
- [DevToolsActivePort WebSocket path 说明](https://deepwiki.com/ChromeDevTools/chrome-devtools-mcp/2.3-connection-modes)
|
|
180
|
+
- [ferrum issue #320: Connect to existing Chrome](https://github.com/rubycdp/ferrum/issues/320)
|
|
181
|
+
- [Chrome remote-debugging security changes](https://developer.chrome.com/blog/remote-debugging-port)
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## 测试脚本
|
|
186
|
+
|
|
187
|
+
原型验证脚本位于:`tmp/cdp_test.rb`
|
|
188
|
+
|
|
189
|
+
运行前提:
|
|
190
|
+
1. Chrome 已开启 remote debugging(`chrome://inspect/#remote-debugging`)
|
|
191
|
+
2. 点击 Allow 弹框
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
bundle exec ruby tmp/cdp_test.rb
|
|
195
|
+
```
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Session Management Redesign
|
|
2
|
+
|
|
3
|
+
> Status: Design finalized, pending implementation
|
|
4
|
+
> Date: 2026-03-22
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Background
|
|
9
|
+
|
|
10
|
+
Current session management has several problems:
|
|
11
|
+
|
|
12
|
+
| Problem | Detail |
|
|
13
|
+
|---------|--------|
|
|
14
|
+
| **Delete bug (P0)** | `DELETE /api/sessions/:id` only removes the session from the in-memory registry; the JSON file on disk is never deleted |
|
|
15
|
+
| **Retention too small** | `cleanup_by_count(keep: 10)` — floods quickly with cron + channel sessions |
|
|
16
|
+
| **Only 5 sessions restored on startup** | Misses most cron/channel history |
|
|
17
|
+
| **No agent type selection in WebUI** | Always creates `general` profile, no UI to choose |
|
|
18
|
+
| **No session source tracking** | No `source` field — can't distinguish manual vs cron vs channel |
|
|
19
|
+
| **No agent profile tracking** | No `agent_profile` field in session JSON |
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## UI Design
|
|
24
|
+
|
|
25
|
+
### Sidebar Layout
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
┌─────────────────────────────────┐
|
|
29
|
+
│ Sessions [+ ▾] │
|
|
30
|
+
├─────────────────────────────────┤
|
|
31
|
+
│ Manual Scheduled Channel │ ← tab 切换
|
|
32
|
+
├─────────────────────────────────┤
|
|
33
|
+
│ │
|
|
34
|
+
│ ● Session 3 2t $0.02 │
|
|
35
|
+
│ ○ Session 2 5t $0.08 │
|
|
36
|
+
│ ○ Session 1 1t $0.01 │
|
|
37
|
+
│ │
|
|
38
|
+
├─────────────────────────────────┤
|
|
39
|
+
│ 👨‍💻 Coding │ ← 固定区域,不参与 tab
|
|
40
|
+
├─────────────────────────────────┤
|
|
41
|
+
│ ● 重构 auth 模块 3t $0.05 │
|
|
42
|
+
│ ○ 接口联调 1t $0.02 │
|
|
43
|
+
└─────────────────────────────────┘
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Upper area — General Agent sessions:**
|
|
47
|
+
- Three tabs: `Manual` / `Scheduled` / `Channel`
|
|
48
|
+
- Default tab: `Manual`
|
|
49
|
+
- Each tab shows sessions filtered by `source` field AND `agent_profile = general`
|
|
50
|
+
- Tab-to-source mapping (all restricted to `agent_profile = general`): `Manual` → `source = manual`, `Scheduled` → `source = cron`, `Channel` → `source = channel`
|
|
51
|
+
|
|
52
|
+
**Lower area — Coding Agent (and future agents):**
|
|
53
|
+
- Fixed section, always visible, does not participate in tab switching
|
|
54
|
+
- Shows all sessions where `agent_profile = coding`, regardless of source
|
|
55
|
+
- Future custom agents each get their own section below Coding
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
### New Session Button: `[+ ▾]`
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
┌─────────────────────────────────┐
|
|
63
|
+
│ Sessions [+ ▾] │
|
|
64
|
+
└─────────────────────────────────┘
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
- **Click `+`** → immediately create a new General session (zero friction, most common action)
|
|
68
|
+
- **Click `▾`** → dropdown appears:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
┌──────────────────┐
|
|
72
|
+
│ ✦ General │
|
|
73
|
+
│ 👨‍💻 Coding │
|
|
74
|
+
│ ──────────────── │
|
|
75
|
+
│ + Create Agent │ ← future
|
|
76
|
+
└──────────────────┘
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
- Selecting an agent from the dropdown creates a new session with that `agent_profile`
|
|
80
|
+
- `Create Agent` is a placeholder for future custom agent creation UI
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Data Layer Changes
|
|
85
|
+
|
|
86
|
+
### Session JSON — new fields
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"session_id": "...",
|
|
91
|
+
"source": "manual",
|
|
92
|
+
"agent_profile": "general",
|
|
93
|
+
...
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**`source` values:** `manual` | `cron` | `channel`
|
|
98
|
+
**`agent_profile` values:** `general` | `coding` | `<custom-name>`
|
|
99
|
+
|
|
100
|
+
### `build_session` signature update
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
build_session(
|
|
104
|
+
name:,
|
|
105
|
+
working_dir:,
|
|
106
|
+
source: :manual, # :manual | :cron | :channel
|
|
107
|
+
profile: "general", # agent profile name
|
|
108
|
+
permission_mode: :confirm_all
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Both `source` and `agent_profile` (the value passed as `profile:` above) must be serialized into the session JSON and restored on `from_session`.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## New API Endpoints
|
|
117
|
+
|
|
118
|
+
### `GET /api/agents`
|
|
119
|
+
|
|
120
|
+
Returns all available agent profiles (built-in + user custom).
|
|
121
|
+
|
|
122
|
+
Scan order:
|
|
123
|
+
1. `~/.clacky/agents/<name>/profile.yml` (user override / custom)
|
|
124
|
+
2. `<gem>/lib/clacky/default_agents/<name>/profile.yml` (built-in)
|
|
125
|
+
|
|
126
|
+
Response:
|
|
127
|
+
```json
|
|
128
|
+
{
|
|
129
|
+
"agents": [
|
|
130
|
+
{ "name": "general", "description": "A versatile digital employee living on your computer", "builtin": true },
|
|
131
|
+
{ "name": "coding", "description": "AI coding assistant and technical co-founder", "builtin": true },
|
|
132
|
+
{ "name": "my-pm", "description": "Product manager assistant", "builtin": false }
|
|
133
|
+
]
|
|
134
|
+
}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### `POST /api/sessions` — updated body
|
|
138
|
+
|
|
139
|
+
```json
|
|
140
|
+
{
|
|
141
|
+
"name": "Session 4",
|
|
142
|
+
"agent_profile": "coding"
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
`source` is always `manual` for API-created sessions. `agent_profile` defaults to `"general"` if omitted.
|
|
147
|
+
|
|
148
|
+
### `DELETE /api/sessions/:id` — fix
|
|
149
|
+
|
|
150
|
+
Must delete the disk JSON file in addition to removing from registry:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
def api_delete_session(session_id, res)
|
|
154
|
+
if @registry.delete(session_id)
|
|
155
|
+
@session_manager.delete(session_id) # ← ADD THIS
|
|
156
|
+
broadcast(session_id, { type: "session_deleted", session_id: session_id })
|
|
157
|
+
unsubscribe_all(session_id)
|
|
158
|
+
json_response(res, 200, { ok: true })
|
|
159
|
+
else
|
|
160
|
+
json_response(res, 404, { error: "Session not found" })
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
`SessionManager` needs a `delete(session_id)` method that finds and removes the file by session_id prefix.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Persistence Strategy Changes
|
|
170
|
+
|
|
171
|
+
| Setting | Current | New |
|
|
172
|
+
|---------|---------|-----|
|
|
173
|
+
| Count limit | `keep: 10` | `keep: 200` |
|
|
174
|
+
| Time-based cleanup | None | Delete sessions not accessed in **90 days** |
|
|
175
|
+
| Cleanup timing | On every save | On server startup + every 24h |
|
|
176
|
+
| Sessions restored on startup | 5 (current dir only) | 20 (current dir) |
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Implementation Order
|
|
181
|
+
|
|
182
|
+
1. **Fix DELETE bug** — `api_delete_session` + `SessionManager#delete` by session_id
|
|
183
|
+
2. **Data fields** — add `source` + `agent_profile` to `build_session`, `to_session_data`, `restore_session`
|
|
184
|
+
3. **Channel/Cron tagging** — pass `source: :channel` / `source: :cron` when `ChannelManager` and cron create sessions
|
|
185
|
+
4. **Persistence upgrade** — `keep: 200`, 90-day cleanup, restore 20 on startup
|
|
186
|
+
5. **`GET /api/agents`** — scan both dirs, merge, return list
|
|
187
|
+
6. **Frontend — sidebar redesign** — Manual/Scheduled/Channel tabs + Coding fixed section
|
|
188
|
+
7. **Frontend — `[+ ▾]` button** — split button with agent dropdown
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## File Locations
|
|
193
|
+
|
|
194
|
+
| File | Change |
|
|
195
|
+
|------|--------|
|
|
196
|
+
| `lib/clacky/session_manager.rb` | Add `delete(session_id)`, change keep to 200, add 90-day cleanup |
|
|
197
|
+
| `lib/clacky/agent/session_serializer.rb` | Serialize/restore `source` + `agent_profile` |
|
|
198
|
+
| `lib/clacky/server/http_server.rb` | Fix `api_delete_session`, add `GET /api/agents`, update `build_session`, restore 20 sessions |
|
|
199
|
+
| `lib/clacky/server/session_registry.rb` | Expose `agent_profile` + `source` in `session_summary` |
|
|
200
|
+
| `lib/clacky/server/channel/channel_manager.rb` | Pass `source: :channel` to `build_session` |
|
|
201
|
+
| `lib/clacky/web/sessions.js` | Tab switching, Coding section, `[+ ▾]` button |
|
|
202
|
+
| CSS / HTML template | New sidebar layout, tab styles, split button |
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# System Skill Authoring Guide
|
|
2
|
+
|
|
3
|
+
Guidelines for writing built-in (system-level) skills under `lib/clacky/default_skills/`.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Communicating with the Clacky server
|
|
8
|
+
|
|
9
|
+
Always use environment variables — never hardcode the port.
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
curl -s http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/xxx
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
`http_server.rb` injects `CLACKY_SERVER_HOST` and `CLACKY_SERVER_PORT` at startup.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 2. Read state via API, not config files
|
|
20
|
+
|
|
21
|
+
Skills must not read local config files directly.
|
|
22
|
+
|
|
23
|
+
- ❌ `cat ~/.clacky/browser.yml`
|
|
24
|
+
- ✅ `curl http://${CLACKY_SERVER_HOST}:${CLACKY_SERVER_PORT}/api/browser/status`
|
|
25
|
+
|
|
26
|
+
Exception: lightweight `enable` / `disable` operations may read/write yml directly (see `channel-setup`).
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## 3. Running supporting scripts
|
|
31
|
+
|
|
32
|
+
If a skill includes supporting scripts, instruct the AI to run them directly using the full path — **do not describe how to discover the path**. The LLM context already contains the full paths of all files in the skill directory (injected via supporting files at invoke time).
|
|
33
|
+
|
|
34
|
+
Write it simply as:
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
Run the setup script:
|
|
38
|
+
ruby SKILL_DIR/scripts/feishu_setup.rb
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
or for Python:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
python3 SKILL_DIR/scripts/setup.py
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
No `Gem.find_files`, no `find` fallback, no path-discovery logic needed.
|
|
@@ -17,6 +17,8 @@ module Clacky
|
|
|
17
17
|
@working_dir = session_data[:working_dir]
|
|
18
18
|
@created_at = session_data[:created_at]
|
|
19
19
|
@total_tasks = session_data.dig(:stats, :total_tasks) || 0
|
|
20
|
+
# Restore source; fall back to :manual for sessions saved before this field existed
|
|
21
|
+
@source = (session_data[:source] || "manual").to_sym
|
|
20
22
|
|
|
21
23
|
# Restore cache statistics if available
|
|
22
24
|
@cache_stats = session_data.dig(:stats, :cache_stats) || {
|
|
@@ -83,6 +85,8 @@ module Clacky
|
|
|
83
85
|
created_at: @created_at,
|
|
84
86
|
updated_at: Time.now.iso8601,
|
|
85
87
|
working_dir: @working_dir,
|
|
88
|
+
source: @source.to_s, # "manual" | "cron" | "channel" | "setup"
|
|
89
|
+
agent_profile: @agent_profile&.name || "", # "general" | "coding" | custom
|
|
86
90
|
todos: @todos, # Include todos in session data
|
|
87
91
|
time_machine: { # Include Time Machine state
|
|
88
92
|
task_parents: @task_parents || {},
|
|
@@ -171,9 +175,16 @@ module Clacky
|
|
|
171
175
|
|
|
172
176
|
page.each do |round|
|
|
173
177
|
msg = round[:user_msg]
|
|
174
|
-
raw_text
|
|
175
|
-
#
|
|
176
|
-
|
|
178
|
+
raw_text = extract_text_from_content(msg[:content])
|
|
179
|
+
# Images: recovered from inline image_url blocks in content (carry data_url for <img> rendering)
|
|
180
|
+
image_files = extract_image_files_from_content(msg[:content])
|
|
181
|
+
# Disk files (PDF, doc, etc.): stored in display_files on the user message at send time
|
|
182
|
+
disk_files = Array(msg[:display_files]).map { |f|
|
|
183
|
+
{ name: f[:name] || f["name"], type: f[:type] || f["type"] || "file",
|
|
184
|
+
preview_path: f[:preview_path] || f["preview_path"] }
|
|
185
|
+
}
|
|
186
|
+
all_files = image_files + disk_files
|
|
187
|
+
ui.show_user_message(raw_text, created_at: msg[:created_at], files: all_files)
|
|
177
188
|
|
|
178
189
|
round[:events].each do |ev|
|
|
179
190
|
# Skip system-injected messages (e.g. synthetic skill content, memory prompts)
|
|
@@ -208,9 +219,22 @@ module Clacky
|
|
|
208
219
|
|
|
209
220
|
# Special handling: request_user_feedback question is shown as an
|
|
210
221
|
# assistant message (matching real-time behavior), not as a tool call.
|
|
222
|
+
# Reconstruct the full formatted message including options (mirrors RequestUserFeedback#execute).
|
|
211
223
|
if name == "request_user_feedback"
|
|
212
224
|
question = args.is_a?(Hash) ? (args[:question] || args["question"]).to_s : ""
|
|
213
|
-
|
|
225
|
+
context = args.is_a?(Hash) ? (args[:context] || args["context"]).to_s : ""
|
|
226
|
+
options = args.is_a?(Hash) ? (args[:options] || args["options"]) : nil
|
|
227
|
+
|
|
228
|
+
unless question.empty?
|
|
229
|
+
parts = []
|
|
230
|
+
parts << "**Context:** #{context.strip}" << "" unless context.strip.empty?
|
|
231
|
+
parts << "**Question:** #{question.strip}"
|
|
232
|
+
if options && !options.empty?
|
|
233
|
+
parts << "" << "**Options:**"
|
|
234
|
+
options.each_with_index { |opt, i| parts << " #{i + 1}. #{opt}" }
|
|
235
|
+
end
|
|
236
|
+
ui.show_assistant_message(parts.join("\n"), files: [])
|
|
237
|
+
end
|
|
214
238
|
else
|
|
215
239
|
ui.show_tool_call(name, args)
|
|
216
240
|
end
|
|
@@ -296,6 +320,28 @@ module Clacky
|
|
|
296
320
|
content.to_s
|
|
297
321
|
end
|
|
298
322
|
end
|
|
323
|
+
|
|
324
|
+
# Extract images from a multipart content array and return them as file entries.
|
|
325
|
+
# Returns an array of { name:, mime_type:, data_url: } hashes — the same structure
|
|
326
|
+
# that the frontend sends via `files` in a message, and that show_user_message(files:) expects.
|
|
327
|
+
# Only includes inline data_url images (not remote URLs).
|
|
328
|
+
def extract_image_files_from_content(content)
|
|
329
|
+
return [] unless content.is_a?(Array)
|
|
330
|
+
|
|
331
|
+
content.each_with_index.filter_map do |block, idx|
|
|
332
|
+
next unless block.is_a?(Hash)
|
|
333
|
+
# OpenAI-style: { type: "image_url", image_url: { url: "data:image/png;base64,..." } }
|
|
334
|
+
next unless block[:type] == "image_url"
|
|
335
|
+
|
|
336
|
+
url = block.dig(:image_url, :url)
|
|
337
|
+
next unless url && url.start_with?("data:")
|
|
338
|
+
|
|
339
|
+
# Derive mime_type from the data URL prefix (e.g. "data:image/jpeg;base64,...")
|
|
340
|
+
mime_type = url[/\Adata:([^;]+);/, 1] || "image/jpeg"
|
|
341
|
+
ext = mime_type.split("/").last
|
|
342
|
+
{ name: "image_#{idx + 1}.#{ext}", mime_type: mime_type, data_url: url }
|
|
343
|
+
end
|
|
344
|
+
end
|
|
299
345
|
end
|
|
300
346
|
end
|
|
301
347
|
end
|