openclacky 0.8.6 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/lib/clacky/agent/memory_updater.rb +2 -2
- data/lib/clacky/agent/session_serializer.rb +7 -0
- data/lib/clacky/agent.rb +34 -15
- data/lib/clacky/default_skills/channel-setup/SKILL.md +37 -110
- data/lib/clacky/default_skills/pdf-reader/SKILL.md +90 -0
- data/lib/clacky/default_skills/skill-add/SKILL.md +39 -23
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +233 -0
- data/lib/clacky/server/http_server.rb +78 -3
- data/lib/clacky/skill.rb +25 -11
- data/lib/clacky/skill_loader.rb +14 -7
- data/lib/clacky/tools/browser.rb +75 -56
- data/lib/clacky/tools/file_reader.rb +3 -3
- data/lib/clacky/tools/shell.rb +22 -0
- data/lib/clacky/utils/file_processor.rb +2 -2
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +57 -0
- data/lib/clacky/web/app.js +90 -16
- data/lib/clacky/web/channels.js +1 -1
- data/lib/clacky/web/icon-dark.svg +23 -0
- data/lib/clacky/web/icon.svg +26 -0
- data/lib/clacky/web/index.html +2 -1
- data/lib/clacky/web/sessions.js +8 -4
- data/lib/clacky/web/skills.js +60 -30
- metadata +5 -2
- data/lib/clacky/default_skills/skill-add/scripts/install_from_github.rb +0 -233
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 38f9805e951dec0f87bda1b64033e0ea7f0c5c6d1c4fd2427f57dfc13aec0835
|
|
4
|
+
data.tar.gz: f6f0d08206ead392ffbbc073bb92c5b8e5b4c9f4ecf37172153c4bf46f4963e0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d7400735f1f2cbf9fa6b74e56aaa9264e881ab0618885e87b9757458b3b87bde01c5319db6d6f6833573792229c8aa635d5c09bab43cdde15e8cddfe2ce3e418
|
|
7
|
+
data.tar.gz: ef4dede49038208ff386f5b536ba4c64158e5b72f5599694f14ecf83bd3259b51be6af52bef10fbdea88fbc23f2b2b11c9316e1bdbb1f350c355a0fedeb23bd1
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.8.7] - 2026-03-13
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **PDF file upload and reading**: users can now upload PDF files directly in the WebUI chat; the agent reads and analyzes the content via the built-in `pdf-reader` skill
|
|
14
|
+
- **WebUI favicon and SVG icons**: browser tab now shows the Clacky icon
|
|
15
|
+
- **Public skill store install**: skills from the public store can be installed directly via the WebUI without a GitHub URL
|
|
16
|
+
- **Auto-kill previous server on startup**: launching `clacky serve` now automatically kills any previously running instance via pidfile, preventing port conflicts
|
|
17
|
+
|
|
18
|
+
### Improved
|
|
19
|
+
- **Brand skill loading speed**: loading brand skills no longer triggers a network decryption request — name and description are now read from the local `brand_skills.json` cache, making New Session significantly faster
|
|
20
|
+
- **Memory update UX**: memory update step now shows a spinner and info-style message instead of a bare log line
|
|
21
|
+
- **Browser snapshot output**: snapshot output is compressed to reduce token cost when the agent uses browser tools
|
|
22
|
+
- **Subagent output**: subagent task completion now shows a brief info line instead of a full "Task Complete" block, reducing noise in the parent agent's context
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- **Subagent token delta on first iteration**: subagent now inherits `previous_total_tokens` correctly, fixing an inflated token count on the first tool iteration
|
|
26
|
+
- **Chrome DevTools inspect URL**: updated the remote debugging URL to include the `#remote-debugging` fragment for correct navigation
|
|
27
|
+
- **Shell output token explosion**: long lines in shell output are now truncated to prevent excessive token usage
|
|
28
|
+
|
|
29
|
+
### More
|
|
30
|
+
- Binary file size limit lowered from 5 MB to 512 KB to reduce accidental token cost
|
|
31
|
+
- `kill_existing_server` logic moved from CLI into `HttpServer` for cleaner separation
|
|
32
|
+
- Browser tool prefers `snapshot -i` over `screenshot` for lower token cost
|
|
33
|
+
- Cross-platform PID file path using `Dir.tmpdir` instead of hardcoded `/tmp`
|
|
34
|
+
|
|
10
35
|
## [0.8.6] - 2026-03-12
|
|
11
36
|
|
|
12
37
|
### Added
|
|
@@ -42,7 +42,7 @@ module Clacky
|
|
|
42
42
|
|
|
43
43
|
@memory_prompt_injected = true
|
|
44
44
|
@memory_updating = true
|
|
45
|
-
@ui&.
|
|
45
|
+
@ui&.show_progress("Updating long-term memory…")
|
|
46
46
|
|
|
47
47
|
@messages << {
|
|
48
48
|
role: "user",
|
|
@@ -62,7 +62,7 @@ module Clacky
|
|
|
62
62
|
@messages.reject! { |m| m[:memory_update] }
|
|
63
63
|
@memory_prompt_injected = false
|
|
64
64
|
@memory_updating = false
|
|
65
|
-
@ui&.
|
|
65
|
+
@ui&.clear_progress
|
|
66
66
|
end
|
|
67
67
|
|
|
68
68
|
private def memory_update_enabled?
|
|
@@ -275,6 +275,13 @@ module Clacky
|
|
|
275
275
|
next unless source.is_a?(Hash) && source[:type].to_s == "base64"
|
|
276
276
|
|
|
277
277
|
"data:#{source[:media_type]};base64,#{source[:data]}"
|
|
278
|
+
when "document"
|
|
279
|
+
# Anthropic PDF document block — return a sentinel string for frontend display
|
|
280
|
+
source = block[:source]
|
|
281
|
+
next unless source.is_a?(Hash) && source[:media_type].to_s == "application/pdf"
|
|
282
|
+
|
|
283
|
+
# Return a special marker so the frontend can render a PDF badge instead of an <img>
|
|
284
|
+
"pdf:#{source[:data]&.then { |d| d[0, 32] }}" # prefix to identify without full payload
|
|
278
285
|
end
|
|
279
286
|
end
|
|
280
287
|
end
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -141,7 +141,7 @@ module Clacky
|
|
|
141
141
|
@config.model_name
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
-
def run(user_input, images: [])
|
|
144
|
+
def run(user_input, images: [], files: [])
|
|
145
145
|
# Start new task for Time Machine
|
|
146
146
|
task_id = start_new_task
|
|
147
147
|
|
|
@@ -172,8 +172,8 @@ module Clacky
|
|
|
172
172
|
@messages << system_message
|
|
173
173
|
end
|
|
174
174
|
|
|
175
|
-
# Format user message with images if provided
|
|
176
|
-
user_content = format_user_content(user_input, images)
|
|
175
|
+
# Format user message with images and files if provided
|
|
176
|
+
user_content = format_user_content(user_input, images, files)
|
|
177
177
|
@messages << { role: "user", content: user_content, task_id: task_id, created_at: Time.now.to_f }
|
|
178
178
|
@total_tasks += 1
|
|
179
179
|
|
|
@@ -208,7 +208,12 @@ module Clacky
|
|
|
208
208
|
|
|
209
209
|
# Check if done (no more tool calls needed)
|
|
210
210
|
if response[:finish_reason] == "stop" || response[:tool_calls].nil? || response[:tool_calls].empty?
|
|
211
|
-
|
|
211
|
+
# During memory update phase, show LLM response as info (not a chat bubble)
|
|
212
|
+
if @memory_updating && response[:content] && !response[:content].empty?
|
|
213
|
+
@ui&.show_info("🧠 " + response[:content].strip)
|
|
214
|
+
elsif response[:content] && !response[:content].empty?
|
|
215
|
+
@ui&.show_assistant_message(response[:content])
|
|
216
|
+
end
|
|
212
217
|
|
|
213
218
|
# Debug: log why we're stopping
|
|
214
219
|
if @config.verbose && (response[:tool_calls].nil? || response[:tool_calls].empty?)
|
|
@@ -227,7 +232,8 @@ module Clacky
|
|
|
227
232
|
end
|
|
228
233
|
|
|
229
234
|
# Show assistant message if there's content before tool calls
|
|
230
|
-
|
|
235
|
+
# During memory update phase, suppress text output (only tool calls matter)
|
|
236
|
+
if response[:content] && !response[:content].empty? && !@memory_updating
|
|
231
237
|
@ui&.show_assistant_message(response[:content])
|
|
232
238
|
end
|
|
233
239
|
|
|
@@ -272,13 +278,17 @@ module Clacky
|
|
|
272
278
|
@modified_files_in_task = [] # Reset for next task
|
|
273
279
|
end
|
|
274
280
|
|
|
275
|
-
@
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
281
|
+
if @is_subagent
|
|
282
|
+
@ui&.show_info("Subagent done (#{result[:iterations]} iterations, $#{result[:total_cost_usd].round(4)})")
|
|
283
|
+
else
|
|
284
|
+
@ui&.show_complete(
|
|
285
|
+
iterations: result[:iterations],
|
|
286
|
+
cost: result[:total_cost_usd],
|
|
287
|
+
duration: result[:duration_seconds],
|
|
288
|
+
cache_stats: result[:cache_stats],
|
|
289
|
+
awaiting_user_feedback: awaiting_user_feedback
|
|
290
|
+
)
|
|
291
|
+
end
|
|
282
292
|
@hooks.trigger(:on_complete, result)
|
|
283
293
|
result
|
|
284
294
|
rescue Clacky::AgentInterrupted
|
|
@@ -714,6 +724,10 @@ module Clacky
|
|
|
714
724
|
ui: @ui,
|
|
715
725
|
profile: @agent_profile.name
|
|
716
726
|
)
|
|
727
|
+
subagent.instance_variable_set(:@is_subagent, true)
|
|
728
|
+
|
|
729
|
+
# Inherit previous_total_tokens so the first iteration delta is calculated correctly
|
|
730
|
+
subagent.instance_variable_set(:@previous_total_tokens, @previous_total_tokens)
|
|
717
731
|
|
|
718
732
|
# Deep clone messages to avoid cross-contamination
|
|
719
733
|
subagent.instance_variable_set(:@messages, deep_clone(@messages))
|
|
@@ -809,11 +823,16 @@ module Clacky
|
|
|
809
823
|
end
|
|
810
824
|
|
|
811
825
|
# Format user content with optional images
|
|
826
|
+
# PDF files are handled upstream (server injects file path into message text),
|
|
827
|
+
# so this method only needs to handle images.
|
|
812
828
|
# @param text [String] User's text input
|
|
813
829
|
# @param images [Array<String>] Array of image file paths or data: URLs
|
|
814
|
-
# @
|
|
815
|
-
|
|
816
|
-
|
|
830
|
+
# @param files [Array] Unused — kept for signature compatibility
|
|
831
|
+
# @return [String|Array] String if no images, Array with content blocks otherwise
|
|
832
|
+
private def format_user_content(text, images, files = [])
|
|
833
|
+
images ||= []
|
|
834
|
+
|
|
835
|
+
return text if images.empty?
|
|
817
836
|
|
|
818
837
|
content = []
|
|
819
838
|
content << { type: "text", text: text } unless text.nil? || text.empty?
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: channel-setup
|
|
3
3
|
description: |
|
|
4
|
-
Configure IM platform channels (Feishu
|
|
5
|
-
Uses browser automation
|
|
4
|
+
Configure IM platform channels (Feishu, WeCom) for open-clacky.
|
|
5
|
+
Uses browser automation for navigation; guides the user to paste credentials and perform UI steps.
|
|
6
6
|
Trigger on: "channel setup", "setup feishu", "setup wecom", "channel config",
|
|
7
7
|
"channel status", "channel enable", "channel disable", "channel reconfigure", "channel doctor".
|
|
8
8
|
Subcommands: setup, status, enable <platform>, disable <platform>, reconfigure, doctor.
|
|
@@ -21,23 +21,14 @@ allowed-tools:
|
|
|
21
21
|
|
|
22
22
|
Configure IM platform channels for open-clacky. Config is stored at `~/.clacky/channels.yml`.
|
|
23
23
|
|
|
24
|
-
## Core Rule: Never ask for credentials
|
|
25
|
-
|
|
26
|
-
All credentials (App Secret, Bot Secret, etc.) must be read directly from browser snapshots.
|
|
27
|
-
**Asking the user to copy, type, or provide any credential is a failure.**
|
|
28
|
-
If automation cannot reveal a value, say so and suggest retrying — never fall back to manual input.
|
|
29
|
-
**Exception**: For Feishu and WeCom, guide the user to paste credentials — do not take snapshots or screenshots to extract. Directly ask the user to reveal and paste.
|
|
30
|
-
|
|
31
24
|
## Browser Automation Principles
|
|
32
25
|
|
|
33
|
-
-
|
|
34
|
-
- **
|
|
35
|
-
-
|
|
36
|
-
-
|
|
26
|
+
- **Always use built-in browser**: Pass `isolated: true` on every browser tool call. Do NOT ask the user to choose — use the built-in browser only.
|
|
27
|
+
- **Never use `screenshot`**: Use `snapshot -i` instead to get page structure as text. Do NOT generate image files.
|
|
28
|
+
- Use `open <url>` for navigation.
|
|
29
|
+
- AI navigates; user performs form fills, clicks, and pastes when instructed.
|
|
37
30
|
- If a login page or QR code appears, tell the user to log in and wait for "done" before continuing.
|
|
38
|
-
-
|
|
39
|
-
- If stuck (CAPTCHA, unexpected page, dialog, cannot find a UI element, scroll fails), take a screenshot, describe the situation, and **guide the user to help** — do NOT fall back to alternative navigation (e.g., switching tabs, trying different URLs). Ask the user to perform the specific step manually and reply "done" when ready.
|
|
40
|
-
- Never print raw secrets — mask to last 4 characters in all output.
|
|
31
|
+
- If stuck (CAPTCHA, unexpected page, dialog, cannot find a UI element), **guide the user to help** — ask the user to perform the specific step manually and reply "done" when ready.
|
|
41
32
|
|
|
42
33
|
---
|
|
43
34
|
|
|
@@ -76,7 +67,7 @@ If the file doesn't exist: "No channels configured yet. Run `/channel-setup setu
|
|
|
76
67
|
Ask:
|
|
77
68
|
> Which platform would you like to connect?
|
|
78
69
|
>
|
|
79
|
-
> 1. Feishu
|
|
70
|
+
> 1. Feishu
|
|
80
71
|
> 2. WeCom (Enterprise WeChat)
|
|
81
72
|
|
|
82
73
|
---
|
|
@@ -85,33 +76,27 @@ Ask:
|
|
|
85
76
|
|
|
86
77
|
#### Phase 1 — Open Feishu Open Platform
|
|
87
78
|
|
|
88
|
-
1.
|
|
89
|
-
2.
|
|
90
|
-
|
|
91
|
-
> 1. Feishu — https://open.feishu.cn
|
|
92
|
-
> 2. Lark — https://open.larksuite.com
|
|
93
|
-
3. Navigate to `https://open.feishu.cn/app` (or `/larksuite.com/app`).
|
|
94
|
-
4. Take a snapshot. If a login page or QR code is shown, tell the user to log in and wait for "done".
|
|
95
|
-
5. Confirm the app list is visible.
|
|
79
|
+
1. Navigate: `open https://open.feishu.cn/app`. Pass `isolated: true`.
|
|
80
|
+
2. Use `snapshot -i` to check page state. If a login page or QR code is shown, tell the user to log in and wait for "done".
|
|
81
|
+
3. Confirm the app list is visible.
|
|
96
82
|
|
|
97
83
|
#### Phase 2 — Create a new app
|
|
98
84
|
|
|
99
|
-
6. **Always create a new app** — do NOT reuse existing apps. Click
|
|
85
|
+
6. **Always create a new app** — do NOT reuse existing apps. Guide the user: "Click 'Create Enterprise Self-Built App', fill in name (e.g. Open Clacky) and description (e.g. AI assistant powered by open-clacky), then submit. Reply done." Wait for "done".
|
|
100
86
|
|
|
101
|
-
#### Phase 3 —
|
|
87
|
+
#### Phase 3 — Enable Bot capability
|
|
102
88
|
|
|
103
|
-
7.
|
|
104
|
-
8. Do NOT take snapshots or screenshots. Directly guide the user: "Click the eye icon next to App Secret to reveal it. Copy App ID and App Secret, then paste here. Reply with: App ID: xxx, App Secret: xxx" (confirm back masked to last 4 chars).
|
|
89
|
+
7. Feishu opens Add App Capabilities by default after creating an app. Guide the user: "Find the Bot capability card and click the Add button next to it, then reply done." Wait for "done".
|
|
105
90
|
|
|
106
|
-
#### Phase 4 —
|
|
91
|
+
#### Phase 4 — Get credentials
|
|
107
92
|
|
|
108
|
-
|
|
109
|
-
|
|
93
|
+
8. Navigate to Credentials & Basic Info in the left menu.
|
|
94
|
+
9. Guide the user: "Copy App ID and App Secret, then paste here. Reply with: App ID: xxx, App Secret: xxx" Wait for the user's reply containing both values (the pasted credentials take the place of "done" for this step).
|
|
110
95
|
|
|
111
96
|
#### Phase 5 — Add message permissions
|
|
112
97
|
|
|
113
|
-
|
|
114
|
-
|
|
98
|
+
10. Navigate to Permission Management and open the bulk import dialog.
|
|
99
|
+
11. Guide the user: "In the bulk import dialog, clear the existing example first (select all, delete), then paste the following JSON. Reply done." Wait for "done". Do NOT try to clear or edit via browser — user does it.
|
|
115
100
|
|
|
116
101
|
```json
|
|
117
102
|
{
|
|
@@ -126,45 +111,21 @@ Ask:
|
|
|
126
111
|
}
|
|
127
112
|
```
|
|
128
113
|
|
|
129
|
-
|
|
114
|
+
#### Phase 6 — Configure event subscription (Long Connection)
|
|
130
115
|
|
|
131
|
-
|
|
116
|
+
**CRITICAL**: Feishu requires the long connection to be established *before* you can save the event config. The platform shows "No application connection detected, ensure long connection is established before saving" until `clacky server` is running and connected. Do NOT try to save until the connection is established.
|
|
132
117
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
118
|
+
12. **Apply config and establish connection** — Run `curl -X POST http://localhost:7070/api/channels/feishu -H "Content-Type: application/json" -d '{"app_id":"...","app_secret":"...","domain":"..."}'`. The server hot-reloads the Feishu adapter and establishes the WebSocket.
|
|
119
|
+
13. **Wait for connection** — Wait until the log shows `[feishu-ws] WebSocket connected ✅`.
|
|
120
|
+
14. **Navigate to Events & Callbacks** — Then guide the user: "Select 'Long Connection' mode. Click Save. Then click Add Event, type `im.message.receive_v1` in the search box, select it, click Add. Reply done." Wait for "done".
|
|
136
121
|
|
|
137
122
|
#### Phase 7 — Publish the app
|
|
138
123
|
|
|
139
|
-
|
|
140
|
-
19. Note: personal accounts publish immediately; enterprise accounts require admin approval — tell the user if this applies.
|
|
141
|
-
|
|
142
|
-
#### Phase 8 — Allowed users (optional)
|
|
124
|
+
15. Navigate to Version Management & Release. Then guide the user: "Create a new version, fill in version (e.g. 1.0.0) and update description (e.g. Initial release for Open Clacky), then publish. Reply done." Wait for "done".
|
|
143
125
|
|
|
144
|
-
|
|
145
|
-
> Do you want to restrict which Feishu users can send tasks to the AI?
|
|
146
|
-
> Reply "skip" to allow everyone, or "yes" to configure a whitelist.
|
|
147
|
-
21. If "yes":
|
|
148
|
-
- Tell the user to send any message to the Open Clacky bot in Feishu, then reply "done".
|
|
149
|
-
- Navigate to Log Search → Event Log, find the latest `im.message.receive_v1` event, and read `sender.sender_id.open_id` (format `ou_xxx`) directly from the page.
|
|
150
|
-
- Repeat for additional users if needed.
|
|
126
|
+
#### Phase 8 — Finalize config and validate
|
|
151
127
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
Write `~/.clacky/channels.yml` (merge with existing content, never overwrite other platforms):
|
|
155
|
-
|
|
156
|
-
```yaml
|
|
157
|
-
channels:
|
|
158
|
-
feishu:
|
|
159
|
-
enabled: true
|
|
160
|
-
app_id: <from user paste>
|
|
161
|
-
app_secret: <from user paste>
|
|
162
|
-
domain: https://open.feishu.cn # or https://open.larksuite.com
|
|
163
|
-
# allowed_users: # omit if not configured
|
|
164
|
-
# - ou_xxx
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
Run `chmod 600 ~/.clacky/channels.yml`.
|
|
128
|
+
Config was applied in step 12 (via API).
|
|
168
129
|
|
|
169
130
|
Validate:
|
|
170
131
|
```bash
|
|
@@ -174,57 +135,23 @@ curl -s -X POST "${DOMAIN}/open-apis/auth/v3/tenant_access_token/internal" \
|
|
|
174
135
|
```
|
|
175
136
|
Check for `"code":0`. If it fails, explain and offer to retry.
|
|
176
137
|
|
|
177
|
-
On success: "✅ Feishu channel configured.
|
|
138
|
+
On success: "✅ Feishu channel configured. The channel is already active."
|
|
178
139
|
|
|
179
140
|
---
|
|
180
141
|
|
|
181
142
|
### WeCom setup
|
|
182
143
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
#### Admin Console flow (user has admin access)
|
|
191
|
-
|
|
192
|
-
**Principle**: Do NOT take snapshots or screenshots to inspect the UI. Directly guide the user through each step. For Bot ID and Secret, guide the user to paste them — do NOT try to extract from the page.
|
|
144
|
+
1. Navigate: `open https://work.weixin.qq.com/wework_admin/frame#/aiHelper/create`. Pass `isolated: true`.
|
|
145
|
+
2. Use `snapshot -i` to check page state. If a login page or QR code is shown, tell the user to log in and wait for "done".
|
|
146
|
+
3. Steps 3–7: Do NOT take snapshots. Guide the user: "Scroll to the bottom of the right panel and click 'API mode creation'. Reply done." Wait for "done".
|
|
147
|
+
4. Guide the user: "Click 'Add' next to 'Visible Range'. In the scope dialog, select the top-level company node (or specific users/departments). Click Confirm. Reply done." Wait for "done".
|
|
148
|
+
5. Guide the user: "If Secret is not visible, click 'Get Secret'. Copy Bot ID and Secret **before** clicking Save — do NOT click 'Get Secret' again after copying (it invalidates the previous secret). Paste here. Reply with: Bot ID: xxx, Secret: xxx" Wait for the user's reply containing both values (the pasted credentials take the place of "done" for this step).
|
|
149
|
+
6. Guide the user: "Click Save. In the dialog, enter name (e.g. Open Clacky) and description (e.g. AI assistant powered by open-clacky). Click Confirm. Click Save again. Reply done." Wait for "done".
|
|
150
|
+
7. **Apply config and hot-reload** — Parse credentials from step 5. Trim leading/trailing whitespace from bot_id and secret. Run `curl -X POST http://localhost:7070/api/channels/wecom -H "Content-Type: application/json" -d '{"bot_id":"...","secret":"..."}'`. Ensure bot_id (starts with `aib`) and secret (longer string) are not swapped.
|
|
193
151
|
|
|
194
|
-
|
|
195
|
-
2. Navigate directly to `https://work.weixin.qq.com/wework_admin/frame#/aiHelper/create` (use `tab new <url>` when isolated=false). Pass the same `isolated` value on every browser call.
|
|
196
|
-
3. Directly guide the user: "If you see a login page or QR code, log in. When the create page is visible, reply done." Wait for "done".
|
|
197
|
-
4. Guide the user: "Scroll to the bottom of the right panel and click 'API mode creation', then reply done." Wait for "done".
|
|
198
|
-
5. Guide the user: "In the scope dialog, select the top-level company node to allow all members, or select specific users/departments if you prefer. Click Confirm, then reply done." Wait for "done".
|
|
199
|
-
6. Guide the user: "If the Secret is not yet visible, click 'Get Secret'. When both Bot ID and Secret are visible, copy them and paste here. Reply with: Bot ID: xxx, Secret: xxx" (confirm back masked to last 4 chars).
|
|
200
|
-
7. Guide the user: "Click Save. In the dialog, enter name 'Open Clacky' and description 'AI assistant powered by open-clacky', click Confirm, then reply done." Wait for "done".
|
|
201
|
-
8. Write config and run `chmod 600 ~/.clacky/channels.yml`.
|
|
202
|
-
|
|
203
|
-
---
|
|
204
|
-
|
|
205
|
-
#### Client flow (user is not admin; cannot access admin console)
|
|
206
|
-
|
|
207
|
-
Guide the user to operate in the **WeCom desktop client** (Workbench). No browser automation needed.
|
|
208
|
-
|
|
209
|
-
1. Guide the user: "Open the WeCom desktop client → Workbench → Smart Bot → "Create Bot". Reply done when you see the creation page." Wait for "done".
|
|
210
|
-
2. Guide the user: "Scroll to the bottom of the page and click 'API Mode'. Reply done." Wait for "done".
|
|
211
|
-
3. Guide the user: "The Bot ID appears on the right side. Under 'API Configuration', find the Secret row and click 'Click to Reveal' if needed. Copy both and paste here. Reply with: Bot ID: xxx, Secret: xxx" (confirm back masked to last 4 chars).
|
|
212
|
-
4. Guide the user: "Fill in name 'Open Clacky' and description 'AI assistant powered by open-clacky', click Save (or Confirm), then reply done." Wait for "done".
|
|
213
|
-
5. Write `~/.clacky/channels.yml` and run `chmod 600 ~/.clacky/channels.yml`.
|
|
214
|
-
|
|
215
|
-
---
|
|
216
|
-
|
|
217
|
-
#### Save config (both flows)
|
|
218
|
-
|
|
219
|
-
```yaml
|
|
220
|
-
channels:
|
|
221
|
-
wecom:
|
|
222
|
-
enabled: true
|
|
223
|
-
bot_id: <extracted or entered>
|
|
224
|
-
secret: <extracted or entered>
|
|
225
|
-
```
|
|
152
|
+
On success: "✅ WeCom channel configured."
|
|
226
153
|
|
|
227
|
-
On success: "✅ WeCom channel configured.
|
|
154
|
+
On success: "✅ WeCom channel configured. To use the bot: WeCom client → Contacts → select Smart Bot to see the newly created bot."
|
|
228
155
|
|
|
229
156
|
---
|
|
230
157
|
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pdf-reader
|
|
3
|
+
description: 'Read and analyze PDF files. Use this skill when the user has attached a PDF or mentions a PDF file path and wants to understand, summarize, extract, or ask questions about its content. Trigger on: "read this PDF", "analyze the PDF", "what does this PDF say", "what is in this file", "里面有什么", "帮我看看这个PDF", "总结一下", "这份文件说了什么" — or when a message contains a PDF attachment reference even without an explicit question. Also trigger when the user asks vague questions like "what is this?", "summarize", "tell me about this" if a PDF is attached.'
|
|
4
|
+
disable-model-invocation: false
|
|
5
|
+
user-invocable: true
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# PDF Reading Skill
|
|
9
|
+
|
|
10
|
+
## Your Goal
|
|
11
|
+
Extract text content from the PDF file and answer the user's question based on that content. If the user's question is vague or absent, default to providing a clear structured summary of the document.
|
|
12
|
+
|
|
13
|
+
## Step 1 — Extract text from the PDF
|
|
14
|
+
|
|
15
|
+
Use `pdftotext` (preferred, fastest) or Python `pdfplumber` as fallback.
|
|
16
|
+
|
|
17
|
+
### Option A: pdftotext (use this first)
|
|
18
|
+
```bash
|
|
19
|
+
pdftotext -layout -enc UTF-8 "/path/to/file.pdf" -
|
|
20
|
+
```
|
|
21
|
+
- `-enc UTF-8` ensures correct encoding for Chinese, Japanese, and other non-Latin text
|
|
22
|
+
- `-layout` preserves column layout for tables
|
|
23
|
+
- The `-` at the end prints to stdout (no temp file needed)
|
|
24
|
+
|
|
25
|
+
**Install if missing:**
|
|
26
|
+
- macOS: `brew install poppler`
|
|
27
|
+
- Ubuntu/Debian: `apt install poppler-utils`
|
|
28
|
+
- CentOS/Fedora: `yum install poppler-utils`
|
|
29
|
+
|
|
30
|
+
### Option B: Python pdfplumber (fallback if pdftotext not available)
|
|
31
|
+
```python
|
|
32
|
+
import pdfplumber
|
|
33
|
+
|
|
34
|
+
with pdfplumber.open("/path/to/file.pdf") as pdf:
|
|
35
|
+
for i, page in enumerate(pdf.pages, 1):
|
|
36
|
+
text = page.extract_text()
|
|
37
|
+
if text:
|
|
38
|
+
print(f"--- Page {i} ---")
|
|
39
|
+
print(text)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Option C: pypdf (last resort)
|
|
43
|
+
```python
|
|
44
|
+
from pypdf import PdfReader
|
|
45
|
+
|
|
46
|
+
reader = PdfReader("/path/to/file.pdf")
|
|
47
|
+
for i, page in enumerate(reader.pages, 1):
|
|
48
|
+
print(f"--- Page {i} ---")
|
|
49
|
+
print(page.extract_text())
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Step 2 — Handle large files
|
|
53
|
+
|
|
54
|
+
If the extracted text is truncated or very long (>200 lines):
|
|
55
|
+
- For a **summary request**: read the full output file instead of relying on stdout — save to a temp file first:
|
|
56
|
+
```bash
|
|
57
|
+
pdftotext -layout -enc UTF-8 "/path/to/file.pdf" /tmp/pdf_extracted.txt
|
|
58
|
+
cat /tmp/pdf_extracted.txt
|
|
59
|
+
```
|
|
60
|
+
- For a **specific question**: use `grep` to locate relevant sections before reading the full content:
|
|
61
|
+
```bash
|
|
62
|
+
grep -n "keyword" /tmp/pdf_extracted.txt | head -30
|
|
63
|
+
```
|
|
64
|
+
- Extract once, answer from memory — do NOT re-read the file multiple times.
|
|
65
|
+
|
|
66
|
+
## Step 3 — Answer the user's question
|
|
67
|
+
|
|
68
|
+
### Output format guidelines
|
|
69
|
+
|
|
70
|
+
Adapt the response format to the document type:
|
|
71
|
+
|
|
72
|
+
| Document type | Recommended format |
|
|
73
|
+
|---|---|
|
|
74
|
+
| Business plan / Report | Structured summary with ## headers per section |
|
|
75
|
+
| Contract / Legal | Key clauses in bullet points, highlight dates and parties |
|
|
76
|
+
| Academic paper | Abstract → Key findings → Methodology → Conclusions |
|
|
77
|
+
| Invoice / Receipt | Table: item, amount, total |
|
|
78
|
+
| General / Unknown | Brief overview paragraph + key points as bullets |
|
|
79
|
+
|
|
80
|
+
**General rules:**
|
|
81
|
+
- Use Markdown formatting (headers, bullets, tables) for clarity
|
|
82
|
+
- Match the user's language — if they asked in Chinese, answer in Chinese
|
|
83
|
+
- Lead with the most important information first
|
|
84
|
+
- If the user asked a specific question, answer it directly before summarizing
|
|
85
|
+
|
|
86
|
+
## Rules
|
|
87
|
+
- Always use the **actual file path** from the `[PDF attached: ...]` message
|
|
88
|
+
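- If text extraction returns empty (scanned/image PDF), inform the user and suggest OCR: install Tesseract (`brew install tesseract` on macOS, `apt install tesseract-ocr` on Ubuntu/Debian), convert the pages to images with `pdftoppm -r 300 -png file.pdf page` (Tesseract cannot read PDF input directly), then run `tesseract <page-image>.png output` on each generated image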
|
|
89
|
+
- Do NOT re-read the file multiple times — extract once, answer from memory
|
|
90
|
+
- If the user's question is vague (e.g. "里面有什么", "what is this?"), default to a full structured summary
|
|
@@ -1,44 +1,60 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: skill-add
|
|
3
|
-
description: Install skills from
|
|
3
|
+
description: 'Install skills from a zip URL. Use this skill whenever the user wants to install a skill from a zip link, or uses commands like /skill-add with a URL. Trigger on phrases like: install skill, install from zip, skill from zip, skill from url, add skill from zip, 安装skill, 从zip安装skill.'
|
|
4
4
|
disable-model-invocation: false
|
|
5
5
|
user-invocable: true
|
|
6
6
|
---
|
|
7
7
|
|
|
8
|
-
# Skill Add —
|
|
8
|
+
# Skill Add — Zip Installer
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Installs a skill from a zip URL using the bundled `install_from_zip.rb` script.
|
|
11
11
|
|
|
12
|
-
##
|
|
12
|
+
## Finding the Script
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
# Install all skills under a specific subdirectory
|
|
19
|
-
https://github.com/user/repo/skills
|
|
14
|
+
The script lives inside this skill's directory, in one of two locations:
|
|
15
|
+
- Global: `~/.clacky/skills/skill-add/scripts/`
|
|
16
|
+
- Project-level: `.clacky/skills/skill-add/scripts/`
|
|
20
17
|
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
Locate it at runtime with:
|
|
19
|
+
```bash
|
|
20
|
+
ruby "$(find ~/.clacky/skills/skill-add .clacky/skills/skill-add -name 'install_from_zip.rb' 2>/dev/null | head -1)" <zip_url> <slug>
|
|
23
21
|
```
|
|
24
22
|
|
|
25
|
-
|
|
23
|
+
---
|
|
26
24
|
|
|
27
|
-
|
|
25
|
+
## How to Install
|
|
26
|
+
|
|
27
|
+
When the user provides a `.zip` URL, run:
|
|
28
28
|
|
|
29
29
|
```bash
|
|
30
|
-
ruby
|
|
30
|
+
ruby "$(find ~/.clacky/skills/skill-add .clacky/skills/skill-add -name 'install_from_zip.rb' 2>/dev/null | head -1)" <zip_url> <slug>
|
|
31
31
|
```
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
- `<zip_url>` — the download URL provided by the user
|
|
34
|
+
- `<slug>` — the skill's directory name; if not provided, infer it from the URL filename by stripping version suffixes (e.g. `canvas-design-1.2.0.zip` → `canvas-design`)
|
|
35
|
+
|
|
36
|
+
The script handles everything automatically:
|
|
37
|
+
- Downloads the zip (follows HTTP redirects)
|
|
38
|
+
- Extracts and locates all `SKILL.md` files inside
|
|
39
|
+
- Copies skill directories to `.clacky/skills/` in the current project (overwrites existing)
|
|
40
|
+
- Reports installed skills with their descriptions
|
|
41
|
+
|
|
42
|
+
**Do NOT manually download or unzip — the script handles everything.**
|
|
38
43
|
|
|
39
|
-
|
|
44
|
+
## Example
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
/skill-add https://store.clacky.ai/skills/canvas-design-1.2.0.zip
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
ruby "$(find ~/.clacky/skills/skill-add .clacky/skills/skill-add -name 'install_from_zip.rb' 2>/dev/null | head -1)" \
|
|
52
|
+
"https://store.clacky.ai/skills/canvas-design-1.2.0.zip" \
|
|
53
|
+
"canvas-design"
|
|
54
|
+
```
|
|
40
55
|
|
|
41
56
|
## Notes
|
|
42
57
|
|
|
43
|
-
-
|
|
44
|
-
-
|
|
58
|
+
- Skills install to `.clacky/skills/` in the current project
|
|
59
|
+
- Project-level skills override global skills (`~/.clacky/skills/`)
|
|
60
|
+
- If the user doesn't provide a URL, ask them for the zip URL — this skill only supports zip installs
|