openclacky 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/README.md +87 -53
- data/lib/clacky/agent/cost_tracker.rb +19 -2
- data/lib/clacky/agent/llm_caller.rb +218 -0
- data/lib/clacky/agent/message_compressor_helper.rb +32 -2
- data/lib/clacky/agent.rb +54 -22
- data/lib/clacky/client.rb +44 -5
- data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
- data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
- data/lib/clacky/default_skills/new/SKILL.md +3 -114
- data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
- data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
- data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
- data/lib/clacky/message_format/anthropic.rb +72 -8
- data/lib/clacky/message_format/bedrock.rb +6 -3
- data/lib/clacky/providers.rb +146 -3
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
- data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
- data/lib/clacky/server/channel/channel_manager.rb +12 -4
- data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
- data/lib/clacky/server/http_server.rb +746 -13
- data/lib/clacky/server/session_registry.rb +55 -24
- data/lib/clacky/skill.rb +10 -9
- data/lib/clacky/skill_loader.rb +23 -11
- data/lib/clacky/tools/file_reader.rb +232 -127
- data/lib/clacky/tools/security.rb +42 -64
- data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
- data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
- data/lib/clacky/tools/terminal/session_manager.rb +8 -3
- data/lib/clacky/tools/terminal.rb +263 -16
- data/lib/clacky/ui2/layout_manager.rb +8 -1
- data/lib/clacky/ui2/output_buffer.rb +83 -23
- data/lib/clacky/ui2/ui_controller.rb +74 -7
- data/lib/clacky/utils/file_processor.rb +14 -40
- data/lib/clacky/utils/model_pricing.rb +215 -0
- data/lib/clacky/utils/parser_manager.rb +70 -6
- data/lib/clacky/utils/string_matcher.rb +23 -1
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +673 -9
- data/lib/clacky/web/app.js +40 -1608
- data/lib/clacky/web/i18n.js +209 -0
- data/lib/clacky/web/index.html +166 -2
- data/lib/clacky/web/onboard.js +77 -1
- data/lib/clacky/web/profile.js +442 -0
- data/lib/clacky/web/sessions.js +1034 -2
- data/lib/clacky/web/settings.js +127 -6
- data/lib/clacky/web/sidebar.js +39 -0
- data/lib/clacky/web/skills.js +460 -0
- data/lib/clacky/web/trash.js +343 -0
- data/lib/clacky/web/ws-dispatcher.js +255 -0
- data/lib/clacky.rb +5 -3
- metadata +16 -17
- data/lib/clacky/clacky_auth_client.rb +0 -152
- data/lib/clacky/clacky_cloud_config.rb +0 -123
- data/lib/clacky/cloud_project_client.rb +0 -169
- data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
- data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
- data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
- data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
- data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
- data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
- data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
- data/lib/clacky/deploy_api_client.rb +0 -484
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-metadata.gz:
-data.tar.gz:
+metadata.gz: d36230a47c25a8b5fb04dfc14f9359155489a2539d0a699843e140deed1434ba
+data.tar.gz: c237725ed637d2d7a852d3624611cca101290e2348e0c6befb2650342550ec03
 SHA512:
-metadata.gz:
-data.tar.gz:
+metadata.gz: 89c65d848c67dff3ed63ae70cd6a0539a7a8068682d72009b34741ea09c44749f5fa05c5839bc9c02c5c499709c8e5bce321165561bdbf8a43500539d1e4b21c
+data.tar.gz: 74ebac898a16e090481c8ba423ac7c2d9cafe918f09cdc87066b54c911034b941c713650d24aaa8d71c627c48d3c8c56a780c2ffa6e717448e4712cdd5ca9512
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,45 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.2] - 2026-05-07
+
+### Added
+- **Multi-region provider endpoints.** Providers can now expose multiple endpoint variants (e.g. global vs. CN-optimized Anthropic), and you can switch between them from both the onboarding flow and the Settings page. Bundled with updated model pricing data so cost estimates stay accurate across regions. (#67)
+- **Pre-installed platform-recommended skills during onboarding.** New users get a curated set of skills automatically during onboard — downloaded concurrently with dual-host fallback and a hard deadline so onboarding never hangs on a slow mirror. (#68)
+- **Builtin skills served via platform API.** Recommended skills are now fetched through `/api/v1/skills/builtin`, making the list easier to update without shipping a new gem. (#72)
+- **Feishu group chats: respond only when @-mentioned.** The Feishu adapter now parses the mentions array and ignores group messages that don't @ the bot, so the bot no longer replies to every message in a busy group. Sessions are also isolated per (chat, user) pair by default (`:chat_user` binding mode), preventing context leaks between DMs and groups. (#71)
+
+### Fixed
+- **Recover from truncated upstream tool calls.** When an upstream LLM response cuts off mid tool-call, the agent now detects the truncation and recovers automatically instead of getting stuck. Covered by extensive new tests.
+- **Feedback option click now sends the message.** Clicking a suggested feedback option previously set the input text but silently failed to send (due to a `sendMessage` vs `_sendMessage` scope bug). Now it dispatches immediately as expected. (#69)
+- **Sidebar footer and input area heights aligned.** Introduced a shared `--footer-height` CSS variable (56px) and reworked the stop button to use a pseudo-element square for pixel-perfect centering — both columns now line up cleanly. (#70)
+- **Feishu bot fails closed on API outage.** If `/open-apis/bot/v3/info` fails and `bot_open_id` can't be resolved, the adapter now drops group messages (with a warning) instead of spamming every group message as a fallback.
+- **`preview.md` no longer pollutes user project directories.** Preview files are written to the system tmpdir, and plain text formats (md/log/csv) skip preview generation entirely since they're already readable as-is.
+
+### More
+- Added agent stop logging to make interrupt / stop chains easier to debug.
+
+## [1.0.1] - 2026-05-06
+
+### Added
+- **OpenRouter Anthropic API support.** You can now route Claude model requests through OpenRouter, giving access to Anthropic models via a single OpenRouter API key — useful when Anthropic direct access is limited in your region.
+- **GPT provider support.** Direct GPT provider configuration is now available alongside other providers, making it easier to switch between different OpenAI-compatible endpoints.
+- **OCR-powered PDF reading.** PDF files that contain scanned images (non-text PDFs) are now readable via OCR, allowing the agent to extract content from scanned documents, invoices, and image-heavy PDFs.
+- **Terminal output size control.** The agent now limits terminal output to a configurable size, preventing token overflows when running commands that produce very long output.
+- **Memories & Trash manager in Web UI.** A new management panel lets you browse, review, and delete agent memories and trashed files directly from the Web UI.
+- **Watchdog for interrupt messages.** A background watchdog ensures interrupt signals reliably stop the agent even when it's deep in a tool execution loop.
+- **Skill import with category directory scanning.** When importing skills from openclaw packages, nested category directories are now scanned automatically, so all skills in a category bundle are imported at once.
+
+### Improved
+- **Deploy skill simplified.** The deploy skill now uses Railway CLI directly without custom helper tools, making deployments more reliable and the codebase significantly lighter.
+- **Fix double-render of progress indicators.** Progress spinners and status lines no longer render twice in quick succession, keeping the Web UI output clean.
+- **Session idle status tracking and file descriptor cleanup.** Sessions now correctly report idle state when the agent finishes, and open file descriptors are properly closed to avoid resource leaks.
+- **GPT-4.1 and GPT-5 pricing added.** Model cost tracking now includes the latest GPT-4.1 and GPT-5 pricing tiers.
+
+### Fixed
+- **UTF-8 encoding error in file preview.** Opening files with non-UTF-8 characters no longer crashes the preview — they are now handled gracefully.
+- **Expand `~` in openfile path.** The "open file in editor" API endpoint now correctly expands `~` to the user's home directory.
+
 ## [1.0.0] - 2026-04-30
 
 ### Added
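One detail from the 1.0.2 Feishu entry is the `:chat_user` binding mode, which keys each session on the (chat, user) pair rather than on the chat alone. The sketch below only illustrates that idea; the method name, field names, and the `:chat` alternative are assumptions for illustration, not the adapter's actual API:

```ruby
# Illustrative only: shows how a (chat, user) key isolates sessions per user,
# compared to one shared session per group chat. Names are hypothetical.
def session_key(chat_id:, user_id:, binding_mode: :chat_user)
  case binding_mode
  when :chat_user then "#{chat_id}:#{user_id}" # default: no context bleed between users
  when :chat      then chat_id.to_s            # one shared session for the whole group
  else raise ArgumentError, "unknown binding mode #{binding_mode.inspect}"
  end
end

session_key(chat_id: "oc_123", user_id: "ou_alice")                     # => "oc_123:ou_alice"
session_key(chat_id: "oc_123", user_id: "ou_bob")                       # => "oc_123:ou_bob" (separate session)
session_key(chat_id: "oc_123", user_id: "ou_bob", binding_mode: :chat)  # => "oc_123" (shared)
```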
data/README.md
CHANGED
@@ -6,77 +6,79 @@
 [](https://rubygems.org/gems/openclacky)
 [](LICENSE.txt)
 
-**
+**The most Token-efficient open-source AI Agent.**
 
-OpenClacky
+OpenClacky matches Claude Code on capability at comparable cost, and saves significantly against other open-source agents (~50% vs OpenClaw, ~3× cheaper than Hermes). 100% open source (MIT), BYOK with any OpenAI-compatible model, built on two years of Agentic R&D and harness engineering.
 
-
+> Website: https://www.openclacky.com/ · Backed by **MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital**
 
-
+## Why OpenClacky?
 
-OpenClacky
+Same task, how much do you pay? Under comparable agent workloads, OpenClacky saves a large amount of Token spend compared to mainstream alternatives.
 
-
+| Agent | Relative cost | Notes |
 |---|---|---|
-| **
-
-
-
-| **Driven by** | Technical contributors | Domain expertise |
+| **OpenClacky** | **~0.8–1.2×** | 16 tools · ~100% cache hit · subagent routing |
+| Claude Code | 1.0× (baseline) | World-class harness, closed-source subscription |
+| OpenClaw | ~1.5× | Comparable harness agent |
+| Hermes | ~3× | 52 built-in tools — schema bloat ~3–4× |
 
-
+*Numbers are averages measured on internal common agent tasks, using Claude Code as the baseline. Full benchmark reports will be published on GitHub.*
 
-
+## Feature comparison
 
-
-2. **Encrypt & protect** — Your logic stays yours; clients can't inspect or copy it
-3. **Package your brand** — Ship under your name, your logo, your onboarding experience
-4. **Launch & acquire** — One-click sales page, built-in SEO, start converting traffic
+Core agent capability is roughly on par across the field — the real differentiators are **cost, openness, Skill evolution, and integrations**.
 
-
+| Feature | Claude Code | OpenClaw | Hermes | **OpenClacky** |
+|---|:---:|:---:|:---:|:---:|
+| Token cost | 1.0× | ~1.5× | ~3× | **~0.8–1.2×** |
+| Open source | ❌ Closed | ✅ Open | ✅ Open | ✅ MIT |
+| BYOK / model freedom | ❌ Anthropic only | ✅ | ✅ | ✅ |
+| Skill self-evolution | ❌ | ❌ | ✅ | ✅ |
+| IM integration (Feishu / WeCom / WeChat) | ❌ | ✅ | ✅ | ✅ |
 
-
+## How we get the cost down
 
-
-- **Lawyers** — contract review, case retrieval, risk flagging
-- **Traders** — signal detection, strategy backtesting, automated execution
-- **Data analysts** — cleaning, modeling, report generation
-- **Content strategists** — topic selection, outlines, drafts at scale
+Not by cutting features — by compounding the right choice at every layer.
 
-
+### 1. Ultra-high cache hit rate
+Sessions never restart, double cache markers, **Insert-then-Compress** — the system prompt is never mutated, so compression still reuses the cache. **Measured cache hit rate: near 100%.**
 
-
-
-- [x] **White-label packaging** — Your brand, your product line, your client experience
-- [x] **Auto-update delivery** — Push updates to all users seamlessly, with version control
-- [x] **Cross-platform distribution** — Windows, macOS, Linux — one Skill, every platform
-- [x] **Sales page generator** — Launch your storefront fast, with built-in SEO foundations
-- [x] **Cost monitoring** — Real-time token tracking, automatic compression (up to 90% savings)
-- [x] **Multi-provider support** — OpenAI, Anthropic, DeepSeek, and any OpenAI-compatible API
-- [ ] **Skill marketplace** — Discover and distribute premium Skills *(coming soon)*
+### 2. Minimal tool set
+Only **16 core tools**. Capabilities are offloaded to the Skill ecosystem via a single `invoke_skill` meta-tool. Tool count is not the metric — task completion rate is.
 
-
+| OpenClacky | Claude Code | OpenClaw | Hermes |
+|:--:|:--:|:--:|:--:|
+| **16** | 40+ | 23 | 52 |
 
-
+### 3. Idle-time auto-compression
+Go to a meeting, grab coffee — the agent compresses long context in the background and pre-warms the cache. Your first message back hits the cache directly. **Cold-start first-token cost reduced by 50%+.**
 
-
-
-
-
-
-
+### 4. BYOK — you pick the model, you set the cost
+Any OpenAI-compatible API, plug and play. Official direct, aggregate routing, compatible relays — the choice is 100% yours. Use Claude for code, auto-route subtasks to DeepSeek, save another chunk of tokens.
+
+Built on **2 years · 3 generations of agentic architecture · 6 core harness engineering decisions**.
+
+## Skills — the soul of the agent
 
-
+- **Invoke with `/`** — instant browse, fuzzy search, direct call. Hundreds of Skills at your fingertips.
+- **Create Skills in natural language** — just describe what you want; the agent drafts `SKILL.md`, breaks down steps, and runs validation. No code required.
+- **Self-evolving** — after each run, the agent updates the Skill based on execution context and results. The next call is more stable and more accurate.
+- **Open & compatible** — supports Claude Skills / Markdown Pack / custom formats.
+- **Monetizable** — polished Skills can be packaged for sale, with encrypted distribution, License management, and creator-defined pricing.
 
 ## Installation
 
-###
+### Desktop installer (recommended)
 
-
-
-
+Double-click to install — environment, dependencies, and Skills all set up automatically.
+
+- **macOS** — [Download `.dmg`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.dmg) (Apple Silicon / Intel)
+- **Windows** — [Download `.exe`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.exe) (Windows 10 2004+ / Windows 11)
 
-
+More options: https://www.openclacky.com/
+
+### Command line
 
 **Requirements:** Ruby >= 3.1.0
 
@@ -84,6 +86,12 @@ Built on a production-ready Rails architecture with one-click deployment, dev/pr
 gem install openclacky
 ```
 
+Or one-line install:
+
+```bash
+/bin/bash -c "$(curl -sSL https://raw.githubusercontent.com/clacky-ai/openclacky/main/scripts/install.sh)"
+```
+
 ## Quick Start
 
 ### Terminal (CLI)
@@ -95,16 +103,16 @@ openclacky # start interactive agent in current directory
 ### Web UI
 
 ```bash
-openclacky server #
+openclacky server # default: http://localhost:7070
 ```
 
-
+Open **http://localhost:7070** for a full chat interface with multi-session support — run coding, copywriting, research sessions in parallel.
 
 Options:
 
 ```bash
-openclacky server --port 8080
-openclacky server --host 0.0.0.0
+openclacky server --port 8080 # custom port
+openclacky server --host 0.0.0.0 # listen on all interfaces (remote access)
 ```
 
 ## Configuration
@@ -114,7 +122,26 @@ $ openclacky
 > /config
 ```
 
-
+Set your **API Key**, **Model**, and **Base URL** (any OpenAI-compatible provider).
+
+Supported out of the box: **Claude (Anthropic) · GPT (OpenAI) · DeepSeek · Kimi (Moonshot) · MiniMax · OpenRouter** — or any custom endpoint.
+
+## Coding use case
+
+OpenClacky works as a general AI coding assistant — scaffold full-stack apps, add features, or explore unfamiliar codebases:
+
+```bash
+$ openclacky
+> /new my-app # scaffold a new project
+> Add user auth with email and password
+> How does the payment module work?
+```
+
+## Advanced — Creator Program
+
+Power users are already turning their workflows into vertical AI experts on OpenClacky — encrypted distribution, License management, self-set pricing. Legal, healthcare, financial planning, and more.
+
+Learn more: https://www.openclacky.com/ → Creators
 
 ## Install from Source
 
@@ -125,6 +152,13 @@ bundle install
 bin/clacky
 ```
 
+## Trust & Credibility
+
+- **100% open source** — MIT License, all code public, all decisions traceable
+- **2 years of Agentic R&D** — 3 generations of architecture
+- **16 core tools** — minimal by design
+- **Backed by** MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/clacky-ai/openclacky. Contributors are expected to adhere to the [code of conduct](https://github.com/clacky-ai/openclacky/blob/main/CODE_OF_CONDUCT.md).
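The README's "Insert-then-Compress" point rests on leaving the cached prefix alone: the system prompt is never rewritten, and compression inserts a summary message instead of editing what came before it. A conceptual sketch of that idea; the method, message shapes, and summary text below are illustrative, not the gem's real compression API:

```ruby
# Conceptual sketch only, not the gem's actual implementation.
# The system prompt (the largest cacheable block) is never rewritten, so the
# provider's prefix cache for it stays valid; compression INSERTS a summary
# message and keeps only the most recent turns verbatim.
def insert_then_compress(messages, summary, keep_last: 4)
  system_prompt = messages.first            # untouched, so its prefix cache entry is still reusable
  recent        = messages.last(keep_last)  # newest turns kept verbatim
  compressed    = { role: "user", content: "[COMPRESSED HISTORY]\n#{summary}" }

  [system_prompt, compressed, *recent]
end

history = [{ role: "system", content: "You are OpenClacky..." }] +
          (1..20).map { |i| { role: "user", content: "turn #{i}" } }

insert_then_compress(history, "Turns 1-16: user scaffolded a Rails app and fixed two bugs.").size
# => 6 (system prompt + summary + last 4 turns)
```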
data/lib/clacky/agent/cost_tracker.rb
CHANGED

@@ -105,8 +105,25 @@ module Clacky
         cache_write = usage[:cache_creation_input_tokens] || 0
         cache_read = usage[:cache_read_input_tokens] || 0
 
-        # Calculate token delta from previous iteration
-
+        # Calculate token delta from previous iteration.
+        #
+        # Two conventions exist for total_tokens across providers:
+        # - OpenAI (default): cumulative per-request input+output (grows
+        #   with history every turn). Delta = total - prev.
+        # - Anthropic direct: already the per-turn new compute
+        #   (raw_input + cache_creation + output).
+        #   The MessageFormat sets :total_is_per_turn so
+        #   we use total_tokens directly as the delta.
+        #
+        # Without this branch, Anthropic's per-turn total would be treated as
+        # cumulative and produce negative / nonsensical deltas whenever cached
+        # prefixes make the per-turn new-compute smaller than the previous turn.
+        delta_tokens =
+          if usage[:total_is_per_turn]
+            total_tokens
+          else
+            total_tokens - @previous_total_tokens
+          end
         @previous_total_tokens = total_tokens # Update for next iteration
 
         {
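The new `delta_tokens` branch above handles the two `total_tokens` conventions the comment describes. A small worked example of the same arithmetic with invented numbers; the helper below is a standalone restatement for illustration, not the class's actual method:

```ruby
# Standalone restatement of the delta logic above (numbers are invented).
def delta_tokens(usage, previous_total)
  if usage[:total_is_per_turn]
    usage[:total_tokens]                  # Anthropic direct: already this turn's new compute
  else
    usage[:total_tokens] - previous_total # OpenAI-style cumulative: subtract the previous total
  end
end

# OpenAI-style: totals grow with history, so the delta is the difference.
delta_tokens({ total_tokens: 12_000 }, 9_500)                          # => 2_500

# Anthropic direct: the total is already per-turn. Subtracting the previous
# (larger) cumulative figure would yield a nonsensical negative delta.
delta_tokens({ total_tokens: 1_800, total_is_per_turn: true }, 9_500)  # => 1_800
```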
data/lib/clacky/agent/llm_caller.rb
CHANGED

@@ -54,6 +54,20 @@ module Clacky
       max_retries = 10
       retry_delay = 5
       retries = 0
+
+      # Track whether any of the retry/fallback branches below opened a
+      # "retrying" progress slot via show_progress(progress_type:
+      # "retrying", phase: "active"). If so, we MUST close it before
+      # leaving call_llm — otherwise the UI's legacy shim in
+      # UI2::UIController keeps the :quiet ProgressHandle alive, its
+      # ticker thread keeps running, and the user sees a frozen
+      # "Network failed: ... (681s)" line long after the task finished.
+      #
+      # The close is done in the outer ensure below so it runs on:
+      # - normal success (response returned)
+      # - unrecoverable failure (raise propagates out)
+      # - BadRequestError reasoning-content retry success
+      retrying_progress_opened = false
       # One-shot flag set by the BadRequestError rescue below when the server
       # complained about missing reasoning_content. The subsequent retry will
       # pad every assistant message's reasoning_content, which satisfies
@@ -67,6 +81,7 @@ module Clacky
       thinking_retry_attempted = false
 
       begin
+        begin
         # Use active_messages (Time Machine) when undone, otherwise send full history.
         # to_api strips internal fields and handles orphaned tool_calls.
         messages_to_send = if respond_to?(:active_messages)
@@ -86,6 +101,19 @@ module Clacky
           # Successful response — if we were probing, confirm primary is healthy.
           handle_probe_success if @config.probing?
 
+          # ── Upstream truncation detector ──────────────────────────────────
+          # OpenRouter / Bedrock and other routers sometimes close the SSE
+          # stream mid-tool_use: we receive finish_reason="stop" together with
+          # a syntactically valid tool_call whose `arguments` JSON is empty,
+          # "{}" (placeholder before any key was streamed), or otherwise
+          # unparseable. Treat this as retryable — otherwise the agent would
+          # execute a tool with empty args (often failing cryptically) or
+          # silently exit thinking the task is done.
+          #
+          # Raises UpstreamTruncatedError (a RetryableError) so the rescue
+          # block below handles retry + fallback identically to 5xx/429.
+          detect_upstream_truncation!(response)
+
         rescue Faraday::TimeoutError => e
           # ── Read-timeout path (distinct from connection-level failures) ──
           # Faraday::TimeoutError on our non-streaming POST almost always means
@@ -118,6 +146,7 @@ module Clacky
               phase: "active",
               metadata: { attempt: retries, total: max_retries }
             )
+            retrying_progress_opened = true
             sleep retry_delay
             retry
           else
@@ -144,6 +173,7 @@ module Clacky
               phase: "active",
               metadata: { attempt: retries, total: max_retries }
             )
+            retrying_progress_opened = true
            sleep retry_delay
            retry
          else
@@ -180,6 +210,7 @@ module Clacky
              phase: "active",
              metadata: { attempt: retries, total: current_max }
            )
+            retrying_progress_opened = true
            sleep retry_delay
            retry
          else
@@ -212,7 +243,65 @@ module Clacky
       token_data = track_cost(response[:usage], raw_api_usage: response[:raw_api_usage])
       response[:token_usage] = token_data
 
+      # [DIAG] Log raw client response shape. Only emit when we see the
+      # "finish_reason=stop + non-empty tool_calls" combo, or when any
+      # tool_call's arguments look empty/unparseable — both indicate the
+      # upstream (Bedrock/relay/model) cut the tool_use stream short.
+      # Normal responses produce no log line (too noisy).
+      begin
+        tool_calls = response[:tool_calls] || []
+        if !tool_calls.empty?
+          raw_tcs = tool_calls.map do |c|
+            args_str = c[:arguments].is_a?(String) ? c[:arguments] : c[:arguments].to_s
+            parseable = begin
+              JSON.parse(args_str)
+              true
+            rescue StandardError
+              false
+            end
+            {
+              name: c[:name].to_s,
+              args_len: args_str.length,
+              args_parseable: parseable,
+              args_head: args_str[0, 120]
+            }
+          end
+          truncated_call = raw_tcs.any? { |t| t[:args_len] == 0 || t[:args_len] == 2 || !t[:args_parseable] }
+          suspicious = response[:finish_reason] == "stop"
+
+          if suspicious || truncated_call
+            Clacky::Logger.warn("llm.response_suspicious",
+              model: current_model,
+              finish_reason: response[:finish_reason].to_s,
+              tool_calls_count: raw_tcs.size,
+              tool_calls: raw_tcs,
+              completion_tokens: token_data[:completion_tokens],
+              ttft_ms: response.dig(:latency, :ttft_ms),
+              combo_stop_with_toolcalls: suspicious,
+              has_truncated_args: truncated_call
+            )
+          end
+        end
+      rescue StandardError => e
+        Clacky::Logger.warn("llm.response_log_failed", error: e.message)
+      end
+
       response
+    ensure
+      # Close any "retrying" progress slot that was opened during the
+      # retry/fallback loop above. The legacy UI shim allocates a
+      # separate :quiet ProgressHandle under the "retrying" key; if it
+      # is never finished its ticker thread keeps running and the user
+      # sees a stale "Network failed: ... (NNN s)" line long after the
+      # task has completed. This ensure runs on:
+      # - successful retry → close the slot, message is "Recovered"
+      #   so the final frame is informative rather than blank
+      # - unrecoverable failure that raises out → close the slot so
+      #   the spinner doesn't linger while the error bubbles up
+      if retrying_progress_opened
+        @ui&.show_progress(progress_type: "retrying", phase: "done")
+      end
+    end
     end
 
     # Attempt to activate the provider fallback model for the given primary model.
@@ -269,6 +358,87 @@ module Clacky
           msg.include?("must be provided"))
     end
 
+    # Detect upstream tool-call truncation and raise UpstreamTruncatedError
+    # so the standard RetryableError rescue (with fallback model support)
+    # handles retry identically to 5xx/429.
+    #
+    # Background: OpenRouter routes to Anthropic/Bedrock/etc. and passes
+    # through whatever the upstream sends. If the upstream closes the SSE
+    # stream mid-tool_use (observed with Anthropic at ~127 s TTFT under
+    # load), OpenRouter does NOT surface an error — it emits a valid
+    # `tool_calls[]` whose `arguments` is empty, `"{}"`, or non-parseable
+    # JSON. Without this check the agent would either execute the tool with
+    # empty args or (worse) silently exit thinking the task finished.
+    #
+    # Rule is deliberately narrow: we only intercept the case where the
+    # model streamed literally nothing into the tool_call arguments —
+    # i.e. `nil`, empty string, or the placeholder `"{}"`. Partial/invalid
+    # JSON (e.g. `{"path": "/tmp/x"`) is left to the existing
+    # ArgumentsParser → BadArgumentsError path, because the model already
+    # committed to specific values and feeding the parse error back as a
+    # tool_result lets it self-correct in one round-trip (faster than a
+    # blind retry from scratch).
+    private def detect_upstream_truncation!(response)
+      tool_calls = response[:tool_calls]
+      return if tool_calls.nil? || tool_calls.empty?
+
+      truncated = tool_calls.find { |tc| tool_call_args_truncated?(tc[:arguments]) }
+      return unless truncated
+
+      args_str = truncated[:arguments].is_a?(String) ? truncated[:arguments] : truncated[:arguments].to_s
+      Clacky::Logger.warn("llm.upstream_truncation_detected",
+        model: current_model,
+        tool_name: truncated[:name].to_s,
+        args_len: args_str.length,
+        args_head: args_str[0, 80],
+        finish_reason: response[:finish_reason].to_s,
+        completion_tokens: response.dig(:token_usage, :completion_tokens),
+        ttft_ms: response.dig(:latency, :ttft_ms)
+      )
+
+      # Inject a one-shot [SYSTEM] hint so a plain retry isn't doomed to the
+      # same fate when the truncation correlates with large tool_call args
+      # (e.g. writing a 5000-char file in one go). For infrastructure-level
+      # blips this hint is harmless — the retry usually succeeds on its own
+      # and the hint just sits in history without affecting behaviour.
+      inject_upstream_truncation_hint_if_first(truncated)
+
+      raise Clacky::UpstreamTruncatedError,
+            "[LLM] Upstream truncated tool_call `#{truncated[:name]}` " \
+            "(args=#{args_str[0, 40].inspect}). Retrying..."
+    end
+
+    # True when a tool_call's arguments field looks COMPLETELY empty —
+    # i.e. the upstream stream was cut before the model wrote any real
+    # content into the arguments JSON.
+    #
+    # Rules:
+    # - nil / non-String / empty string → truncated (nothing at all)
+    # - parses to {} (empty object) → truncated (placeholder only)
+    # - anything else (including partial/invalid JSON like `{"path":
+    #   "/tmp/x"` where the model already started writing) → NOT
+    #   truncated by this detector
+    #
+    # Partial-JSON cases are deliberately left to the existing
+    # ArgumentsParser → BadArgumentsError path, which surfaces the parse
+    # error back to the LLM as a tool_result so it can self-correct. That
+    # is more efficient than a blind retry when the model already wrote
+    # most of the args.
+    private def tool_call_args_truncated?(args)
+      return true if args.nil?
+      return true unless args.is_a?(String)
+      return true if args.empty?
+
+      parsed = begin
+        JSON.parse(args)
+      rescue JSON::ParserError
+        # Partial/invalid JSON — let ArgumentsParser handle it downstream.
+        return false
+      end
+
+      parsed.is_a?(Hash) && parsed.empty?
+    end
+
     # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
     # user message to the history instructing the model to break its work
     # into smaller steps. Subsequent timeouts in the same task are ignored
@@ -312,6 +482,54 @@ module Clacky
         "LLM response timed out — asking model to break the task into smaller steps and retrying..."
       )
     end
+
+    # On the FIRST upstream-truncation detection within a task, append a
+    # [SYSTEM] user message nudging the model toward smaller tool_call args.
+    # This guards against the (real but rare) case where the upstream SSE
+    # cut correlates with large tool_call payloads — a plain retry on the
+    # same oversized args would keep tripping the same wire.
+    #
+    # For purely infrastructural truncations (Anthropic edge blip, router
+    # hiccup), the hint is harmless — the retry will succeed and the hint
+    # just sits unused in history. Cheaper than letting the agent burn
+    # through its retry budget on the same oversized payload.
+    #
+    # Same plumbing as inject_large_output_hint_if_first_timeout: one-shot
+    # per task, carries `system_injected: true` so it's hidden from UI
+    # replay and skipped by compression/caching placement logic. Reset per
+    # task via Agent#run (see @task_upstream_truncation_hint_injected).
+    private def inject_upstream_truncation_hint_if_first(truncated_call)
+      return if @task_upstream_truncation_hint_injected
+
+      @task_upstream_truncation_hint_injected = true
+
+      tool_name = truncated_call[:name].to_s
+      hint = "[SYSTEM] The previous response was cut short by the upstream provider " \
+             "before the `#{tool_name}` tool_call finished streaming. " \
+             "The partial tool_call has been discarded. To avoid the same problem on retry, " \
+             "please adapt your approach:\n" \
+             "- Prefer smaller tool_call arguments — large single-shot payloads are more likely to be truncated.\n" \
+             "- For long file content: create the file first with a minimal skeleton via `write`, " \
+             "then append sections one at a time with `edit`.\n" \
+             "- Break large tasks into multiple smaller tool calls instead of one big one.\n" \
+             "- Keep each tool-call argument comfortably under ~2000 characters when possible."
+
+      @history.append({
+        role: "user",
+        content: hint,
+        system_injected: true,
+        task_id: @current_task_id
+      })
+
+      Clacky::Logger.info(
+        "[llm_caller] Upstream truncation — injected 'smaller tool_call args' hint " \
+        "(tool=#{tool_name.inspect})"
+      )
+
+      @ui&.show_warning(
+        "Upstream response was truncated mid tool-call — asking model to use smaller steps and retrying..."
+      )
+    end
   end
 end
end
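The narrow rule implemented by `tool_call_args_truncated?` above is easiest to see on a few sample inputs. The sketch restates the rule standalone for illustration, covering only the cases named in the diff's comments:

```ruby
require "json"

# Standalone restatement of the rule above: only a completely empty arguments
# payload (nil, a non-String, an empty string, or "{}") counts as an upstream
# truncation; partial or invalid JSON is left for the ArgumentsParser path.
def args_truncated?(args)
  return true if args.nil? || !args.is_a?(String) || args.empty?

  parsed = begin
    JSON.parse(args)
  rescue JSON::ParserError
    return false # partial/invalid JSON is not this detector's job
  end

  parsed.is_a?(Hash) && parsed.empty?
end

args_truncated?(nil)                  # => true  (nothing streamed)
args_truncated?("")                   # => true  (nothing streamed)
args_truncated?("{}")                 # => true  (placeholder only)
args_truncated?('{"path": "/tmp/x"')  # => false (partial JSON, handled downstream)
args_truncated?('{"path": "/tmp/x"}') # => false (complete, normal arguments)
```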
data/lib/clacky/agent/message_compressor_helper.rb
CHANGED

@@ -47,11 +47,41 @@ module Clacky
         handle_compression_response(response, compression_context, progress: handle)
         true
       rescue Clacky::AgentInterrupted => e
-
+        # User cancelled the idle compression — finish the quiet progress
+        # slot in place so the user sees exactly what happened (rather
+        # than the "Idle detected..." line being silently removed).
+        final = "Idle compression cancelled: #{e.message}"
+        if handle
+          handle.finish(final_message: final)
+        else
+          @ui&.log(final, level: :info)
+        end
         @history.rollback_before(compression_message)
+        Clacky::Logger.info("[idle-compress] cancelled: #{e.message}")
         false
       rescue => e
-
+        # Compression failed (most commonly: network errors after all
+        # LlmCaller retries exhausted). Previously this only wrote an
+        # @ui.log(:error) that was easy to miss — especially when no
+        # other output followed. Now we:
+        #   1. Replace the active quiet progress line with the error so
+        #      the user always sees *something* where the spinner was.
+        #   2. Emit a show_warning for a more prominent entry.
+        #   3. Persist to Clacky::Logger so post-mortem is possible even
+        #      if the terminal scrollback has rolled past.
+        final = "Idle compression failed: #{e.message}"
+        if handle
+          handle.finish(final_message: final)
+        else
+          @ui&.log(final, level: :error)
+        end
+        @ui&.show_warning(final)
+        Clacky::Logger.warn(
+          "[idle-compress] failed",
+          error_class: e.class.name,
+          error_message: e.message,
+          backtrace: e.backtrace&.first(5)
+        )
         @history.rollback_before(compression_message)
         false
       end