openclacky 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -0
  3. data/README.md +87 -53
  4. data/lib/clacky/agent/cost_tracker.rb +19 -2
  5. data/lib/clacky/agent/llm_caller.rb +218 -0
  6. data/lib/clacky/agent/message_compressor_helper.rb +32 -2
  7. data/lib/clacky/agent.rb +54 -22
  8. data/lib/clacky/client.rb +44 -5
  9. data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
  10. data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
  11. data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
  12. data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
  13. data/lib/clacky/default_skills/new/SKILL.md +3 -114
  14. data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
  15. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
  16. data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
  17. data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
  18. data/lib/clacky/message_format/anthropic.rb +72 -8
  19. data/lib/clacky/message_format/bedrock.rb +6 -3
  20. data/lib/clacky/providers.rb +146 -3
  21. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
  22. data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
  23. data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
  24. data/lib/clacky/server/channel/channel_manager.rb +12 -4
  25. data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
  26. data/lib/clacky/server/http_server.rb +746 -13
  27. data/lib/clacky/server/session_registry.rb +55 -24
  28. data/lib/clacky/skill.rb +10 -9
  29. data/lib/clacky/skill_loader.rb +23 -11
  30. data/lib/clacky/tools/file_reader.rb +232 -127
  31. data/lib/clacky/tools/security.rb +42 -64
  32. data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
  33. data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
  34. data/lib/clacky/tools/terminal/session_manager.rb +8 -3
  35. data/lib/clacky/tools/terminal.rb +263 -16
  36. data/lib/clacky/ui2/layout_manager.rb +8 -1
  37. data/lib/clacky/ui2/output_buffer.rb +83 -23
  38. data/lib/clacky/ui2/ui_controller.rb +74 -7
  39. data/lib/clacky/utils/file_processor.rb +14 -40
  40. data/lib/clacky/utils/model_pricing.rb +215 -0
  41. data/lib/clacky/utils/parser_manager.rb +70 -6
  42. data/lib/clacky/utils/string_matcher.rb +23 -1
  43. data/lib/clacky/version.rb +1 -1
  44. data/lib/clacky/web/app.css +673 -9
  45. data/lib/clacky/web/app.js +40 -1608
  46. data/lib/clacky/web/i18n.js +209 -0
  47. data/lib/clacky/web/index.html +166 -2
  48. data/lib/clacky/web/onboard.js +77 -1
  49. data/lib/clacky/web/profile.js +442 -0
  50. data/lib/clacky/web/sessions.js +1034 -2
  51. data/lib/clacky/web/settings.js +127 -6
  52. data/lib/clacky/web/sidebar.js +39 -0
  53. data/lib/clacky/web/skills.js +460 -0
  54. data/lib/clacky/web/trash.js +343 -0
  55. data/lib/clacky/web/ws-dispatcher.js +255 -0
  56. data/lib/clacky.rb +5 -3
  57. metadata +16 -17
  58. data/lib/clacky/clacky_auth_client.rb +0 -152
  59. data/lib/clacky/clacky_cloud_config.rb +0 -123
  60. data/lib/clacky/cloud_project_client.rb +0 -169
  61. data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
  62. data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
  63. data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
  64. data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
  65. data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
  66. data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
  67. data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
  68. data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
  69. data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
  70. data/lib/clacky/deploy_api_client.rb +0 -484
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
4
- data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
3
+ metadata.gz: d36230a47c25a8b5fb04dfc14f9359155489a2539d0a699843e140deed1434ba
4
+ data.tar.gz: c237725ed637d2d7a852d3624611cca101290e2348e0c6befb2650342550ec03
5
5
  SHA512:
6
- metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
7
- data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
6
+ metadata.gz: 89c65d848c67dff3ed63ae70cd6a0539a7a8068682d72009b34741ea09c44749f5fa05c5839bc9c02c5c499709c8e5bce321165561bdbf8a43500539d1e4b21c
7
+ data.tar.gz: 74ebac898a16e090481c8ba423ac7c2d9cafe918f09cdc87066b54c911034b941c713650d24aaa8d71c627c48d3c8c56a780c2ffa6e717448e4712cdd5ca9512
data/CHANGELOG.md CHANGED
@@ -5,6 +5,45 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.0.2] - 2026-05-07
9
+
10
+ ### Added
11
+ - **Multi-region provider endpoints.** Providers can now expose multiple endpoint variants (e.g. global vs. CN-optimized Anthropic), and you can switch between them from both the onboarding flow and the Settings page. Bundled with updated model pricing data so cost estimates stay accurate across regions. (#67)
12
+ - **Pre-installed platform-recommended skills during onboarding.** New users get a curated set of skills automatically during onboard — downloaded concurrently with dual-host fallback and a hard deadline so onboarding never hangs on a slow mirror. (#68)
13
+ - **Builtin skills served via platform API.** Recommended skills are now fetched through `/api/v1/skills/builtin`, making the list easier to update without shipping a new gem. (#72)
14
+ - **Feishu group chats: respond only when @-mentioned.** The Feishu adapter now parses the mentions array and ignores group messages that don't @ the bot, so the bot no longer replies to every message in a busy group. Sessions are also isolated per (chat, user) pair by default (`:chat_user` binding mode), preventing context leaks between DMs and groups. (#71)
15
+
16
+ ### Fixed
17
+ - **Recover from truncated upstream tool calls.** When an upstream LLM response cuts off mid tool-call, the agent now detects the truncation and recovers automatically instead of getting stuck. Covered by extensive new tests.
18
+ - **Feedback option click now sends the message.** Clicking a suggested feedback option previously set the input text but silently failed to send (due to a `sendMessage` vs `_sendMessage` scope bug). Now it dispatches immediately as expected. (#69)
19
+ - **Sidebar footer and input area heights aligned.** Introduced a shared `--footer-height` CSS variable (56px) and reworked the stop button to use a pseudo-element square for pixel-perfect centering — both columns now line up cleanly. (#70)
20
+ - **Feishu bot fails closed on API outage.** If `/open-apis/bot/v3/info` fails and `bot_open_id` can't be resolved, the adapter now drops group messages (with a warning) instead of falling back to replying to every group message.
21
+ - **`preview.md` no longer pollutes user project directories.** Preview files are written to the system tmpdir, and plain text formats (md/log/csv) skip preview generation entirely since they're already readable as-is.
22
+
23
+ ### More
24
+ - Added agent stop logging to make interrupt / stop chains easier to debug.
25
+
26
+ ## [1.0.1] - 2026-05-06
27
+
28
+ ### Added
29
+ - **OpenRouter Anthropic API support.** You can now route Claude model requests through OpenRouter, giving access to Anthropic models via a single OpenRouter API key — useful when Anthropic direct access is limited in your region.
30
+ - **GPT provider support.** Direct GPT provider configuration is now available alongside other providers, making it easier to switch between different OpenAI-compatible endpoints.
31
+ - **OCR-powered PDF reading.** PDF files that contain scanned images (non-text PDFs) are now readable via OCR, allowing the agent to extract content from scanned documents, invoices, and image-heavy PDFs.
32
+ - **Terminal output size control.** The agent now limits terminal output to a configurable size, preventing token overflows when running commands that produce very long output.
33
+ - **Memories & Trash manager in Web UI.** A new management panel lets you browse, review, and delete agent memories and trashed files directly from the Web UI.
34
+ - **Watchdog for interrupt messages.** A background watchdog ensures interrupt signals reliably stop the agent even when it's deep in a tool execution loop.
35
+ - **Skill import with category directory scanning.** When importing skills from openclaw packages, nested category directories are now scanned automatically, so all skills in a category bundle are imported at once.
36
+
37
+ ### Improved
38
+ - **Deploy skill simplified.** The deploy skill now uses Railway CLI directly without custom helper tools, making deployments more reliable and the codebase significantly lighter.
39
+ - **Fix double-render of progress indicators.** Progress spinners and status lines no longer render twice in quick succession, keeping the Web UI output clean.
40
+ - **Session idle status tracking and file descriptor cleanup.** Sessions now correctly report idle state when the agent finishes, and open file descriptors are properly closed to avoid resource leaks.
41
+ - **GPT-4.1 and GPT-5 pricing added.** Model cost tracking now includes the latest GPT-4.1 and GPT-5 pricing tiers.
42
+
43
+ ### Fixed
44
+ - **UTF-8 encoding error in file preview.** Opening files with non-UTF-8 characters no longer crashes the preview — they are now handled gracefully.
45
+ - **Expand `~` in openfile path.** The "open file in editor" API endpoint now correctly expands `~` to the user's home directory.
46
+
8
47
  ## [1.0.0] - 2026-04-30
9
48
 
10
49
  ### Added
data/README.md CHANGED
@@ -6,77 +6,79 @@
6
6
  [![Downloads](https://img.shields.io/gem/dt/openclacky?label=downloads&style=flat-square&color=brightgreen)](https://rubygems.org/gems/openclacky)
7
7
  [![License](https://img.shields.io/badge/license-MIT-lightgrey?style=flat-square)](LICENSE.txt)
8
8
 
9
- **From expertise to business turn your professional knowledge into a monetizable OpenClaw Skill.**
9
+ **The most Token-efficient open-source AI Agent.**
10
10
 
11
- OpenClacky is the creator-side platform for the OpenClaw ecosystem. Package your methods and workflows into encrypted, white-labeled Skills that your clients install and use under your name, your brand, your price.
11
+ OpenClacky matches Claude Code on capability at comparable cost, and saves significantly against other open-source agents (~50% vs OpenClaw, ~3× cheaper than Hermes). 100% open source (MIT), BYOK with any OpenAI-compatible model, built on two years of Agentic R&D and harness engineering.
12
12
 
13
- ## Why OpenClacky?
13
+ > Website: https://www.openclacky.com/ · Backed by **MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital**
14
14
 
15
- The OpenClaw ecosystem has 5,700+ Skills and growing. But almost all of them are open-sourced, free, and easily copied. The real scarcity isn't more Skills — it's **expertise-backed, production-grade Skills worth paying for**.
15
+ ## Why OpenClacky?
16
16
 
17
- OpenClacky is built for the people who have that expertise.
17
+ Same task, how much do you pay? Under comparable agent workloads, OpenClacky significantly reduces token spend compared to mainstream alternatives.
18
18
 
19
- | | **Openclaw** | **OpenClacky** |
19
+ | Agent | Relative cost | Notes |
20
20
  |---|---|---|
21
- | **Core model** | Open sharing | Encrypted & protected |
22
- | **Primary users** | Users who install Skills | Creators who sell Skills |
23
- | **Revenue** | None | Creator-defined pricing |
24
- | **Brand** | Platform brand | Your own brand |
25
- | **Driven by** | Technical contributors | Domain expertise |
21
+ | **OpenClacky** | **~0.8–1.2×** | 16 tools · ~100% cache hit · subagent routing |
22
+ | Claude Code | 1.0× (baseline) | World-class harness, closed-source subscription |
23
+ | OpenClaw | ~1.5× | Comparable harness agent |
24
+ | Hermes | ~3× | 52 built-in tools — schema bloat ~3–4× |
26
25
 
27
- ## How It Works
26
+ *Numbers are averages measured on internal common agent tasks, using Claude Code as the baseline. Full benchmark reports will be published on GitHub.*
28
27
 
29
- **Four steps from capability to business:**
28
+ ## Feature comparison
30
29
 
31
- 1. **Craft your Skill**Turn your domain methodology into a repeatable AI workflow
32
- 2. **Encrypt & protect** — Your logic stays yours; clients can't inspect or copy it
33
- 3. **Package your brand** — Ship under your name, your logo, your onboarding experience
34
- 4. **Launch & acquire** — One-click sales page, built-in SEO, start converting traffic
30
+ Core agent capability is roughly on par across the field — the real differentiators are **cost, openness, Skill evolution, and integrations**.
35
31
 
36
- ## Who It's For
32
+ | Feature | Claude Code | OpenClaw | Hermes | **OpenClacky** |
33
+ |---|:---:|:---:|:---:|:---:|
34
+ | Token cost | 1.0× | ~1.5× | ~3× | **~0.8–1.2×** |
35
+ | Open source | ❌ Closed | ✅ Open | ✅ Open | ✅ MIT |
36
+ | BYOK / model freedom | ❌ Anthropic only | ✅ | ✅ | ✅ |
37
+ | Skill self-evolution | ❌ | ❌ | ✅ | ✅ |
38
+ | IM integration (Feishu / WeCom / WeChat) | ❌ | ✅ | ✅ | ✅ |
37
39
 
38
- OpenClacky is built for domain experts whose knowledge can be expressed as *information processing + executable actions*:
40
+ ## How we get the cost down
39
41
 
40
- - **SEO specialists**keyword research, content scoring, rank monitoring
41
- - **Lawyers** — contract review, case retrieval, risk flagging
42
- - **Traders** — signal detection, strategy backtesting, automated execution
43
- - **Data analysts** — cleaning, modeling, report generation
44
- - **Content strategists** — topic selection, outlines, drafts at scale
42
+ Not by cutting features — by compounding the right choice at every layer.
45
43
 
46
- ## Features
44
+ ### 1. Ultra-high cache hit rate
45
+ Sessions never restart, double cache markers, **Insert-then-Compress** — the system prompt is never mutated, so compression still reuses the cache. **Measured cache hit rate: near 100%.**
47
46
 
48
- - [x] **Skill builder** — Create AI workflows via conversation or UI, iterate and ship fast
49
- - [x] **Encryption** Protect your knowledge assets; end users cannot read your Skill source
50
- - [x] **White-label packaging** — Your brand, your product line, your client experience
51
- - [x] **Auto-update delivery** — Push updates to all users seamlessly, with version control
52
- - [x] **Cross-platform distribution** — Windows, macOS, Linux — one Skill, every platform
53
- - [x] **Sales page generator** — Launch your storefront fast, with built-in SEO foundations
54
- - [x] **Cost monitoring** — Real-time token tracking, automatic compression (up to 90% savings)
55
- - [x] **Multi-provider support** — OpenAI, Anthropic, DeepSeek, and any OpenAI-compatible API
56
- - [ ] **Skill marketplace** — Discover and distribute premium Skills *(coming soon)*
47
+ ### 2. Minimal tool set
48
+ Only **16 core tools**. Capabilities are offloaded to the Skill ecosystem via a single `invoke_skill` meta-tool. Tool count is not the metric — task completion rate is.
57
49
 
58
- ## Coding Support
50
+ | OpenClacky | Claude Code | OpenClaw | Hermes |
51
+ |:--:|:--:|:--:|:--:|
52
+ | **16** | 40+ | 23 | 52 |
59
53
 
60
- OpenClacky also works as a general AI coding assistant — scaffold full-stack Rails apps, add features, or explore an unfamiliar codebase:
54
+ ### 3. Idle-time auto-compression
55
+ Go to a meeting, grab coffee — the agent compresses long context in the background and pre-warms the cache. Your first message back hits the cache directly. **Cold-start first-token cost reduced by 50%+.**
61
56
 
62
- ```bash
63
- $ openclacky
64
- > /new my-app # scaffold a full-stack Rails app
65
- > Add user auth with email and password
66
- > How does the payment module work?
67
- ```
57
+ ### 4. BYOK — you pick the model, you set the cost
58
+ Any OpenAI-compatible API, plug and play. Official direct, aggregate routing, compatible relays — the choice is 100% yours. Use Claude for code, auto-route subtasks to DeepSeek, save another chunk of tokens.
59
+
60
+ Built on **2 years · 3 generations of agentic architecture · 6 core harness engineering decisions**.
61
+
62
+ ## Skills — the soul of the agent
68
63
 
69
- Built on a production-ready Rails architecture with one-click deployment, dev/prod isolation, and automatic backups.
64
+ - **Invoke with `/`** — instant browse, fuzzy search, direct call. Hundreds of Skills at your fingertips.
65
+ - **Create Skills in natural language** — just describe what you want; the agent drafts `SKILL.md`, breaks down steps, and runs validation. No code required.
66
+ - **Self-evolving** — after each run, the agent updates the Skill based on execution context and results. The next call is more stable and more accurate.
67
+ - **Open & compatible** — supports Claude Skills / Markdown Pack / custom formats.
68
+ - **Monetizable** — polished Skills can be packaged for sale, with encrypted distribution, License management, and creator-defined pricing.
70
69
 
71
70
  ## Installation
72
71
 
73
- ### Method 1: One-line Install (Recommended)
72
+ ### Desktop installer (recommended)
74
73
 
75
- ```bash
76
- /bin/bash -c "$(curl -sSL https://raw.githubusercontent.com/clacky-ai/openclacky/main/scripts/install.sh)"
77
- ```
74
+ Double-click to install — environment, dependencies, and Skills all set up automatically.
75
+
76
+ - **macOS** — [Download `.dmg`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.dmg) (Apple Silicon / Intel)
77
+ - **Windows** — [Download `.exe`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.exe) (Windows 10 2004+ / Windows 11)
78
78
 
79
- ### Method 2: RubyGems
79
+ More options: https://www.openclacky.com/
80
+
81
+ ### Command line
80
82
 
81
83
  **Requirements:** Ruby >= 3.1.0
82
84
 
@@ -84,6 +86,12 @@ Built on a production-ready Rails architecture with one-click deployment, dev/pr
84
86
  gem install openclacky
85
87
  ```
86
88
 
89
+ Or one-line install:
90
+
91
+ ```bash
92
+ /bin/bash -c "$(curl -sSL https://raw.githubusercontent.com/clacky-ai/openclacky/main/scripts/install.sh)"
93
+ ```
94
+
87
95
  ## Quick Start
88
96
 
89
97
  ### Terminal (CLI)
@@ -95,16 +103,16 @@ openclacky # start interactive agent in current directory
95
103
  ### Web UI
96
104
 
97
105
  ```bash
98
- openclacky server # start the web server (default: http://localhost:7070)
106
+ openclacky server # default: http://localhost:7070
99
107
  ```
100
108
 
101
- Then open **http://localhost:7070** in your browser. You'll get a full-featured chat interface with multi-session support — run separate sessions for coding, copywriting, research, and more, all in parallel.
109
+ Open **http://localhost:7070** for a full chat interface with multi-session support — run coding, copywriting, research sessions in parallel.
102
110
 
103
111
  Options:
104
112
 
105
113
  ```bash
106
- openclacky server --port 8080 # custom port
107
- openclacky server --host 0.0.0.0 # listen on all interfaces (e.g. remote access)
114
+ openclacky server --port 8080 # custom port
115
+ openclacky server --host 0.0.0.0 # listen on all interfaces (remote access)
108
116
  ```
109
117
 
110
118
  ## Configuration
@@ -114,7 +122,26 @@ $ openclacky
114
122
  > /config
115
123
  ```
116
124
 
117
- You'll be prompted to set your **API Key**, **Model**, and **Base URL** (any OpenAI-compatible provider).
125
+ Set your **API Key**, **Model**, and **Base URL** (any OpenAI-compatible provider).
126
+
127
+ Supported out of the box: **Claude (Anthropic) · GPT (OpenAI) · DeepSeek · Kimi (Moonshot) · MiniMax · OpenRouter** — or any custom endpoint.
128
+
129
+ ## Coding use case
130
+
131
+ OpenClacky works as a general AI coding assistant — scaffold full-stack apps, add features, or explore unfamiliar codebases:
132
+
133
+ ```bash
134
+ $ openclacky
135
+ > /new my-app # scaffold a new project
136
+ > Add user auth with email and password
137
+ > How does the payment module work?
138
+ ```
139
+
140
+ ## Advanced — Creator Program
141
+
142
+ Power users are already turning their workflows into vertical AI experts on OpenClacky — encrypted distribution, License management, self-set pricing. Legal, healthcare, financial planning, and more.
143
+
144
+ Learn more: https://www.openclacky.com/ → Creators
118
145
 
119
146
  ## Install from Source
120
147
 
@@ -125,6 +152,13 @@ bundle install
125
152
  bin/clacky
126
153
  ```
127
154
 
155
+ ## Trust & Credibility
156
+
157
+ - **100% open source** — MIT License, all code public, all decisions traceable
158
+ - **2 years of Agentic R&D** — 3 generations of architecture
159
+ - **16 core tools** — minimal by design
160
+ - **Backed by** MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital
161
+
128
162
  ## Contributing
129
163
 
130
164
  Bug reports and pull requests are welcome on GitHub at https://github.com/clacky-ai/openclacky. Contributors are expected to adhere to the [code of conduct](https://github.com/clacky-ai/openclacky/blob/main/CODE_OF_CONDUCT.md).
@@ -105,8 +105,25 @@ module Clacky
105
105
  cache_write = usage[:cache_creation_input_tokens] || 0
106
106
  cache_read = usage[:cache_read_input_tokens] || 0
107
107
 
108
- # Calculate token delta from previous iteration
109
- delta_tokens = total_tokens - @previous_total_tokens
108
+ # Calculate token delta from previous iteration.
109
+ #
110
+ # Two conventions exist for total_tokens across providers:
111
+ # - OpenAI (default): cumulative per-request input+output (grows
112
+ # with history every turn). Delta = total - prev.
113
+ # - Anthropic direct: already the per-turn new compute
114
+ # (raw_input + cache_creation + output).
115
+ # The MessageFormat sets :total_is_per_turn so
116
+ # we use total_tokens directly as the delta.
117
+ #
118
+ # Without this branch, Anthropic's per-turn total would be treated as
119
+ # cumulative and produce negative / nonsensical deltas whenever cached
120
+ # prefixes make the per-turn new-compute smaller than the previous turn.
121
+ delta_tokens =
122
+ if usage[:total_is_per_turn]
123
+ total_tokens
124
+ else
125
+ total_tokens - @previous_total_tokens
126
+ end
110
127
  @previous_total_tokens = total_tokens # Update for next iteration
111
128
 
112
129
  {
@@ -54,6 +54,20 @@ module Clacky
54
54
  max_retries = 10
55
55
  retry_delay = 5
56
56
  retries = 0
57
+
58
+ # Track whether any of the retry/fallback branches below opened a
59
+ # "retrying" progress slot via show_progress(progress_type:
60
+ # "retrying", phase: "active"). If so, we MUST close it before
61
+ # leaving call_llm — otherwise the UI's legacy shim in
62
+ # UI2::UIController keeps the :quiet ProgressHandle alive, its
63
+ # ticker thread keeps running, and the user sees a frozen
64
+ # "Network failed: ... (681s)" line long after the task finished.
65
+ #
66
+ # The close is done in the outer ensure below so it runs on:
67
+ # - normal success (response returned)
68
+ # - unrecoverable failure (raise propagates out)
69
+ # - BadRequestError reasoning-content retry success
70
+ retrying_progress_opened = false
57
71
  # One-shot flag set by the BadRequestError rescue below when the server
58
72
  # complained about missing reasoning_content. The subsequent retry will
59
73
  # pad every assistant message's reasoning_content, which satisfies
@@ -67,6 +81,7 @@ module Clacky
67
81
  thinking_retry_attempted = false
68
82
 
69
83
  begin
84
+ begin
70
85
  # Use active_messages (Time Machine) when undone, otherwise send full history.
71
86
  # to_api strips internal fields and handles orphaned tool_calls.
72
87
  messages_to_send = if respond_to?(:active_messages)
@@ -86,6 +101,19 @@ module Clacky
86
101
  # Successful response — if we were probing, confirm primary is healthy.
87
102
  handle_probe_success if @config.probing?
88
103
 
104
+ # ── Upstream truncation detector ──────────────────────────────────
105
+ # OpenRouter / Bedrock and other routers sometimes close the SSE
106
+ # stream mid-tool_use: we receive finish_reason="stop" together with
107
+ # a syntactically valid tool_call whose `arguments` JSON is empty,
108
+ # "{}" (placeholder before any key was streamed), or otherwise
109
+ # unparseable. Treat this as retryable — otherwise the agent would
110
+ # execute a tool with empty args (often failing cryptically) or
111
+ # silently exit thinking the task is done.
112
+ #
113
+ # Raises UpstreamTruncatedError (a RetryableError) so the rescue
114
+ # block below handles retry + fallback identically to 5xx/429.
115
+ detect_upstream_truncation!(response)
116
+
89
117
  rescue Faraday::TimeoutError => e
90
118
  # ── Read-timeout path (distinct from connection-level failures) ──
91
119
  # Faraday::TimeoutError on our non-streaming POST almost always means
@@ -118,6 +146,7 @@ module Clacky
118
146
  phase: "active",
119
147
  metadata: { attempt: retries, total: max_retries }
120
148
  )
149
+ retrying_progress_opened = true
121
150
  sleep retry_delay
122
151
  retry
123
152
  else
@@ -144,6 +173,7 @@ module Clacky
144
173
  phase: "active",
145
174
  metadata: { attempt: retries, total: max_retries }
146
175
  )
176
+ retrying_progress_opened = true
147
177
  sleep retry_delay
148
178
  retry
149
179
  else
@@ -180,6 +210,7 @@ module Clacky
180
210
  phase: "active",
181
211
  metadata: { attempt: retries, total: current_max }
182
212
  )
213
+ retrying_progress_opened = true
183
214
  sleep retry_delay
184
215
  retry
185
216
  else
@@ -212,7 +243,65 @@ module Clacky
212
243
  token_data = track_cost(response[:usage], raw_api_usage: response[:raw_api_usage])
213
244
  response[:token_usage] = token_data
214
245
 
246
+ # [DIAG] Log raw client response shape. Only emit when we see the
247
+ # "finish_reason=stop + non-empty tool_calls" combo, or when any
248
+ # tool_call's arguments look empty/unparseable — both indicate the
249
+ # upstream (Bedrock/relay/model) cut the tool_use stream short.
250
+ # Normal responses produce no log line (too noisy).
251
+ begin
252
+ tool_calls = response[:tool_calls] || []
253
+ if !tool_calls.empty?
254
+ raw_tcs = tool_calls.map do |c|
255
+ args_str = c[:arguments].is_a?(String) ? c[:arguments] : c[:arguments].to_s
256
+ parseable = begin
257
+ JSON.parse(args_str)
258
+ true
259
+ rescue StandardError
260
+ false
261
+ end
262
+ {
263
+ name: c[:name].to_s,
264
+ args_len: args_str.length,
265
+ args_parseable: parseable,
266
+ args_head: args_str[0, 120]
267
+ }
268
+ end
269
+ truncated_call = raw_tcs.any? { |t| t[:args_len] == 0 || t[:args_len] == 2 || !t[:args_parseable] }
270
+ suspicious = response[:finish_reason] == "stop"
271
+
272
+ if suspicious || truncated_call
273
+ Clacky::Logger.warn("llm.response_suspicious",
274
+ model: current_model,
275
+ finish_reason: response[:finish_reason].to_s,
276
+ tool_calls_count: raw_tcs.size,
277
+ tool_calls: raw_tcs,
278
+ completion_tokens: token_data[:completion_tokens],
279
+ ttft_ms: response.dig(:latency, :ttft_ms),
280
+ combo_stop_with_toolcalls: suspicious,
281
+ has_truncated_args: truncated_call
282
+ )
283
+ end
284
+ end
285
+ rescue StandardError => e
286
+ Clacky::Logger.warn("llm.response_log_failed", error: e.message)
287
+ end
288
+
215
289
  response
290
+ ensure
291
+ # Close any "retrying" progress slot that was opened during the
292
+ # retry/fallback loop above. The legacy UI shim allocates a
293
+ # separate :quiet ProgressHandle under the "retrying" key; if it
294
+ # is never finished its ticker thread keeps running and the user
295
+ # sees a stale "Network failed: ... (NNN s)" line long after the
296
+ # task has completed. This ensure runs on:
297
+ # - successful retry → close the slot, message is "Recovered"
298
+ # so the final frame is informative rather than blank
299
+ # - unrecoverable failure that raises out → close the slot so
300
+ # the spinner doesn't linger while the error bubbles up
301
+ if retrying_progress_opened
302
+ @ui&.show_progress(progress_type: "retrying", phase: "done")
303
+ end
304
+ end
216
305
  end
217
306
 
218
307
  # Attempt to activate the provider fallback model for the given primary model.
@@ -269,6 +358,87 @@ module Clacky
269
358
  msg.include?("must be provided"))
270
359
  end
271
360
 
361
+ # Detect upstream tool-call truncation and raise UpstreamTruncatedError
362
+ # so the standard RetryableError rescue (with fallback model support)
363
+ # handles retry identically to 5xx/429.
364
+ #
365
+ # Background: OpenRouter routes to Anthropic/Bedrock/etc. and passes
366
+ # through whatever the upstream sends. If the upstream closes the SSE
367
+ # stream mid-tool_use (observed with Anthropic at ~127 s TTFT under
368
+ # load), OpenRouter does NOT surface an error — it emits a valid
369
+ # `tool_calls[]` whose `arguments` is empty, `"{}"`, or non-parseable
370
+ # JSON. Without this check the agent would either execute the tool with
371
+ # empty args or (worse) silently exit thinking the task finished.
372
+ #
373
+ # Rule is deliberately narrow: we only intercept the case where the
374
+ # model streamed literally nothing into the tool_call arguments —
375
+ # i.e. `nil`, empty string, or the placeholder `"{}"`. Partial/invalid
376
+ # JSON (e.g. `{"path": "/tmp/x"`) is left to the existing
377
+ # ArgumentsParser → BadArgumentsError path, because the model already
378
+ # committed to specific values and feeding the parse error back as a
379
+ # tool_result lets it self-correct in one round-trip (faster than a
380
+ # blind retry from scratch).
381
+ private def detect_upstream_truncation!(response)
382
+ tool_calls = response[:tool_calls]
383
+ return if tool_calls.nil? || tool_calls.empty?
384
+
385
+ truncated = tool_calls.find { |tc| tool_call_args_truncated?(tc[:arguments]) }
386
+ return unless truncated
387
+
388
+ args_str = truncated[:arguments].is_a?(String) ? truncated[:arguments] : truncated[:arguments].to_s
389
+ Clacky::Logger.warn("llm.upstream_truncation_detected",
390
+ model: current_model,
391
+ tool_name: truncated[:name].to_s,
392
+ args_len: args_str.length,
393
+ args_head: args_str[0, 80],
394
+ finish_reason: response[:finish_reason].to_s,
395
+ completion_tokens: response.dig(:token_usage, :completion_tokens),
396
+ ttft_ms: response.dig(:latency, :ttft_ms)
397
+ )
398
+
399
+ # Inject a one-shot [SYSTEM] hint so a plain retry isn't doomed to the
400
+ # same fate when the truncation correlates with large tool_call args
401
+ # (e.g. writing a 5000-char file in one go). For infrastructure-level
402
+ # blips this hint is harmless — the retry usually succeeds on its own
403
+ # and the hint just sits in history without affecting behaviour.
404
+ inject_upstream_truncation_hint_if_first(truncated)
405
+
406
+ raise Clacky::UpstreamTruncatedError,
407
+ "[LLM] Upstream truncated tool_call `#{truncated[:name]}` " \
408
+ "(args=#{args_str[0, 40].inspect}). Retrying..."
409
+ end
410
+
411
+ # True when a tool_call's arguments field looks COMPLETELY empty —
412
+ # i.e. the upstream stream was cut before the model wrote any real
413
+ # content into the arguments JSON.
414
+ #
415
+ # Rules:
416
+ # - nil / non-String / empty string → truncated (nothing at all)
417
+ # - parses to {} (empty object) → truncated (placeholder only)
418
+ # - anything else (including partial/invalid JSON like `{"path":
419
+ # "/tmp/x"` where the model already started writing) → NOT
420
+ # truncated by this detector
421
+ #
422
+ # Partial-JSON cases are deliberately left to the existing
423
+ # ArgumentsParser → BadArgumentsError path, which surfaces the parse
424
+ # error back to the LLM as a tool_result so it can self-correct. That
425
+ # is more efficient than a blind retry when the model already wrote
426
+ # most of the args.
427
+ private def tool_call_args_truncated?(args)
428
+ return true if args.nil?
429
+ return true unless args.is_a?(String)
430
+ return true if args.empty?
431
+
432
+ parsed = begin
433
+ JSON.parse(args)
434
+ rescue JSON::ParserError
435
+ # Partial/invalid JSON — let ArgumentsParser handle it downstream.
436
+ return false
437
+ end
438
+
439
+ parsed.is_a?(Hash) && parsed.empty?
440
+ end
441
+
272
442
  # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
273
443
  # user message to the history instructing the model to break its work
274
444
  # into smaller steps. Subsequent timeouts in the same task are ignored
@@ -312,6 +482,54 @@ module Clacky
312
482
  "LLM response timed out — asking model to break the task into smaller steps and retrying..."
313
483
  )
314
484
  end
485
+
486
+ # On the FIRST upstream-truncation detection within a task, append a
487
+ # [SYSTEM] user message nudging the model toward smaller tool_call args.
488
+ # This guards against the (real but rare) case where the upstream SSE
489
+ # cut correlates with large tool_call payloads — a plain retry on the
490
+ # same oversized args would keep tripping the same wire.
491
+ #
492
+ # For purely infrastructural truncations (Anthropic edge blip, router
493
+ # hiccup), the hint is harmless — the retry will succeed and the hint
494
+ # just sits unused in history. Cheaper than letting the agent burn
495
+ # through its retry budget on the same oversized payload.
496
+ #
497
+ # Same plumbing as inject_large_output_hint_if_first_timeout: one-shot
498
+ # per task, carries `system_injected: true` so it's hidden from UI
499
+ # replay and skipped by compression/caching placement logic. Reset per
500
+ # task via Agent#run (see @task_upstream_truncation_hint_injected).
501
+ private def inject_upstream_truncation_hint_if_first(truncated_call)
502
+ return if @task_upstream_truncation_hint_injected
503
+
504
+ @task_upstream_truncation_hint_injected = true
505
+
506
+ tool_name = truncated_call[:name].to_s
507
+ hint = "[SYSTEM] The previous response was cut short by the upstream provider " \
508
+ "before the `#{tool_name}` tool_call finished streaming. " \
509
+ "The partial tool_call has been discarded. To avoid the same problem on retry, " \
510
+ "please adapt your approach:\n" \
511
+ "- Prefer smaller tool_call arguments — large single-shot payloads are more likely to be truncated.\n" \
512
+ "- For long file content: create the file first with a minimal skeleton via `write`, " \
513
+ "then append sections one at a time with `edit`.\n" \
514
+ "- Break large tasks into multiple smaller tool calls instead of one big one.\n" \
515
+ "- Keep each tool-call argument comfortably under ~2000 characters when possible."
516
+
517
+ @history.append({
518
+ role: "user",
519
+ content: hint,
520
+ system_injected: true,
521
+ task_id: @current_task_id
522
+ })
523
+
524
+ Clacky::Logger.info(
525
+ "[llm_caller] Upstream truncation — injected 'smaller tool_call args' hint " \
526
+ "(tool=#{tool_name.inspect})"
527
+ )
528
+
529
+ @ui&.show_warning(
530
+ "Upstream response was truncated mid tool-call — asking model to use smaller steps and retrying..."
531
+ )
532
+ end
315
533
  end
316
534
  end
317
535
  end
@@ -47,11 +47,41 @@ module Clacky
47
47
  handle_compression_response(response, compression_context, progress: handle)
48
48
  true
49
49
  rescue Clacky::AgentInterrupted => e
50
- @ui&.log("Idle compression canceled: #{e.message}", level: :info)
50
+ # User cancelled the idle compression — finish the quiet progress
51
+ # slot in place so the user sees exactly what happened (rather
52
+ # than the "Idle detected..." line being silently removed).
53
+ final = "Idle compression cancelled: #{e.message}"
54
+ if handle
55
+ handle.finish(final_message: final)
56
+ else
57
+ @ui&.log(final, level: :info)
58
+ end
51
59
  @history.rollback_before(compression_message)
60
+ Clacky::Logger.info("[idle-compress] cancelled: #{e.message}")
52
61
  false
53
62
  rescue => e
54
- @ui&.log("Idle compression failed: #{e.message}", level: :error)
63
+ # Compression failed (most commonly: network errors after all
64
+ # LlmCaller retries exhausted). Previously this only wrote an
65
+ # @ui.log(:error) that was easy to miss — especially when no
66
+ # other output followed. Now we:
67
+ # 1. Replace the active quiet progress line with the error so
68
+ # the user always sees *something* where the spinner was.
69
+ # 2. Emit a show_warning for a more prominent entry.
70
+ # 3. Persist to Clacky::Logger so post-mortem is possible even
71
+ # if the terminal scrollback has rolled past.
72
+ final = "Idle compression failed: #{e.message}"
73
+ if handle
74
+ handle.finish(final_message: final)
75
+ else
76
+ @ui&.log(final, level: :error)
77
+ end
78
+ @ui&.show_warning(final)
79
+ Clacky::Logger.warn(
80
+ "[idle-compress] failed",
81
+ error_class: e.class.name,
82
+ error_message: e.message,
83
+ backtrace: e.backtrace&.first(5)
84
+ )
55
85
  @history.rollback_before(compression_message)
56
86
  false
57
87
  end