openclacky 1.0.0.beta.6 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +34 -1
  3. data/README.md +87 -53
  4. data/lib/clacky/agent/cost_tracker.rb +19 -2
  5. data/lib/clacky/agent/llm_caller.rb +120 -4
  6. data/lib/clacky/agent/message_compressor_helper.rb +32 -2
  7. data/lib/clacky/agent/session_serializer.rb +47 -2
  8. data/lib/clacky/agent.rb +16 -21
  9. data/lib/clacky/client.rb +88 -13
  10. data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
  11. data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
  12. data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
  13. data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
  14. data/lib/clacky/default_skills/new/SKILL.md +3 -114
  15. data/lib/clacky/default_skills/onboard/SKILL.md +340 -133
  16. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
  17. data/lib/clacky/json_ui_controller.rb +2 -1
  18. data/lib/clacky/message_format/anthropic.rb +72 -8
  19. data/lib/clacky/message_format/bedrock.rb +6 -3
  20. data/lib/clacky/plain_ui_controller.rb +1 -1
  21. data/lib/clacky/providers.rb +91 -2
  22. data/lib/clacky/server/channel/channel_ui_controller.rb +1 -1
  23. data/lib/clacky/server/http_server.rb +830 -7
  24. data/lib/clacky/server/session_registry.rb +58 -20
  25. data/lib/clacky/server/web_ui_controller.rb +3 -2
  26. data/lib/clacky/skill.rb +10 -9
  27. data/lib/clacky/skill_loader.rb +23 -11
  28. data/lib/clacky/tools/file_reader.rb +232 -127
  29. data/lib/clacky/tools/security.rb +42 -64
  30. data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
  31. data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
  32. data/lib/clacky/tools/terminal/session_manager.rb +8 -3
  33. data/lib/clacky/tools/terminal.rb +263 -16
  34. data/lib/clacky/ui2/layout_manager.rb +8 -1
  35. data/lib/clacky/ui2/output_buffer.rb +83 -23
  36. data/lib/clacky/ui2/ui_controller.rb +76 -8
  37. data/lib/clacky/ui_interface.rb +1 -1
  38. data/lib/clacky/utils/model_pricing.rb +120 -0
  39. data/lib/clacky/utils/parser_manager.rb +70 -6
  40. data/lib/clacky/utils/string_matcher.rb +23 -1
  41. data/lib/clacky/version.rb +1 -1
  42. data/lib/clacky/web/app.css +732 -6
  43. data/lib/clacky/web/app.js +40 -1458
  44. data/lib/clacky/web/i18n.js +240 -24
  45. data/lib/clacky/web/index.html +168 -0
  46. data/lib/clacky/web/profile.js +442 -0
  47. data/lib/clacky/web/sessions.js +1120 -1
  48. data/lib/clacky/web/sidebar.js +39 -0
  49. data/lib/clacky/web/skills.js +456 -0
  50. data/lib/clacky/web/trash.js +343 -0
  51. data/lib/clacky/web/ws-dispatcher.js +255 -0
  52. data/lib/clacky.rb +0 -3
  53. metadata +15 -17
  54. data/lib/clacky/clacky_auth_client.rb +0 -152
  55. data/lib/clacky/clacky_cloud_config.rb +0 -123
  56. data/lib/clacky/cloud_project_client.rb +0 -169
  57. data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
  58. data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
  59. data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
  60. data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
  61. data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
  62. data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
  63. data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
  64. data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
  65. data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
  66. data/lib/clacky/deploy_api_client.rb +0 -484
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: afc12c94c2b8b7580ca948625cc6c106004bbf385f341c783e36e1be9d93fd82
- data.tar.gz: 95508d829f02270b3fce4849b21e29b6766a46d9c663d47e37df817aed456da5
+ metadata.gz: 9d6ba5a62f7a352730705db11aff8ab76af059764903eb4413bd5a0aa835fecf
+ data.tar.gz: 58ba8fdcf23b5dabcc4a8ed709be0f34a9d27a5be83601fee685a638eb3ff445
  SHA512:
- metadata.gz: 8f44be2b9d9bf26f97490f5ddf2525a6cad937c5152b8486bb2840a263ab104cacfa5838600236b3a38a6806e69cd717fbce982838f2c2a65664158b0b4ed238
- data.tar.gz: aecb14f4b6f345d190e52de0c0816f380b4e6c3213453c9e69a04b78944f757115e8a1ac042b0a78398e79d27de65190f4c0cb61d1efe3c224416b6a2f55f6c6
+ metadata.gz: 00e3f00119cad74d7da43519a1a12332e509c0050946d713dea17db539bbadf0099e96ea5369cc19046fd0bc1c224849cbbaf43addfe0708858780a370067b3b
+ data.tar.gz: 4e7888c952dd49c664c67212c0986b62bd7745887dae7d85bce14b3f36c544fc5bd9ca27f1851f04e14477cfd9316938605b6ae0f89b19652cadd1442c6dc564
data/CHANGELOG.md CHANGED
@@ -5,7 +5,40 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
- ## [Unreleased]
+ ## [1.0.1] - 2026-05-06
+
+ ### Added
+ - **OpenRouter Anthropic API support.** You can now route Claude model requests through OpenRouter, giving access to Anthropic models via a single OpenRouter API key — useful when Anthropic direct access is limited in your region.
+ - **GPT provider support.** Direct GPT provider configuration is now available alongside other providers, making it easier to switch between different OpenAI-compatible endpoints.
+ - **OCR-powered PDF reading.** PDF files that contain scanned images (non-text PDFs) are now readable via OCR, allowing the agent to extract content from scanned documents, invoices, and image-heavy PDFs.
+ - **Terminal output size control.** The agent now limits terminal output to a configurable size, preventing token overflows when running commands that produce very long output.
+ - **Memories & Trash manager in Web UI.** A new management panel lets you browse, review, and delete agent memories and trashed files directly from the Web UI.
+ - **Watchdog for interrupt messages.** A background watchdog ensures interrupt signals reliably stop the agent even when it's deep in a tool execution loop.
+ - **Skill import with category directory scanning.** When importing skills from openclaw packages, nested category directories are now scanned automatically, so all skills in a category bundle are imported at once.
+
+ ### Improved
+ - **Deploy skill simplified.** The deploy skill now uses the Railway CLI directly without custom helper tools, making deployments more reliable and the codebase significantly lighter.
+ - **Double-render of progress indicators fixed.** Progress spinners and status lines no longer render twice in quick succession, keeping the Web UI output clean.
+ - **Session idle status tracking and file descriptor cleanup.** Sessions now correctly report idle state when the agent finishes, and open file descriptors are properly closed to avoid resource leaks.
+ - **GPT-4.1 and GPT-5 pricing added.** Model cost tracking now includes the latest GPT-4.1 and GPT-5 pricing tiers.
+
+ ### Fixed
+ - **UTF-8 encoding error in file preview.** Opening files with non-UTF-8 characters no longer crashes the preview; such files are now handled gracefully.
+ - **Expand `~` in openfile path.** The "open file in editor" API endpoint now correctly expands `~` to the user's home directory.
+
+ ## [1.0.0] - 2026-04-30
+
+ ### Added
+ - **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
+ - **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
+ - **Default model changed to 4.5.** The new default model provides a better balance of speed, quality, and cost for most tasks.
+
+ ### Improved
+ - **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
+ - **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
+
+ ### Fixed
+ - **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
 
  ## [1.0.0.beta.6] - 2026-04-30
 
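Both items in the 1.0.1 "Fixed" section reduce to standard Ruby idioms. A minimal sketch with hypothetical method names (not the gem's actual API):

```ruby
# Hypothetical sketch of the two 1.0.1 fixes, stdlib only.

# UTF-8 preview fix: scrub invalid byte sequences instead of letting
# an encoding error crash the file preview.
def preview_file(path)
  File.read(path, mode: "rb").force_encoding(Encoding::UTF_8).scrub
end

# Tilde-expansion fix: expand "~" before handing the path to the editor.
def editor_path(raw)
  File.expand_path(raw)   # "~/notes.md" => "/home/user/notes.md"
end
```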
data/README.md CHANGED
@@ -6,77 +6,79 @@
  [![Downloads](https://img.shields.io/gem/dt/openclacky?label=downloads&style=flat-square&color=brightgreen)](https://rubygems.org/gems/openclacky)
  [![License](https://img.shields.io/badge/license-MIT-lightgrey?style=flat-square)](LICENSE.txt)
 
- **From expertise to business — turn your professional knowledge into a monetizable OpenClaw Skill.**
+ **The most token-efficient open-source AI agent.**
 
- OpenClacky is the creator-side platform for the OpenClaw ecosystem. Package your methods and workflows into encrypted, white-labeled Skills that your clients install and use under your name, your brand, your price.
+ OpenClacky matches Claude Code on capability at comparable cost, and costs significantly less than other open-source agents (~50% less than OpenClaw, ~3× cheaper than Hermes). 100% open source (MIT), BYOK with any OpenAI-compatible model, built on two years of agentic R&D and harness engineering.
 
- ## Why OpenClacky?
+ > Website: https://www.openclacky.com/ · Backed by **MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital**
 
- The OpenClaw ecosystem has 5,700+ Skills and growing. But almost all of them are open-sourced, free, and easily copied. The real scarcity isn't more Skills — it's **expertise-backed, production-grade Skills worth paying for**.
+ ## Why OpenClacky?
 
- OpenClacky is built for the people who have that expertise.
+ For the same task, how much do you pay? Under comparable agent workloads, OpenClacky saves a large amount of token spend compared to mainstream alternatives.
 
- | | **Openclaw** | **OpenClacky** |
+ | Agent | Relative cost | Notes |
  |---|---|---|
- | **Core model** | Open sharing | Encrypted & protected |
- | **Primary users** | Users who install Skills | Creators who sell Skills |
- | **Revenue** | None | Creator-defined pricing |
- | **Brand** | Platform brand | Your own brand |
- | **Driven by** | Technical contributors | Domain expertise |
+ | **OpenClacky** | **~0.8–1.2×** | 16 tools · ~100% cache hit · subagent routing |
+ | Claude Code | 1.0× (baseline) | World-class harness, closed-source subscription |
+ | OpenClaw | ~1.5× | Comparable harness agent |
+ | Hermes | ~3× | 52 built-in tools — schema bloat ~3–4× |
 
- ## How It Works
+ *Numbers are averages measured on internal common agent tasks, using Claude Code as the baseline. Full benchmark reports will be published on GitHub.*
 
- **Four steps from capability to business:**
+ ## Feature comparison
 
- 1. **Craft your Skill** — Turn your domain methodology into a repeatable AI workflow
- 2. **Encrypt & protect** — Your logic stays yours; clients can't inspect or copy it
- 3. **Package your brand** — Ship under your name, your logo, your onboarding experience
- 4. **Launch & acquire** — One-click sales page, built-in SEO, start converting traffic
+ Core agent capability is roughly on par across the field; the real differentiators are **cost, openness, Skill evolution, and integrations**.
 
- ## Who It's For
+ | Feature | Claude Code | OpenClaw | Hermes | **OpenClacky** |
+ |---|:---:|:---:|:---:|:---:|
+ | Token cost | 1.0× | ~1.5× | ~3× | **~0.8–1.2×** |
+ | Open source | ❌ Closed | ✅ Open | ✅ Open | ✅ MIT |
+ | BYOK / model freedom | ❌ Anthropic only | ✅ | ✅ | ✅ |
+ | Skill self-evolution | ❌ | ❌ | ✅ | ✅ |
+ | IM integration (Feishu / WeCom / WeChat) | ❌ | ✅ | ✅ | ✅ |
 
- OpenClacky is built for domain experts whose knowledge can be expressed as *information processing + executable actions*:
+ ## How we get the cost down
 
- - **SEO specialists** — keyword research, content scoring, rank monitoring
- - **Lawyers** — contract review, case retrieval, risk flagging
- - **Traders** — signal detection, strategy backtesting, automated execution
- - **Data analysts** — cleaning, modeling, report generation
- - **Content strategists** — topic selection, outlines, drafts at scale
+ Not by cutting features, but by compounding the right choices at every layer.
 
- ## Features
+ ### 1. Ultra-high cache hit rate
+ Sessions never restart, double cache markers, **Insert-then-Compress** — the system prompt is never mutated, so compression still reuses the cache. **Measured cache hit rate: near 100%.**
 
- - [x] **Skill builder** — Create AI workflows via conversation or UI, iterate and ship fast
- - [x] **Encryption** — Protect your knowledge assets; end users cannot read your Skill source
- - [x] **White-label packaging** — Your brand, your product line, your client experience
- - [x] **Auto-update delivery** — Push updates to all users seamlessly, with version control
- - [x] **Cross-platform distribution** — Windows, macOS, Linux — one Skill, every platform
- - [x] **Sales page generator** — Launch your storefront fast, with built-in SEO foundations
- - [x] **Cost monitoring** — Real-time token tracking, automatic compression (up to 90% savings)
- - [x] **Multi-provider support** — OpenAI, Anthropic, DeepSeek, and any OpenAI-compatible API
- - [ ] **Skill marketplace** — Discover and distribute premium Skills *(coming soon)*
+ ### 2. Minimal tool set
+ Only **16 core tools**. Capabilities are offloaded to the Skill ecosystem via a single `invoke_skill` meta-tool. Tool count is not the metric — task completion rate is.
 
- ## Coding Support
+ | OpenClacky | Claude Code | OpenClaw | Hermes |
+ |:--:|:--:|:--:|:--:|
+ | **16** | 40+ | 23 | 52 |
 
- OpenClacky also works as a general AI coding assistant — scaffold full-stack Rails apps, add features, or explore an unfamiliar codebase:
+ ### 3. Idle-time auto-compression
+ Go to a meeting, grab coffee — the agent compresses long context in the background and pre-warms the cache. Your first message back hits the cache directly. **Cold-start first-token cost reduced by 50%+.**
 
- ```bash
- $ openclacky
- > /new my-app # scaffold a full-stack Rails app
- > Add user auth with email and password
- > How does the payment module work?
- ```
+ ### 4. BYOK — you pick the model, you set the cost
+ Any OpenAI-compatible API, plug and play. Official direct endpoints, aggregator routing, compatible relays — the choice is 100% yours. Use Claude for code, auto-route subtasks to DeepSeek, and save another chunk of tokens.
+
+ Built on **2 years · 3 generations of agentic architecture · 6 core harness engineering decisions**.
+
+ ## Skills — the soul of the agent
 
- Built on a production-ready Rails architecture with one-click deployment, dev/prod isolation, and automatic backups.
+ - **Invoke with `/`** — instant browse, fuzzy search, direct call. Hundreds of Skills at your fingertips.
+ - **Create Skills in natural language** — just describe what you want; the agent drafts `SKILL.md`, breaks down steps, and runs validation. No code required.
+ - **Self-evolving** — after each run, the agent updates the Skill based on execution context and results. The next call is more stable and more accurate.
+ - **Open & compatible** — supports Claude Skills / Markdown Pack / custom formats.
+ - **Monetizable** — polished Skills can be packaged for sale, with encrypted distribution, license management, and creator-defined pricing.
 
  ## Installation
 
- ### Method 1: One-line Install (Recommended)
+ ### Desktop installer (recommended)
 
- ```bash
- /bin/bash -c "$(curl -sSL https://raw.githubusercontent.com/clacky-ai/openclacky/main/scripts/install.sh)"
- ```
+ Double-click to install — environment, dependencies, and Skills all set up automatically.
+
+ - **macOS** — [Download `.dmg`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.dmg) (Apple Silicon / Intel)
+ - **Windows** — [Download `.exe`](https://oss.1024code.com/openclacky-installer/official/openclacky-installer.exe) (Windows 10 2004+ / Windows 11)
 
- ### Method 2: RubyGems
+ More options: https://www.openclacky.com/
+
+ ### Command line
 
  **Requirements:** Ruby >= 3.1.0
 
@@ -84,6 +86,12 @@ Built on a production-ready Rails architecture with one-click deployment, dev/pr
  gem install openclacky
  ```
 
+ Or one-line install:
+
+ ```bash
+ /bin/bash -c "$(curl -sSL https://raw.githubusercontent.com/clacky-ai/openclacky/main/scripts/install.sh)"
+ ```
+
  ## Quick Start
 
  ### Terminal (CLI)
@@ -95,16 +103,16 @@ openclacky # start interactive agent in current directory
  ### Web UI
 
  ```bash
- openclacky server # start the web server (default: http://localhost:7070)
+ openclacky server # default: http://localhost:7070
  ```
 
- Then open **http://localhost:7070** in your browser. You'll get a full-featured chat interface with multi-session support — run separate sessions for coding, copywriting, research, and more, all in parallel.
+ Open **http://localhost:7070** for a full chat interface with multi-session support — run coding, copywriting, research sessions in parallel.
 
  Options:
 
  ```bash
- openclacky server --port 8080 # custom port
- openclacky server --host 0.0.0.0 # listen on all interfaces (e.g. remote access)
+ openclacky server --port 8080 # custom port
+ openclacky server --host 0.0.0.0 # listen on all interfaces (remote access)
  ```
 
  ## Configuration
@@ -114,7 +122,26 @@ $ openclacky
  > /config
  ```
 
- You'll be prompted to set your **API Key**, **Model**, and **Base URL** (any OpenAI-compatible provider).
+ Set your **API Key**, **Model**, and **Base URL** (any OpenAI-compatible provider).
+
+ Supported out of the box: **Claude (Anthropic) · GPT (OpenAI) · DeepSeek · Kimi (Moonshot) · MiniMax · OpenRouter** — or any custom endpoint.
+
+ ## Coding use case
+
+ OpenClacky works as a general AI coding assistant — scaffold full-stack apps, add features, or explore unfamiliar codebases:
+
+ ```bash
+ $ openclacky
+ > /new my-app # scaffold a new project
+ > Add user auth with email and password
+ > How does the payment module work?
+ ```
+
+ ## Advanced — Creator Program
+
+ Power users are already turning their workflows into vertical AI experts on OpenClacky — encrypted distribution, license management, self-set pricing. Legal, healthcare, financial planning, and more.
+
+ Learn more: https://www.openclacky.com/ → Creators
 
  ## Install from Source
 
@@ -125,6 +152,13 @@ bundle install
  bin/clacky
  ```
 
+ ## Trust & Credibility
+
+ - **100% open source** — MIT License, all code public, all decisions traceable
+ - **2 years of agentic R&D** — 3 generations of architecture
+ - **16 core tools** — minimal by design
+ - **Backed by** MiraclePlus · ZhenFund · Sequoia China · Hillhouse Capital
+
 
  ## Contributing
  Bug reports and pull requests are welcome on GitHub at https://github.com/clacky-ai/openclacky. Contributors are expected to adhere to the [code of conduct](https://github.com/clacky-ai/openclacky/blob/main/CODE_OF_CONDUCT.md).
data/lib/clacky/agent/cost_tracker.rb CHANGED
@@ -105,8 +105,25 @@ module Clacky
  cache_write = usage[:cache_creation_input_tokens] || 0
  cache_read = usage[:cache_read_input_tokens] || 0
 
- # Calculate token delta from previous iteration
- delta_tokens = total_tokens - @previous_total_tokens
+ # Calculate token delta from previous iteration.
+ #
+ # Two conventions exist for total_tokens across providers:
+ # - OpenAI (default): cumulative per-request input+output (grows
+ #   with history every turn). Delta = total - prev.
+ # - Anthropic direct: already the per-turn new compute
+ #   (raw_input + cache_creation + output).
+ #   The MessageFormat sets :total_is_per_turn so
+ #   we use total_tokens directly as the delta.
+ #
+ # Without this branch, Anthropic's per-turn total would be treated as
+ # cumulative and produce negative / nonsensical deltas whenever cached
+ # prefixes make the per-turn new-compute smaller than the previous turn.
+ delta_tokens =
+   if usage[:total_is_per_turn]
+     total_tokens
+   else
+     total_tokens - @previous_total_tokens
+   end
  @previous_total_tokens = total_tokens # Update for next iteration
 
  {
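To make the two accounting conventions concrete, here is a worked example; the numbers are illustrative only, not taken from the gem or its tests:

```ruby
# Illustrative only: why the :total_is_per_turn branch exists.
prev_total = 10_500                 # turn 1: 10,000 prompt tokens + 500 output

# OpenAI-style cumulative totals grow as history is resent each turn:
openai_turn2_total = 21_400
openai_delta = openai_turn2_total - prev_total    # => 10_900 new tokens

# Anthropic-style per-turn totals count only new compute
# (raw_input + cache_creation + output). With the prefix cached,
# turn 2 can report far less than turn 1:
anthropic_turn2_total = 900
anthropic_turn2_total - prev_total                # => -9_600, a nonsensical delta
anthropic_delta = anthropic_turn2_total           # correct: use it directly
```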
data/lib/clacky/agent/llm_caller.rb CHANGED
@@ -54,6 +54,20 @@ module Clacky
  max_retries = 10
  retry_delay = 5
  retries = 0
+
+ # Track whether any of the retry/fallback branches below opened a
+ # "retrying" progress slot via show_progress(progress_type:
+ # "retrying", phase: "active"). If so, we MUST close it before
+ # leaving call_llm — otherwise the UI's legacy shim in
+ # UI2::UIController keeps the :quiet ProgressHandle alive, its
+ # ticker thread keeps running, and the user sees a frozen
+ # "Network failed: ... (681s)" line long after the task finished.
+ #
+ # The close is done in the outer ensure below so it runs on:
+ # - normal success (response returned)
+ # - unrecoverable failure (raise propagates out)
+ # - BadRequestError reasoning-content retry success
+ retrying_progress_opened = false
  # One-shot flag set by the BadRequestError rescue below when the server
  # complained about missing reasoning_content. The subsequent retry will
  # pad every assistant message's reasoning_content, which satisfies
@@ -67,6 +81,7 @@ module Clacky
  thinking_retry_attempted = false
 
  begin
+ begin
  # Use active_messages (Time Machine) when undone, otherwise send full history.
  # to_api strips internal fields and handles orphaned tool_calls.
  messages_to_send = if respond_to?(:active_messages)
@@ -86,7 +101,46 @@ module Clacky
  # Successful response — if we were probing, confirm primary is healthy.
  handle_probe_success if @config.probing?
 
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
+ rescue Faraday::TimeoutError => e
+   # ── Read-timeout path (distinct from connection-level failures) ──
+   # Faraday::TimeoutError on our non-streaming POST almost always means
+   # the *response* took longer than the 300s read-timeout to come back —
+   # i.e. the model is trying to produce a huge output in one shot
+   # (e.g. "write me a 2000-line snake game"). Blindly retrying the same
+   # request with the same prompt reproduces the same timeout.
+   #
+   # Strategy:
+   # 1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
+   #    telling the model to break the work into smaller steps, then
+   #    retry. The history edit changes the prompt, so the retry is
+   #    materially different from the failed attempt.
+   # 2. On subsequent timeouts in the same task, fall back to the
+   #    generic "just retry" behaviour (the model may have ignored
+   #    the hint; don't pile on duplicate hints).
+   # 3. Probing-mode timeouts still go through handle_probe_failure.
+   retries += 1
+
+   if @config.probing?
+     handle_probe_failure
+     retry
+   end
+
+   if retries <= max_retries
+     inject_large_output_hint_if_first_timeout(e)
+     @ui&.show_progress(
+       "Response too slow (likely generating too much at once): #{e.message}",
+       progress_type: "retrying",
+       phase: "active",
+       metadata: { attempt: retries, total: max_retries }
+     )
+     retrying_progress_opened = true
+     sleep retry_delay
+     retry
+   else
+     raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
+   end
+
+ rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
  retries += 1
 
  # Probing failure: primary still down — renew cooling-off and retry with fallback.
@@ -95,9 +149,10 @@ module Clacky
  retry
  end
 
- # Network-level errors (timeouts, connection failures) are likely transient
- # infrastructure blips — do NOT trigger fallback. Just retry on the current
- # model (primary or already-active fallback) up to max_retries.
+ # Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
+ # transient infrastructure blips — do NOT trigger fallback, and do
+ # NOT inject the "break into steps" hint (the model did nothing wrong).
+ # Just retry on the current model up to max_retries.
  if retries <= max_retries
  @ui&.show_progress(
  "Network failed: #{e.message}",
@@ -105,6 +160,7 @@ module Clacky
  phase: "active",
  metadata: { attempt: retries, total: max_retries }
  )
+ retrying_progress_opened = true
  sleep retry_delay
  retry
  else
@@ -141,6 +197,7 @@ module Clacky
  phase: "active",
  metadata: { attempt: retries, total: current_max }
  )
+ retrying_progress_opened = true
  sleep retry_delay
  retry
  else
@@ -174,6 +231,21 @@ module Clacky
  response[:token_usage] = token_data
 
  response
+ ensure
+   # Close any "retrying" progress slot that was opened during the
+   # retry/fallback loop above. The legacy UI shim allocates a
+   # separate :quiet ProgressHandle under the "retrying" key; if it
+   # is never finished, its ticker thread keeps running and the user
+   # sees a stale "Network failed: ... (NNN s)" line long after the
+   # task has completed. This ensure runs on:
+   # - successful retry → close the slot, message is "Recovered"
+   #   so the final frame is informative rather than blank
+   # - unrecoverable failure that raises out → close the slot so
+   #   the spinner doesn't linger while the error bubbles up
+   if retrying_progress_opened
+     @ui&.show_progress(progress_type: "retrying", phase: "done")
+   end
+ end
  end
 
  # Attempt to activate the provider fallback model for the given primary model.
@@ -229,6 +301,50 @@ module Clacky
  (msg.include?("thinking") || msg.include?("must be passed back") ||
  msg.include?("must be provided"))
  end
+
+ # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
+ # user message to the history instructing the model to break its work
+ # into smaller steps. Subsequent timeouts in the same task are ignored
+ # here (caller just retries) so we don't pollute history with duplicate
+ # hints.
+ #
+ # The injected message carries `system_injected: true` so it is:
+ # - Hidden from UI replay (session_serializer / replay_history filters)
+ # - Skipped by prompt-caching marker placement (client.rb)
+ # - Skipped by message compression's "recent user turn" protection
+ #   (message_compressor_helper.rb)
+ #
+ # Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
+ private def inject_large_output_hint_if_first_timeout(err)
+   return if @task_timeout_hint_injected
+
+   @task_timeout_hint_injected = true
+
+   hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
+          "This usually means the model was trying to produce too much output in a single response. " \
+          "Please change your approach:\n" \
+          "- Break the task into multiple smaller steps, each producing a short response.\n" \
+          "- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
+          "then fill in each section with separate `edit` calls.\n" \
+          "- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
+          "- Do NOT attempt to output the entire deliverable in one response."
+
+   @history.append({
+     role: "user",
+     content: hint,
+     system_injected: true,
+     task_id: @current_task_id
+   })
+
+   Clacky::Logger.info(
+     "[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
+     "(error=#{err.class}: #{err.message})"
+   )
+
+   @ui&.show_warning(
+     "LLM response timed out — asking model to break the task into smaller steps and retrying..."
+   )
+ end
  end
  end
  end
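Taken together, the retry branches and the outer `ensure` form a standard open-flag cleanup pattern. A condensed, self-contained sketch of that control flow (the `perform_request` stub and the `ui` parameter are stand-ins, not the gem's actual API):

```ruby
require "faraday"

# Stand-in for the real HTTP call; simulates a read timeout.
def perform_request
  raise Faraday::TimeoutError, "read timeout"
end

# Condensed sketch of call_llm's cleanup guarantee: however the begin
# block exits, an opened "retrying" progress slot is always closed.
# `ui` is any object responding to show_progress.
def call_llm_sketch(ui, max_retries: 2, retry_delay: 0)
  retries = 0
  progress_opened = false
  begin
    perform_request
  rescue Faraday::TimeoutError
    retries += 1
    raise if retries > max_retries
    ui.show_progress("retrying...", progress_type: "retrying", phase: "active")
    progress_opened = true
    sleep retry_delay
    retry
  ensure
    # Runs on success and on the final raise alike, so the progress
    # slot's ticker thread can never outlive the call.
    ui.show_progress(progress_type: "retrying", phase: "done") if progress_opened
  end
end
```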
data/lib/clacky/agent/message_compressor_helper.rb CHANGED
@@ -47,11 +47,41 @@ module Clacky
  handle_compression_response(response, compression_context, progress: handle)
  true
  rescue Clacky::AgentInterrupted => e
- @ui&.log("Idle compression canceled: #{e.message}", level: :info)
+ # User cancelled the idle compression — finish the quiet progress
+ # slot in place so the user sees exactly what happened (rather
+ # than the "Idle detected..." line being silently removed).
+ final = "Idle compression cancelled: #{e.message}"
+ if handle
+   handle.finish(final_message: final)
+ else
+   @ui&.log(final, level: :info)
+ end
  @history.rollback_before(compression_message)
+ Clacky::Logger.info("[idle-compress] cancelled: #{e.message}")
  false
  rescue => e
- @ui&.log("Idle compression failed: #{e.message}", level: :error)
+ # Compression failed (most commonly: network errors after all
+ # LlmCaller retries exhausted). Previously this only wrote an
+ # @ui.log(:error) that was easy to miss — especially when no
+ # other output followed. Now we:
+ # 1. Replace the active quiet progress line with the error so
+ #    the user always sees *something* where the spinner was.
+ # 2. Emit a show_warning for a more prominent entry.
+ # 3. Persist to Clacky::Logger so post-mortem is possible even
+ #    if the terminal scrollback has rolled past.
+ final = "Idle compression failed: #{e.message}"
+ if handle
+   handle.finish(final_message: final)
+ else
+   @ui&.log(final, level: :error)
+ end
+ @ui&.show_warning(final)
+ Clacky::Logger.warn(
+   "[idle-compress] failed",
+   error_class: e.class.name,
+   error_message: e.message,
+   backtrace: e.backtrace&.first(5)
+ )
  @history.rollback_before(compression_message)
  false
  end
data/lib/clacky/agent/session_serializer.rb CHANGED
@@ -36,6 +36,15 @@ module Clacky
  # Restore previous_total_tokens for accurate delta calculation across sessions
  @previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
 
+ # Recover the latest latency metric from the most recent assistant message
+ # that carries a :latency field. This is the source of truth for the status-bar
+ # signal — no separate session-level field is needed. Older sessions (pre-feature)
+ # simply start with nil; the signal stays hidden until the next LLM call populates it.
+ last_assistant_with_latency = @history.to_a.reverse.find do |m|
+   m[:role].to_s == "assistant" && m[:latency]
+ end
+ @latest_latency = last_assistant_with_latency&.dig(:latency)
+
  # Restore Time Machine state
  @task_parents = session_data.dig(:time_machine, :task_parents) || {}
  @current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
@@ -178,8 +187,18 @@ module Clacky
  elsif current_round
  current_round[:events] << msg
  elsif msg[:compressed_summary] && msg[:chunk_path]
- # Compressed summary sitting before any user rounds — expand it from chunk md
- chunk_rounds = parse_chunk_md_to_rounds(msg[:chunk_path])
+ # Compressed summary sitting before any user rounds — expand ALL chunk
+ # MD files that belong to the same session (siblings of chunk_path),
+ # in chunk-index ascending order.
+ #
+ # Under the current "single summary + previous_chunks index" scheme,
+ # session.json only keeps the newest compressed_summary message (which
+ # points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
+ # referenced only as basenames inside the summary text. Expanding just
+ # msg[:chunk_path] would therefore lose all prior chunks on replay.
+ chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
+   parse_chunk_md_to_rounds(p)
+ }
  rounds.concat(chunk_rounds)
  # After expanding, treat the last chunk round as the current round so that
  # any orphaned assistant/tool messages that follow in session.json (belonging
@@ -243,6 +262,32 @@ module Clacky
  { has_more: has_more }
  end
 
+ # Return all chunk MD file paths that belong to the same session as
+ # +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
+ # Uses the filename convention "<base>-chunk-<N>.md".
+ #
+ # Handles path resolution the same way parse_chunk_md_to_rounds does:
+ # if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
+ # (cross-machine / cross-user session bundles).
+ private def sibling_chunks_of(chunk_path)
+   return [] unless chunk_path
+
+   resolved = chunk_path.to_s
+   unless File.exist?(resolved)
+     resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
+   end
+   return [] unless File.exist?(resolved)
+
+   dir = File.dirname(resolved)
+   base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
+   return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
+
+   Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
+     m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
+     m ? m[1].to_i : Float::INFINITY
+   end
+ end
+
  # Parse a chunk MD file into an array of rounds compatible with replay_history.
  # Each round is { user_msg: Hash, events: Array<Hash> }.
  # Timestamps are synthesised from the chunk's archived_at, spread backwards.
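One detail worth calling out in `sibling_chunks_of`: the numeric `sort_by` is what keeps double-digit chunks in replay order, since a plain lexicographic glob sort would not. A quick illustration with hypothetical filenames:

```ruby
names = ["s1-chunk-1.md", "s1-chunk-2.md", "s1-chunk-10.md"]

# Lexicographic order puts chunk-10 before chunk-2:
names.sort
# => ["s1-chunk-1.md", "s1-chunk-10.md", "s1-chunk-2.md"]

# Sorting on the extracted integer index restores replay order:
names.sort_by { |n| n[/-chunk-(\d+)\.md\z/, 1].to_i }
# => ["s1-chunk-1.md", "s1-chunk-2.md", "s1-chunk-10.md"]
```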