openclacky 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/README.md +28 -7
  4. data/lib/clacky/agent/llm_caller.rb +23 -1
  5. data/lib/clacky/agent/session_serializer.rb +6 -1
  6. data/lib/clacky/agent.rb +14 -5
  7. data/lib/clacky/anthropic_stream_aggregator.rb +135 -0
  8. data/lib/clacky/bedrock_stream_aggregator.rb +137 -0
  9. data/lib/clacky/cli.rb +9 -2
  10. data/lib/clacky/client.rb +146 -17
  11. data/lib/clacky/default_skills/onboard/SKILL.md +6 -2
  12. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +50 -6
  13. data/lib/clacky/openai_stream_aggregator.rb +130 -0
  14. data/lib/clacky/server/http_server.rb +2 -3
  15. data/lib/clacky/server/web_ui_controller.rb +8 -4
  16. data/lib/clacky/ui2/progress_handle.rb +77 -15
  17. data/lib/clacky/ui2/ui_controller.rb +4 -2
  18. data/lib/clacky/version.rb +1 -1
  19. data/lib/clacky/web/app.css +6 -4
  20. data/lib/clacky/web/i18n.js +6 -0
  21. data/lib/clacky/web/index.html +3 -1
  22. data/lib/clacky/web/sessions.js +152 -48
  23. data/lib/clacky/web/vendor/katex/auto-render.min.js +1 -0
  24. data/lib/clacky/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  25. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  26. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  27. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  28. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  29. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  30. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  31. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  32. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  33. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  34. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  35. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  36. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  37. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  38. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  39. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  40. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  41. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  42. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  43. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  44. data/lib/clacky/web/vendor/katex/katex.min.css +1 -0
  45. data/lib/clacky/web/vendor/katex/katex.min.js +1 -0
  46. data/lib/clacky/web/ws-dispatcher.js +19 -4
  47. data/lib/clacky.rb +3 -0
  48. data/scripts/install.ps1 +14 -3
  49. metadata +28 -2
data/lib/clacky/client.rb CHANGED
@@ -119,37 +119,59 @@ module Clacky
119
119
  # signal metric — see docs). When we migrate to streaming later, this
120
120
  # same `ttft_ms` field will start carrying the *actual* first-token
121
121
  # latency without any schema change.
122
- def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
122
+ # @param on_chunk [Proc, nil] optional streaming progress callback.
123
+ # Receives keyword args { input_tokens:, output_tokens: } with cumulative
124
+ # token counts. When nil, behaves exactly as the historical non-streaming
125
+ # path. When given, every provider branch below (Bedrock, Anthropic,
126
+ # OpenAI-compatible) streams, so the single synthetic invocation fired
127
+ # after the response is a fallback that is currently never reached.
128
+ def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil)
123
129
  caching_enabled = enable_caching && supports_prompt_caching?(model)
124
130
  cloned = deep_clone(messages)
125
131
 
132
+ streaming_used = false
133
+ first_chunk_at = nil
134
+ wrapped_on_chunk = on_chunk && lambda do |**kwargs|
135
+ first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
136
+ on_chunk.call(**kwargs)
137
+ end
138
+
126
139
  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
127
140
  response =
128
141
  if bedrock?
129
- send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
142
+ streaming_used = !on_chunk.nil?
143
+ send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
130
144
  elsif anthropic_format?
131
- send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
145
+ streaming_used = !on_chunk.nil?
146
+ send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
132
147
  else
133
- send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
148
+ streaming_used = !on_chunk.nil?
149
+ send_openai_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
134
150
  end
135
151
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
136
152
 
153
+ if on_chunk && !streaming_used
154
+ usage = response[:usage] || {}
155
+ safe_invoke_on_chunk(
156
+ on_chunk,
157
+ input_tokens: usage[:prompt_tokens].to_i,
158
+ output_tokens: usage[:completion_tokens].to_i
159
+ )
160
+ end
161
+
137
162
  duration_ms = ((t1 - t0) * 1000).round
138
- # Throughput is only meaningful with a reasonable output size; below ~10
139
- # tokens the sample is too small to be informative and the result is
140
- # wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
141
- # Canonical usage hashes from message_format/* all use :completion_tokens.
163
+ ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
142
164
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
143
165
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
144
166
 
145
167
  response[:latency] = {
146
- ttft_ms: duration_ms, # non-streaming: TTFT == full duration
168
+ ttft_ms: ttft_ms,
147
169
  duration_ms: duration_ms,
148
170
  output_tokens: output_tokens,
149
171
  tps: tps,
150
172
  model: model,
151
173
  measured_at: Time.now.to_f,
152
- streaming: false # future flag — true when we migrate
174
+ streaming: streaming_used
153
175
  }
154
176
  response
155
177
  end
@@ -195,8 +217,10 @@ module Clacky
195
217
 
196
218
  # ── Bedrock Converse request / response ───────────────────────────────────
197
219
 
198
- def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
199
- body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
220
+ def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
221
+ body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
222
+ return send_bedrock_stream_request(body, model, on_chunk) if on_chunk
223
+
200
224
  response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }
201
225
 
202
226
  raise_error(response) unless response.status == 200
@@ -205,6 +229,29 @@ module Clacky
205
229
  MessageFormat::Bedrock.parse_response(parsed_body)
206
230
  end
207
231
 
232
+ # Streaming variant for Bedrock Converse.
233
+ # Posts to /model/{m}/converse-stream with stream:true; the proxy returns
234
+ # SSE frames whose `event` is the Bedrock event-type and whose `data` is
235
+ # the raw Bedrock event JSON. We accumulate frames into a synthetic
236
+ # non-streaming response and feed it back through the existing parser so
237
+ # downstream code is identical.
238
+ private def send_bedrock_stream_request(body, model, on_chunk)
239
+ stream_body = body.merge(stream: true)
240
+ aggregator = BedrockStreamAggregator.new(on_chunk: on_chunk)
241
+ sse_buf = +""
242
+
243
+ response = bedrock_connection.post(bedrock_stream_endpoint(model)) do |req|
244
+ req.body = stream_body.to_json
245
+ req.options.on_data = proc do |chunk, _bytes_received, _env|
246
+ sse_buf << chunk
247
+ drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
248
+ end
249
+ end
250
+
251
+ raise_error(response) unless response.status == 200
252
+ MessageFormat::Bedrock.parse_response(aggregator.to_h)
253
+ end
254
+
208
255
  def parse_simple_bedrock_response(response)
209
256
  raise_error(response) unless response.status == 200
210
257
  data = safe_json_parse(response.body, context: "LLM response")
@@ -216,11 +263,13 @@ module Clacky
216
263
 
217
264
  # ── Anthropic request / response ──────────────────────────────────────────
218
265
 
219
- def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
266
+ def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
220
267
  # Apply cache_control to the message that marks the cache breakpoint
221
268
  messages = apply_message_caching(messages) if caching_enabled
222
269
 
223
- body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
270
+ body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
271
+ return send_anthropic_stream_request(body, on_chunk) if on_chunk
272
+
224
273
  response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
225
274
 
226
275
  raise_error(response) unless response.status == 200
@@ -229,6 +278,24 @@ module Clacky
229
278
  MessageFormat::Anthropic.parse_response(parsed_body)
230
279
  end
231
280
 
281
+ private def send_anthropic_stream_request(body, on_chunk)
282
+ stream_body = body.merge(stream: true)
283
+ aggregator = AnthropicStreamAggregator.new(on_chunk: on_chunk)
284
+ sse_buf = +""
285
+
286
+ response = anthropic_connection.post(anthropic_messages_path) do |req|
287
+ req.headers["Accept"] = "text/event-stream"
288
+ req.body = stream_body.to_json
289
+ req.options.on_data = proc do |chunk, _bytes_received, _env|
290
+ sse_buf << chunk
291
+ drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
292
+ end
293
+ end
294
+
295
+ raise_error(response) unless response.status == 200
296
+ MessageFormat::Anthropic.parse_response(aggregator.to_h)
297
+ end
298
+
232
299
  def parse_simple_anthropic_response(response)
233
300
  raise_error(response) unless response.status == 200
234
301
  data = safe_json_parse(response.body, context: "LLM response")
@@ -237,24 +304,47 @@ module Clacky
237
304
 
238
305
  # ── OpenAI request / response ─────────────────────────────────────────────
239
306
 
240
- def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
307
+ def send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
241
308
  # Apply cache_control markers to messages when caching is enabled.
242
309
  # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
243
310
  messages = apply_message_caching(messages) if caching_enabled
244
311
 
245
- body = MessageFormat::OpenAI.build_request_body(
312
+ body = MessageFormat::OpenAI.build_request_body(
246
313
  messages, model, tools, max_tokens, caching_enabled,
247
314
  vision_supported: @vision_supported
248
315
  )
316
+ return send_openai_stream_request(body, on_chunk) if on_chunk
317
+
249
318
  response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
250
319
 
251
320
  raise_error(response) unless response.status == 200
252
321
  check_html_response(response)
253
-
322
+
254
323
  parsed_body = safe_json_parse(response.body, context: "LLM response")
255
324
  MessageFormat::OpenAI.parse_response(parsed_body)
256
325
  end
257
326
 
327
+ # Streaming variant for OpenAI-compatible chat completions (DeepSeek/OpenRouter
328
+ # via platform/llm_proxy). Uses Faraday's on_data hook to consume SSE frames,
329
+ # accumulates them, and reconstructs the non-streaming JSON response shape so
330
+ # MessageFormat::OpenAI.parse_response works unchanged.
331
+ private def send_openai_stream_request(body, on_chunk)
332
+ stream_body = body.merge(stream: true, stream_options: { include_usage: true })
333
+ aggregator = OpenAIStreamAggregator.new(on_chunk: on_chunk)
334
+ sse_buf = +""
335
+
336
+ response = openai_connection.post("chat/completions") do |req|
337
+ req.body = stream_body.to_json
338
+ req.options.on_data = proc do |chunk, _bytes_received, _env|
339
+ sse_buf << chunk
340
+ drain_sse_frames(sse_buf) { |_event, data| aggregator.handle(data) }
341
+ end
342
+ end
343
+
344
+ raise_error(response) unless response.status == 200
345
+ MessageFormat::OpenAI.parse_response(aggregator.to_h)
346
+ end
347
+
258
348
  def parse_simple_openai_response(response)
259
349
  raise_error(response) unless response.status == 200
260
350
  parsed_body = safe_json_parse(response.body, context: "LLM response")
@@ -320,6 +410,33 @@ module Clacky
320
410
  "/model/#{model}/converse"
321
411
  end
322
412
 
413
+ # Bedrock Converse streaming endpoint path.
414
+ private def bedrock_stream_endpoint(model)
415
+ "/model/#{model}/converse-stream"
416
+ end
417
+
418
+ # Pull complete SSE frames out of a buffer and yield them as (event, data).
419
+ # An SSE frame ends at a blank line ("\n\n"); incomplete trailing data
420
+ # stays in the buffer for the next chunk. Frames without an explicit
421
+ # `event:` line use the default "message" type per the SSE spec.
422
+ private def drain_sse_frames(buf)
423
+ while (sep = buf.index("\n\n"))
424
+ frame = buf.slice!(0, sep + 2)
425
+ event = "message"
426
+ data_lines = []
427
+ frame.each_line do |line|
428
+ line = line.chomp
429
+ if line.start_with?("event:")
430
+ event = line.sub(/^event:\s*/, "")
431
+ elsif line.start_with?("data:")
432
+ data_lines << line.sub(/^data:\s*/, "")
433
+ end
434
+ end
435
+ next if data_lines.empty?
436
+ yield event, data_lines.join("\n")
437
+ end
438
+ end
439
+
323
440
  def bedrock_connection
324
441
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
325
442
  conn.headers["Content-Type"] = "application/json"
@@ -477,6 +594,18 @@ module Clacky
477
594
  "The request will be retried automatically."
478
595
  end
479
596
 
597
+ # ── Streaming helpers ─────────────────────────────────────────────────────
598
+
599
+ # Invoke the user's on_chunk callback in a way that never lets a callback
600
+ # error tear down the LLM request. Streaming chunks are best-effort UI
601
+ # updates; a buggy progress renderer must not abort an in-flight call.
602
+ private def safe_invoke_on_chunk(on_chunk, **kwargs)
603
+ return unless on_chunk
604
+ on_chunk.call(**kwargs)
605
+ rescue => e
606
+ Clacky::Logger.warn("[on_chunk] callback raised #{e.class}: #{e.message}")
607
+ end
608
+
480
609
  # ── Utilities ─────────────────────────────────────────────────────────────
481
610
 
482
611
  def deep_clone(obj)
@@ -221,8 +221,12 @@ then parse the last stdout line as JSON and read `installed` as N.
221
221
 
222
222
  ### A.10. Import external skills (optional)
223
223
 
224
- Run `test -d ~/.openclaw && echo yes || echo no`. If `no`, skip silently.
225
- If `yes`:
224
+ Check if OpenClaw is installed:
225
+ - Run `test -d ~/.openclaw && echo yes || echo no`
226
+ - If `no` and on WSL (i.e. `/proc/version` contains `microsoft`), also run:
227
+ `powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null | tr -d '\r'` to get the Windows home, then check `test -d "$(wslpath '<win_home>')/.openclaw" && echo yes || echo no`
228
+ - If all checks return `no`, skip silently.
229
+ If any check returns `yes`:
226
230
  1. `ruby "SKILL_DIR/scripts/import_external_skills.rb" --source openclaw --dry-run`
227
231
  2. Parse the skill count N.
228
232
  3. Ask via `request_user_feedback`:
@@ -172,7 +172,7 @@ class OpenClawImporter < ExternalSkillsImporter
172
172
  end
173
173
 
174
174
  private def source_available?
175
- @openclaw_dir.exist?
175
+ openclaw_dirs.any?(&:exist?)
176
176
  end
177
177
 
178
178
  # Returns all directories that may contain OpenClaw skills.
@@ -182,12 +182,56 @@ class OpenClawImporter < ExternalSkillsImporter
182
182
  # - ~/.openclaw/workspace/skills/ (workspace skills)
183
183
  # - ~/.openclaw/skills/ (managed/shared skills)
184
184
  # - ~/.openclaw/workspace/.agents/skills/ (project-level shared skills)
185
+ #
186
+ # On WSL, also scans the Windows-native %USERPROFILE%\.openclaw directory.
185
187
  private def source_dirs
186
- [
187
- @openclaw_dir.join('workspace', 'skills'),
188
- @openclaw_dir.join('skills'),
189
- @openclaw_dir.join('workspace', '.agents', 'skills')
190
- ].select(&:exist?)
188
+ openclaw_dirs.flat_map do |root|
189
+ [
190
+ root.join('workspace', 'skills'),
191
+ root.join('skills'),
192
+ root.join('workspace', '.agents', 'skills')
193
+ ]
194
+ end.select(&:exist?)
195
+ end
196
+
197
+ # All candidate OpenClaw root directories.
198
+ # On WSL, includes both ~/.openclaw and the Windows-native path.
199
+ private def openclaw_dirs
200
+ dirs = [@openclaw_dir]
201
+ win_home = windows_home
202
+ dirs << win_home.join('.openclaw') if win_home && win_home.join('.openclaw') != @openclaw_dir
203
+ dirs
204
+ end
205
+
206
+ # True when running inside WSL.
207
+ # Mirrors EnvironmentDetector#wsl? — reads /proc/version for "microsoft".
208
+ private def wsl?
209
+ return @wsl if defined?(@wsl)
210
+
211
+ @wsl = File.exist?('/proc/version') &&
212
+ File.read('/proc/version').downcase.include?('microsoft')
213
+ rescue StandardError
214
+ @wsl = false
215
+ end
216
+
217
+ # Resolve the Windows %USERPROFILE% as a WSL-accessible Pathname.
218
+ # Uses powershell.exe (standard in WSL) then wslpath for conversion,
219
+ # mirroring the approach in EnvironmentDetector#wsl_desktop_path.
220
+ # Returns nil when not on WSL or when the path cannot be resolved.
221
+ private def windows_home
222
+ return nil unless wsl?
223
+ return nil if `which powershell.exe 2>/dev/null`.strip.empty?
224
+
225
+ win_path = `powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null`.strip.tr("\r\n", '')
226
+ return nil if win_path.empty?
227
+
228
+ linux_path = `wslpath '#{win_path}' 2>/dev/null`.strip
229
+ return nil if linux_path.empty?
230
+
231
+ path = Pathname.new(linux_path)
232
+ path.exist? ? path : nil
233
+ rescue StandardError
234
+ nil
191
235
  end
192
236
 
193
237
  private def discover_skills
@@ -0,0 +1,130 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Clacky
6
+ # Reassembles an OpenAI-compatible chat-completion event stream into the
7
+ # non-streaming response shape that MessageFormat::OpenAI.parse_response
8
+ # consumes, while invoking on_chunk(input_tokens:, output_tokens:) whenever
9
+ # the estimated output count or an upstream usage frame changes the totals.
10
+ #
11
+ # Streaming frames look like:
12
+ #
13
+ # {"id":"...","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
14
+ # {"id":"...","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
15
+ # {"id":"...","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_x","function":{"name":"shell","arguments":"{\"cmd"}}]}}]}
16
+ # {"id":"...","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\"ls\"}"}}]}}]}
17
+ # {"id":"...","choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}
18
+ # {"id":"...","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":3,"prompt_tokens_details":{"cached_tokens":2}}}
19
+ # data: [DONE]
20
+ class OpenAIStreamAggregator
21
+ def initialize(on_chunk: nil)
22
+ @on_chunk = on_chunk
23
+ @content = +""
24
+ @reasoning_content = +""
25
+ @role = "assistant"
26
+ @finish_reason = nil
27
+ @tool_calls = {}
28
+ @usage = nil
29
+ @last_input_tokens = 0
30
+ @last_output_tokens = 0
31
+ end
32
+
33
+ def handle(data_str)
34
+ return if data_str == "[DONE]"
35
+ data = parse_or_nil(data_str)
36
+ return unless data
37
+
38
+ if (choice = (data["choices"] || []).first)
39
+ delta = choice["delta"] || {}
40
+ @role = delta["role"] if delta["role"]
41
+ @content << delta["content"] if delta["content"].is_a?(String)
42
+ @reasoning_content << delta["reasoning_content"] if delta["reasoning_content"].is_a?(String)
43
+ if (tcs = delta["tool_calls"])
44
+ tcs.each { |tc| merge_tool_call(tc) }
45
+ end
46
+ @finish_reason = choice["finish_reason"] if choice["finish_reason"]
47
+ emit_estimate_progress
48
+ end
49
+
50
+ if (u = data["usage"])
51
+ @usage = u
52
+ emit_usage_progress(u)
53
+ end
54
+ end
55
+
56
+ # Render the canonical non-streaming response shape.
57
+ def to_h
58
+ tool_calls = @tool_calls.keys.sort.map do |idx|
59
+ tc = @tool_calls[idx]
60
+ {
61
+ "id" => tc[:id],
62
+ "type" => tc[:type] || "function",
63
+ "function" => {
64
+ "name" => tc[:name],
65
+ "arguments" => tc[:arguments].to_s
66
+ }
67
+ }
68
+ end
69
+
70
+ message = {
71
+ "role" => @role,
72
+ "content" => @content.empty? ? nil : @content
73
+ }
74
+ message["tool_calls"] = tool_calls unless tool_calls.empty?
75
+ message["reasoning_content"] = @reasoning_content unless @reasoning_content.empty?
76
+
77
+ {
78
+ "choices" => [{ "index" => 0, "message" => message, "finish_reason" => @finish_reason }],
79
+ "usage" => @usage || {}
80
+ }
81
+ end
82
+
83
+ private def merge_tool_call(tc)
84
+ idx = tc["index"] || @tool_calls.size
85
+ slot = (@tool_calls[idx] ||= { id: nil, type: nil, name: nil, arguments: +"" })
86
+ slot[:id] ||= tc["id"] if tc["id"]
87
+ slot[:type] ||= tc["type"] if tc["type"]
88
+ if (fn = tc["function"])
89
+ slot[:name] ||= fn["name"] if fn["name"]
90
+ slot[:arguments] << fn["arguments"].to_s if fn["arguments"]
91
+ end
92
+ end
93
+
94
+ private def parse_or_nil(s)
95
+ JSON.parse(s)
96
+ rescue JSON::ParserError
97
+ nil
98
+ end
99
+
100
+ private def emit_estimate_progress
101
+ return unless @on_chunk
102
+ output = approximate_output_tokens
103
+ return if output == @last_output_tokens
104
+ @last_output_tokens = output
105
+ @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
106
+ rescue => e
107
+ Clacky::Logger.warn("[OpenAIStreamAggregator] on_chunk: #{e.class}: #{e.message}")
108
+ end
109
+
110
+ # Rough bytes/4 estimate; replaced by the real count when the upstream
111
+ # finally emits a usage frame (with stream_options.include_usage=true).
112
+ private def approximate_output_tokens
113
+ total_chars = @content.bytesize + @reasoning_content.bytesize +
114
+ @tool_calls.values.sum { |tc| tc[:arguments].to_s.bytesize }
115
+ (total_chars / 4.0).ceil
116
+ end
117
+
118
+ private def emit_usage_progress(u)
119
+ return unless @on_chunk
120
+ total_prompt = u["prompt_tokens"].to_i
121
+ output = u["completion_tokens"].to_i
122
+ return if total_prompt == @last_input_tokens && output == @last_output_tokens
123
+ @last_input_tokens = total_prompt
124
+ @last_output_tokens = output
125
+ @on_chunk.call(input_tokens: total_prompt, output_tokens: output)
126
+ rescue => e
127
+ Clacky::Logger.warn("[OpenAIStreamAggregator] on_chunk: #{e.class}: #{e.message}")
128
+ end
129
+ end
130
+ end
@@ -3372,9 +3372,8 @@ module Clacky
3372
3372
  return unless agent
3373
3373
 
3374
3374
  # Auto-name the session from the first user message (before agent starts running).
3375
- # Check messages.empty? only — agent.name may already hold a default placeholder
3376
- # like "Session 1" assigned at creation time, so it's not a reliable signal.
3377
- if agent.history.empty?
3375
+ # Skip if the name looks like it was set by the user (not a system-generated "Session N").
3376
+ if agent.history.empty? && agent.name.match?(/\ASession \d+\z/)
3378
3377
  auto_name = content.gsub(/\s+/, " ").strip[0, 30]
3379
3378
  auto_name += "…" if content.strip.length > 30
3380
3379
  agent.rename(auto_name)
@@ -225,15 +225,19 @@ module Clacky
225
225
 
226
226
  def show_progress(message = nil, prefix_newline: true, progress_type: "thinking", phase: "active", metadata: {})
227
227
  if phase == "active"
228
- @progress_start_time = Time.now
229
- # Store complete progress state for replay when user switches back to this session
228
+ # Only set start time when transitioning into a fresh progress phase.
229
+ # Streaming LLM calls fire show_progress every chunk for token updates;
230
+ # resetting the timer each time would make the elapsed counter jitter
231
+ # back to 0 in the UI and force the frontend to rebuild its interval.
232
+ if @live_progress_state.nil? || @live_progress_state[:progress_type] != progress_type
233
+ @progress_start_time = Time.now
234
+ @live_stdout_buffer = []
235
+ end
230
236
  @live_progress_state = {
231
237
  message: message,
232
238
  progress_type: progress_type,
233
239
  metadata: metadata
234
240
  }
235
- # Reset stdout buffer for each new command so re-subscribe only replays current run
236
- @live_stdout_buffer = []
237
241
  elsif phase == "done"
238
242
  @live_tool_call = nil # command finished — nothing left to replay
239
243
  # Keep @live_stdout_buffer intact — it will be reset on the next show_progress call.
@@ -71,7 +71,7 @@ module Clacky
71
71
  class ProgressHandle
72
72
  # Default tick interval (seconds). Matches the old global spinner
73
73
  # cadence. Tests may pass a smaller interval for speed.
74
- DEFAULT_TICK_INTERVAL = 0.5
74
+ DEFAULT_TICK_INTERVAL = 0.25
75
75
 
76
76
  # Style hint for the renderer. The owner decides what colors to use;
77
77
  # the handle only forwards the hint as part of the frame metadata
@@ -93,6 +93,12 @@ module Clacky
93
93
  # frame would be visual noise.
94
94
  FAST_FINISH_THRESHOLD_SECONDS = 2
95
95
 
96
+ # Show the "reasoning" hint once the gap since the last LLM stream
97
+ # chunk reaches this many seconds. Bedrock often pauses 5–18s
98
+ # while generating large content blocks (long tool_use JSON in
99
+ # particular); without this hint users assume the agent is stuck.
100
+ IDLE_HINT_THRESHOLD_SECONDS = 2
101
+
96
102
  # @param owner [#register_progress, #unregister_progress, #render_frame]
97
103
  # @param message [String] Initial progress message.
98
104
  # @param style [Symbol] :primary or :quiet (see VALID_STYLES).
@@ -122,6 +128,7 @@ module Clacky
122
128
  @ticker = nil
123
129
  @state = :fresh # :fresh → :running → :closed
124
130
  @metadata = {}
131
+ @last_chunk_at = nil
125
132
  @monitor = Monitor.new
126
133
  end
127
134
 
@@ -133,9 +140,10 @@ module Clacky
133
140
  @monitor.synchronize do
134
141
  return self unless @state == :fresh
135
142
 
136
- @state = :running
137
- @start_time = @clock.call
138
- @entry_id = @owner.register_progress(self)
143
+ @state = :running
144
+ @start_time = @clock.call
145
+ @last_chunk_at = @start_time
146
+ @entry_id = @owner.register_progress(self)
139
147
  end
140
148
 
141
149
  # Fire one initial frame synchronously so the user sees the
@@ -156,9 +164,11 @@ module Clacky
156
164
  @monitor.synchronize do
157
165
  return if @state != :running
158
166
  @message = message.to_s if message
159
- @metadata = metadata if metadata
167
+ if metadata
168
+ @metadata = metadata
169
+ @last_chunk_at = @clock.call
170
+ end
160
171
  end
161
- render_now
162
172
  end
163
173
 
164
174
  # Stop the ticker, render one final frame, and unregister from the
@@ -203,7 +213,7 @@ module Clacky
203
213
  # +render_frame+ and is responsible for writing it into the entry.
204
214
  def current_frame
205
215
  @monitor.synchronize do
206
- compose_frame(@message, elapsed_seconds, @metadata)
216
+ compose_frame(@message, elapsed_seconds, @metadata, idle_seconds)
207
217
  end
208
218
  end
209
219
 
@@ -278,16 +288,68 @@ module Clacky
278
288
  (@clock.call - @start_time).to_i
279
289
  end
280
290
 
281
- # Live-frame format: "<message>… (<elapsed>s)"
282
- # Metadata like { attempt:, total: } is appended as "[i/N]" when
283
- # present, to keep renderer-agnostic callers (e.g. tests) readable.
284
- private def compose_frame(message, elapsed, metadata)
285
- parts = [message.to_s]
291
+ # Seconds since the last metadata update (i.e. the last LLM stream
292
+ # chunk that carried token info), or since start. Used to surface "reasoning"
293
+ # in the live frame so users can see the agent isn't stuck even
294
+ # when token counts plateau during long Bedrock content blocks.
295
+ private def idle_seconds
296
+ return 0 unless @last_chunk_at
297
+ (@clock.call - @last_chunk_at).to_i
298
+ end
299
+
300
+ # Live-frame format:
301
+ # "<message>… (<elapsed>s · ↓ N tokens · reasoning <spinner>)"
302
+ # The "reasoning" tail only appears once tokens have started
303
+ # streaming AND the gap since the last chunk reaches the threshold
304
+ # — signalling the model is between tool_use blocks doing extended
305
+ # thinking. No seconds shown there to avoid duplicating elapsed;
306
+ # a rotating braille spinner glyph provides the "still alive" cue.
307
+ private def compose_frame(message, elapsed, metadata, idle = 0)
308
+ head = message.to_s
286
309
  if metadata && (attempt = metadata[:attempt]) && (total = metadata[:total])
287
- parts << "[#{attempt}/#{total}]"
310
+ head = "#{head} [#{attempt}/#{total}]"
311
+ end
312
+
313
+ token_part = metadata && format_token_progress(metadata)
314
+
315
+ suffix_parts = []
316
+ suffix_parts << "#{elapsed}s" if elapsed > 0
317
+ suffix_parts << token_part if token_part
318
+ if token_part && idle >= IDLE_HINT_THRESHOLD_SECONDS
319
+ suffix_parts << "reasoning #{spinner_frame} "
320
+ end
321
+
322
+ return "#{head}…" if suffix_parts.empty?
323
+ "#{head}… (#{suffix_parts.join(" · ")})"
324
+ end
325
+
326
+ SPINNER_FRAMES = %w[⠋ ⠙ ⠹ ⠸ ⠼ ⠴ ⠦ ⠧ ⠇ ⠏].freeze
327
+ SPINNER_INTERVAL_MS = 250
328
+
329
+ private def spinner_frame
330
+ ms = (@clock.call.to_f * 1000).to_i
331
+ SPINNER_FRAMES[(ms / SPINNER_INTERVAL_MS) % SPINNER_FRAMES.length]
332
+ end
333
+
334
+ # Render LLM streaming token counts as "↑1.2k ↓234 tokens".
335
+ # When input_tokens is unknown (e.g. OpenAI-compat streaming where
336
+ # prompt_tokens only arrives in the final frame), shows "↑—" so the
337
+ # column doesn't flicker between absent / present.
338
+ private def format_token_progress(metadata)
339
+ output = metadata[:output_tokens]
340
+ return nil if output.nil? || output.to_i <= 0
341
+ "↓ #{compact_count(output.to_i)} tokens"
342
+ end
343
+
344
+ private def compact_count(n)
345
+ return n.to_s if n < 1000
346
+ if n < 1_000_000
347
+ k = n / 1000.0
348
+ k >= 10 ? "#{k.to_i}k" : "%.1fk" % k
349
+ else
350
+ m = n / 1_000_000.0
351
+ m >= 10 ? "#{m.to_i}M" : "%.1fM" % m
288
352
  end
289
- head = parts.join(" ")
290
- elapsed > 0 ? "#{head}… (#{elapsed}s)" : "#{head}…"
291
353
  end
292
354
 
293
355
  # Final frame (used by +finish+). Same as +compose_frame+ but we
@@ -1384,8 +1384,10 @@ module Clacky
1384
1384
  # Add action buttons
1385
1385
  choices << { name: "─" * 50, disabled: true }
1386
1386
  choices << { name: "[+] Add New Model", value: { action: :add } }
1387
- choices << { name: "[*] Edit Current Model", value: { action: :edit } }
1388
- choices << { name: "[-] Delete Model", value: { action: :delete } } if current_config.models.length > 1
1387
+ if current_config.models.length > 0
1388
+ choices << { name: "[*] Edit Current Model", value: { action: :edit } }
1389
+ choices << { name: "[-] Delete Model", value: { action: :delete } } if current_config.models.length > 1
1390
+ end
1389
1391
  choices << { name: "[X] Close", value: { action: :close } }
1390
1392
 
1391
1393
  # Show menu