RubyGems - openclacky - Versions diffs - 1.1.1 → 1.1.2 - Mend

openclacky 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +21 -0
data/README.md +28 -7
data/lib/clacky/agent/llm_caller.rb +23 -1
data/lib/clacky/agent/session_serializer.rb +6 -1
data/lib/clacky/agent.rb +14 -5
data/lib/clacky/anthropic_stream_aggregator.rb +135 -0
data/lib/clacky/bedrock_stream_aggregator.rb +137 -0
data/lib/clacky/cli.rb +9 -2
data/lib/clacky/client.rb +146 -17
data/lib/clacky/default_skills/onboard/SKILL.md +6 -2
data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +50 -6
data/lib/clacky/openai_stream_aggregator.rb +130 -0
data/lib/clacky/server/http_server.rb +2 -3
data/lib/clacky/server/web_ui_controller.rb +8 -4
data/lib/clacky/ui2/progress_handle.rb +77 -15
data/lib/clacky/ui2/ui_controller.rb +4 -2
data/lib/clacky/version.rb +1 -1
data/lib/clacky/web/app.css +6 -4
data/lib/clacky/web/i18n.js +6 -0
data/lib/clacky/web/index.html +3 -1
data/lib/clacky/web/sessions.js +152 -48
data/lib/clacky/web/vendor/katex/auto-render.min.js +1 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
data/lib/clacky/web/vendor/katex/katex.min.css +1 -0
data/lib/clacky/web/vendor/katex/katex.min.js +1 -0
data/lib/clacky/web/ws-dispatcher.js +19 -4
data/lib/clacky.rb +3 -0
data/scripts/install.ps1 +14 -3
metadata +28 -2

data/lib/clacky/client.rb CHANGED Viewed

@@ -119,37 +119,59 @@ module Clacky
     #   signal metric — see docs). When we migrate to streaming later, this
     #   same `ttft_ms` field will start carrying the *actual* first-token
     #   latency without any schema change.
-    def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
+    # @param on_chunk [Proc, nil] optional streaming progress callback.
+    #   Receives keyword args { input_tokens:, output_tokens: } with cumulative
+    #   token counts. When nil, behaves exactly as the historical non-streaming
+    #   path. When given, the request is streamed on every provider branch
+    #   below (Bedrock, Anthropic, OpenAI-compatible). The single synthetic
+    #   invocation fired after the response is received is a fallback for a
+    #   provider whose streaming is not yet wired, so UI plumbing can be
+    #   exercised end-to-end without the proxy work; no branch currently takes
+    #   it, since each one sets streaming_used whenever on_chunk is given.
+    def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil)
       caching_enabled = enable_caching && supports_prompt_caching?(model)
       cloned = deep_clone(messages)
+      streaming_used = false
+      first_chunk_at = nil
+      wrapped_on_chunk = on_chunk && lambda do |**kwargs|
+        first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
+        on_chunk.call(**kwargs)
+      end
       t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
       response =
         if bedrock?
-          send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
+          streaming_used = !on_chunk.nil?
+          send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
         elsif anthropic_format?
-          send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
+          streaming_used = !on_chunk.nil?
+          send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
         else
-          send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
+          streaming_used = !on_chunk.nil?
+          send_openai_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
         end
       t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      if on_chunk && !streaming_used
+        usage = response[:usage] || {}
+        safe_invoke_on_chunk(
+          on_chunk,
+          input_tokens:  usage[:prompt_tokens].to_i,
+          output_tokens: usage[:completion_tokens].to_i
+        )
+      end
       duration_ms = ((t1 - t0) * 1000).round
-      # Throughput is only meaningful with a reasonable output size; below ~10
-      # tokens the sample is too small to be informative and the result is
-      # wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
-      # Canonical usage hashes from message_format/* all use :completion_tokens.
+      ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
       output_tokens = response[:usage]&.dig(:completion_tokens).to_i
       tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
       response[:latency] = {
-        ttft_ms:     duration_ms,      # non-streaming: TTFT == full duration
+        ttft_ms:     ttft_ms,
         duration_ms: duration_ms,
         output_tokens: output_tokens,
         tps:         tps,
         model:       model,
         measured_at: Time.now.to_f,
-        streaming:   false              # future flag — true when we migrate
+        streaming:   streaming_used
       }
       response
     end
@@ -195,8 +217,10 @@ module Clacky
     # ── Bedrock Converse request / response ───────────────────────────────────
-    def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
-      body     = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
+    def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
+      body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
+      return send_bedrock_stream_request(body, model, on_chunk) if on_chunk
       response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }
       raise_error(response) unless response.status == 200
@@ -205,6 +229,29 @@ module Clacky
       MessageFormat::Bedrock.parse_response(parsed_body)
     end
+    # Streaming variant for Bedrock Converse.
+    # Posts to /model/{m}/converse-stream with stream:true; the proxy returns
+    # SSE frames whose `event` is the Bedrock event-type and whose `data` is
+    # the raw Bedrock event JSON. We accumulate frames into a synthetic
+    # non-streaming response and feed it back through the existing parser so
+    # downstream code is identical.
+    private def send_bedrock_stream_request(body, model, on_chunk)
+      stream_body = body.merge(stream: true)
+      aggregator = BedrockStreamAggregator.new(on_chunk: on_chunk)
+      sse_buf = +""
+      response = bedrock_connection.post(bedrock_stream_endpoint(model)) do |req|
+        req.body = stream_body.to_json
+        req.options.on_data = proc do |chunk, _bytes_received, _env|
+          sse_buf << chunk
+          drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
+        end
+      end
+      raise_error(response) unless response.status == 200
+      MessageFormat::Bedrock.parse_response(aggregator.to_h)
+    end
     def parse_simple_bedrock_response(response)
       raise_error(response) unless response.status == 200
       data = safe_json_parse(response.body, context: "LLM response")
@@ -216,11 +263,13 @@ module Clacky
     # ── Anthropic request / response ──────────────────────────────────────────
-    def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
+    def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
       # Apply cache_control to the message that marks the cache breakpoint
       messages = apply_message_caching(messages) if caching_enabled
-      body     = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
+      body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
+      return send_anthropic_stream_request(body, on_chunk) if on_chunk
       response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
       raise_error(response) unless response.status == 200
@@ -229,6 +278,24 @@ module Clacky
       MessageFormat::Anthropic.parse_response(parsed_body)
     end
+    private def send_anthropic_stream_request(body, on_chunk)
+      stream_body = body.merge(stream: true)
+      aggregator = AnthropicStreamAggregator.new(on_chunk: on_chunk)
+      sse_buf = +""
+      response = anthropic_connection.post(anthropic_messages_path) do |req|
+        req.headers["Accept"] = "text/event-stream"
+        req.body = stream_body.to_json
+        req.options.on_data = proc do |chunk, _bytes_received, _env|
+          sse_buf << chunk
+          drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
+        end
+      end
+      raise_error(response) unless response.status == 200
+      MessageFormat::Anthropic.parse_response(aggregator.to_h)
+    end
     def parse_simple_anthropic_response(response)
       raise_error(response) unless response.status == 200
       data = safe_json_parse(response.body, context: "LLM response")
@@ -237,24 +304,47 @@ module Clacky
     # ── OpenAI request / response ─────────────────────────────────────────────
-    def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
+    def send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
       # Apply cache_control markers to messages when caching is enabled.
       # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
       messages = apply_message_caching(messages) if caching_enabled
-      body     = MessageFormat::OpenAI.build_request_body(
+      body = MessageFormat::OpenAI.build_request_body(
         messages, model, tools, max_tokens, caching_enabled,
         vision_supported: @vision_supported
       )
+      return send_openai_stream_request(body, on_chunk) if on_chunk
       response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
       raise_error(response) unless response.status == 200
       check_html_response(response)
       parsed_body = safe_json_parse(response.body, context: "LLM response")
       MessageFormat::OpenAI.parse_response(parsed_body)
     end
+    # Streaming variant for OpenAI-compatible chat completions (DeepSeek/OpenRouter
+    # via platform/llm_proxy). Uses Faraday's on_data hook to consume SSE frames,
+    # accumulates them, and reconstructs the non-streaming JSON response shape so
+    # MessageFormat::OpenAI.parse_response works unchanged.
+    private def send_openai_stream_request(body, on_chunk)
+      stream_body = body.merge(stream: true, stream_options: { include_usage: true })
+      aggregator = OpenAIStreamAggregator.new(on_chunk: on_chunk)
+      sse_buf = +""
+      response = openai_connection.post("chat/completions") do |req|
+        req.body = stream_body.to_json
+        req.options.on_data = proc do |chunk, _bytes_received, _env|
+          sse_buf << chunk
+          drain_sse_frames(sse_buf) { |_event, data| aggregator.handle(data) }
+        end
+      end
+      raise_error(response) unless response.status == 200
+      MessageFormat::OpenAI.parse_response(aggregator.to_h)
+    end
     def parse_simple_openai_response(response)
       raise_error(response) unless response.status == 200
       parsed_body = safe_json_parse(response.body, context: "LLM response")
@@ -320,6 +410,33 @@ module Clacky
       "/model/#{model}/converse"
     end
+    # Bedrock Converse streaming endpoint path.
+    private def bedrock_stream_endpoint(model)
+      "/model/#{model}/converse-stream"
+    end
+    # Pull complete SSE frames out of a buffer and yield them as (event, data).
+    # An SSE frame ends at a blank line ("\n\n"); incomplete trailing data
+    # stays in the buffer for the next chunk. Frames without an explicit
+    # `event:` line use the default "message" type per the SSE spec.
+    private def drain_sse_frames(buf)
+      while (sep = buf.index("\n\n"))
+        frame = buf.slice!(0, sep + 2)
+        event = "message"
+        data_lines = []
+        frame.each_line do |line|
+          line = line.chomp
+          if line.start_with?("event:")
+            event = line.sub(/^event:\s*/, "")
+          elsif line.start_with?("data:")
+            data_lines << line.sub(/^data:\s*/, "")
+          end
+        end
+        next if data_lines.empty?
+        yield event, data_lines.join("\n")
+      end
+    end
     def bedrock_connection
       @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
         conn.headers["Content-Type"]  = "application/json"
@@ -477,6 +594,18 @@ module Clacky
                            "The request will be retried automatically."
     end
+    # ── Streaming helpers ─────────────────────────────────────────────────────
+    # Invoke the user's on_chunk callback in a way that never lets a callback
+    # error tear down the LLM request. Streaming chunks are best-effort UI
+    # updates; a buggy progress renderer must not abort an in-flight call.
+    private def safe_invoke_on_chunk(on_chunk, **kwargs)
+      return unless on_chunk
+      on_chunk.call(**kwargs)
+    rescue => e
+      Clacky::Logger.warn("[on_chunk] callback raised #{e.class}: #{e.message}")
+    end
     # ── Utilities ─────────────────────────────────────────────────────────────
     def deep_clone(obj)

data/lib/clacky/default_skills/onboard/SKILL.md CHANGED Viewed

@@ -221,8 +221,12 @@ then parse the last stdout line as JSON and read `installed` as N.
 ### A.10. Import external skills (optional)
-Run `test -d ~/.openclaw && echo yes || echo no`. If `no`, skip silently.
-If `yes`:
+Check if OpenClaw is installed:
+- Run `test -d ~/.openclaw && echo yes || echo no`
+- If `no` and on WSL (i.e. `/proc/version` contains `microsoft`), also run:
+  `powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null | tr -d '\r'` to get the Windows home, then check `test -d "$(wslpath '<win_home>')/.openclaw" && echo yes || echo no`
+- If all checks return `no`, skip silently.
+If any check returns `yes`:
 1. `ruby "SKILL_DIR/scripts/import_external_skills.rb" --source openclaw --dry-run`
 2. Parse the skill count N.
 3. Ask via `request_user_feedback`:

data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb CHANGED Viewed

@@ -172,7 +172,7 @@ class OpenClawImporter < ExternalSkillsImporter
   end
   private def source_available?
-    @openclaw_dir.exist?
+    openclaw_dirs.any?(&:exist?)
   end
   # Returns all directories that may contain OpenClaw skills.
@@ -182,12 +182,56 @@ class OpenClawImporter < ExternalSkillsImporter
   #   - ~/.openclaw/workspace/skills/             (workspace skills)
   #   - ~/.openclaw/skills/                        (managed/shared skills)
   #   - ~/.openclaw/workspace/.agents/skills/      (project-level shared skills)
+  #
+  # On WSL, also scans the Windows-native %USERPROFILE%\.openclaw directory.
   private def source_dirs
-    [
-      @openclaw_dir.join('workspace', 'skills'),
-      @openclaw_dir.join('skills'),
-      @openclaw_dir.join('workspace', '.agents', 'skills')
-    ].select(&:exist?)
+    openclaw_dirs.flat_map do |root|
+      [
+        root.join('workspace', 'skills'),
+        root.join('skills'),
+        root.join('workspace', '.agents', 'skills')
+      ]
+    end.select(&:exist?)
+  end
+  # All candidate OpenClaw root directories.
+  # On WSL, includes both ~/.openclaw and the Windows-native path.
+  private def openclaw_dirs
+    dirs = [@openclaw_dir]
+    win_home = windows_home
+    dirs << win_home.join('.openclaw') if win_home && win_home.join('.openclaw') != @openclaw_dir
+    dirs
+  end
+  # True when running inside WSL.
+  # Mirrors EnvironmentDetector#wsl? — reads /proc/version for "microsoft".
+  private def wsl?
+    return @wsl if defined?(@wsl)
+    @wsl = File.exist?('/proc/version') &&
+           File.read('/proc/version').downcase.include?('microsoft')
+  rescue StandardError
+    @wsl = false
+  end
+  # Resolve the Windows %USERPROFILE% as a WSL-accessible Pathname.
+  # Uses powershell.exe (standard in WSL) then wslpath for conversion,
+  # mirroring the approach in EnvironmentDetector#wsl_desktop_path.
+  # Returns nil when not on WSL or when the path cannot be resolved.
+  private def windows_home
+    return nil unless wsl?
+    return nil if `which powershell.exe 2>/dev/null`.strip.empty?
+    win_path = `powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null`.strip.tr("\r\n", '')
+    return nil if win_path.empty?
+    linux_path = `wslpath '#{win_path}' 2>/dev/null`.strip
+    return nil if linux_path.empty?
+    path = Pathname.new(linux_path)
+    path.exist? ? path : nil
+  rescue StandardError
+    nil
   end
   private def discover_skills

data/lib/clacky/openai_stream_aggregator.rb ADDED Viewed

@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+require "json"
+module Clacky
+  # Reassembles an OpenAI-compatible chat-completion event stream into the
+  # non-streaming response shape that MessageFormat::OpenAI.parse_response
+  # consumes, while invoking on_chunk(input_tokens:, output_tokens:) whenever
+  # the estimated output-token count changes after a choice delta, and again
+  # with the reported counts when the upstream emits a usage frame.
+  #
+  # Streaming frames look like:
+  #
+  #   {"id":"...","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+  #   {"id":"...","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}
+  #   {"id":"...","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_x","function":{"name":"shell","arguments":"{\"cmd"}}]}}]}
+  #   {"id":"...","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\"ls\"}"}}]}}]}
+  #   {"id":"...","choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}
+  #   {"id":"...","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":3,"prompt_tokens_details":{"cached_tokens":2}}}
+  #   data: [DONE]
+  class OpenAIStreamAggregator
+    def initialize(on_chunk: nil)
+      @on_chunk = on_chunk
+      @content = +""
+      @reasoning_content = +""
+      @role = "assistant"
+      @finish_reason = nil
+      @tool_calls = {}
+      @usage = nil
+      @last_input_tokens = 0
+      @last_output_tokens = 0
+    end
+    def handle(data_str)
+      return if data_str == "[DONE]"
+      data = parse_or_nil(data_str)
+      return unless data
+      if (choice = (data["choices"] || []).first)
+        delta = choice["delta"] || {}
+        @role = delta["role"] if delta["role"]
+        @content << delta["content"] if delta["content"].is_a?(String)
+        @reasoning_content << delta["reasoning_content"] if delta["reasoning_content"].is_a?(String)
+        if (tcs = delta["tool_calls"])
+          tcs.each { |tc| merge_tool_call(tc) }
+        end
+        @finish_reason = choice["finish_reason"] if choice["finish_reason"]
+        emit_estimate_progress
+      end
+      if (u = data["usage"])
+        @usage = u
+        emit_usage_progress(u)
+      end
+    end
+    # Render the canonical non-streaming response shape.
+    def to_h
+      tool_calls = @tool_calls.keys.sort.map do |idx|
+        tc = @tool_calls[idx]
+        {
+          "id"       => tc[:id],
+          "type"     => tc[:type] || "function",
+          "function" => {
+            "name"      => tc[:name],
+            "arguments" => tc[:arguments].to_s
+          }
+        }
+      end
+      message = {
+        "role"    => @role,
+        "content" => @content.empty? ? nil : @content
+      }
+      message["tool_calls"] = tool_calls unless tool_calls.empty?
+      message["reasoning_content"] = @reasoning_content unless @reasoning_content.empty?
+      {
+        "choices" => [{ "index" => 0, "message" => message, "finish_reason" => @finish_reason }],
+        "usage"   => @usage || {}
+      }
+    end
+    private def merge_tool_call(tc)
+      idx = tc["index"] || @tool_calls.size
+      slot = (@tool_calls[idx] ||= { id: nil, type: nil, name: nil, arguments: +"" })
+      slot[:id] ||= tc["id"] if tc["id"]
+      slot[:type] ||= tc["type"] if tc["type"]
+      if (fn = tc["function"])
+        slot[:name] ||= fn["name"] if fn["name"]
+        slot[:arguments] << fn["arguments"].to_s if fn["arguments"]
+      end
+    end
+    private def parse_or_nil(s)
+      JSON.parse(s)
+    rescue JSON::ParserError
+      nil
+    end
+    private def emit_estimate_progress
+      return unless @on_chunk
+      output = approximate_output_tokens
+      return if output == @last_output_tokens
+      @last_output_tokens = output
+      @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
+    rescue => e
+      Clacky::Logger.warn("[OpenAIStreamAggregator] on_chunk: #{e.class}: #{e.message}")
+    end
+    # Rough bytes/4 estimate (computed from bytesize, so multi-byte characters
+    # weigh more than one); replaced by the real count when the upstream
+    # finally emits a usage frame (with stream_options.include_usage=true).
+    private def approximate_output_tokens
+      total_chars = @content.bytesize + @reasoning_content.bytesize +
+        @tool_calls.values.sum { |tc| tc[:arguments].to_s.bytesize }
+      (total_chars / 4.0).ceil
+    end
+    private def emit_usage_progress(u)
+      return unless @on_chunk
+      total_prompt = u["prompt_tokens"].to_i
+      output       = u["completion_tokens"].to_i
+      return if total_prompt == @last_input_tokens && output == @last_output_tokens
+      @last_input_tokens = total_prompt
+      @last_output_tokens = output
+      @on_chunk.call(input_tokens: total_prompt, output_tokens: output)
+    rescue => e
+      Clacky::Logger.warn("[OpenAIStreamAggregator] on_chunk: #{e.class}: #{e.message}")
+    end
+  end
+end

data/lib/clacky/server/http_server.rb CHANGED Viewed

@@ -3372,9 +3372,8 @@ module Clacky
         return unless agent
         # Auto-name the session from the first user message (before agent starts running).
-        # Check messages.empty? only — agent.name may already hold a default placeholder
-        # like "Session 1" assigned at creation time, so it's not a reliable signal.
-        if agent.history.empty?
+        # Skip if the name looks like it was set by the user (not a system-generated "Session N").
+        if agent.history.empty? && agent.name.match?(/\ASession \d+\z/)
           auto_name = content.gsub(/\s+/, " ").strip[0, 30]
           auto_name += "…" if content.strip.length > 30
           agent.rename(auto_name)

data/lib/clacky/server/web_ui_controller.rb CHANGED Viewed

@@ -225,15 +225,19 @@ module Clacky
       def show_progress(message = nil, prefix_newline: true, progress_type: "thinking", phase: "active", metadata: {})
         if phase == "active"
-          @progress_start_time = Time.now
-          # Store complete progress state for replay when user switches back to this session
+          # Only set start time when transitioning into a fresh progress phase.
+          # Streaming LLM calls fire show_progress every chunk for token updates;
+          # resetting the timer each time would make the elapsed counter jitter
+          # back to 0 in the UI and force the frontend to rebuild its interval.
+          if @live_progress_state.nil? || @live_progress_state[:progress_type] != progress_type
+            @progress_start_time = Time.now
+            @live_stdout_buffer = []
+          end
           @live_progress_state = {
             message: message,
             progress_type: progress_type,
             metadata: metadata
           }
-          # Reset stdout buffer for each new command so re-subscribe only replays current run
-          @live_stdout_buffer = []
         elsif phase == "done"
           @live_tool_call = nil   # command finished — nothing left to replay
           # Keep @live_stdout_buffer intact — it will be reset on the next show_progress call.

data/lib/clacky/ui2/progress_handle.rb CHANGED Viewed

@@ -71,7 +71,7 @@ module Clacky
     class ProgressHandle
       # Default tick interval (seconds). Matches the old global spinner
       # cadence. Tests may pass a smaller interval for speed.
-      DEFAULT_TICK_INTERVAL = 0.5
+      DEFAULT_TICK_INTERVAL = 0.25
       # Style hint for the renderer. The owner decides what colors to use;
       # the handle only forwards the hint as part of the frame metadata
@@ -93,6 +93,12 @@ module Clacky
       # frame would be visual noise.
       FAST_FINISH_THRESHOLD_SECONDS = 2
+      # Show the "reasoning" hint (with an animated spinner) once the gap
+      # since the last LLM stream chunk reaches this many seconds. Bedrock
+      # often pauses 5–18s while generating large content blocks (long
+      # tool_use JSON in particular); without this hint users assume the
+      # agent is stuck.
+      IDLE_HINT_THRESHOLD_SECONDS = 2
       # @param owner [#register_progress, #unregister_progress, #render_frame]
       # @param message [String] Initial progress message.
       # @param style [Symbol] :primary or :quiet (see VALID_STYLES).
@@ -122,6 +128,7 @@ module Clacky
         @ticker        = nil
         @state         = :fresh     # :fresh → :running → :closed
         @metadata      = {}
+        @last_chunk_at = nil
         @monitor       = Monitor.new
       end
@@ -133,9 +140,10 @@ module Clacky
         @monitor.synchronize do
           return self unless @state == :fresh
-          @state      = :running
-          @start_time = @clock.call
-          @entry_id   = @owner.register_progress(self)
+          @state         = :running
+          @start_time    = @clock.call
+          @last_chunk_at = @start_time
+          @entry_id      = @owner.register_progress(self)
         end
         # Fire one initial frame synchronously so the user sees the
@@ -156,9 +164,11 @@ module Clacky
         @monitor.synchronize do
           return if @state != :running
           @message  = message.to_s if message
-          @metadata = metadata     if metadata
+          if metadata
+            @metadata = metadata
+            @last_chunk_at = @clock.call
+          end
         end
-        render_now
       end
       # Stop the ticker, render one final frame, and unregister from the
@@ -203,7 +213,7 @@ module Clacky
       # +render_frame+ and is responsible for writing it into the entry.
       def current_frame
         @monitor.synchronize do
-          compose_frame(@message, elapsed_seconds, @metadata)
+          compose_frame(@message, elapsed_seconds, @metadata, idle_seconds)
         end
       end
@@ -278,16 +288,68 @@ module Clacky
         (@clock.call - @start_time).to_i
       end
-      # Live-frame format: "<message>… (<elapsed>s)"
-      # Metadata like { attempt:, total: } is appended as "[i/N]" when
-      # present, to keep renderer-agnostic callers (e.g. tests) readable.
-      private def compose_frame(message, elapsed, metadata)
-        parts = [message.to_s]
+      # Seconds since the last metadata update (i.e. the last LLM stream
+      # chunk that carried token info), or since start when none has
+      # arrived yet. Used to surface the "reasoning" hint in the live
+      # frame so users can see the agent isn't stuck even when token
+      # counts plateau during long Bedrock content blocks.
+      private def idle_seconds
+        return 0 unless @last_chunk_at
+        (@clock.call - @last_chunk_at).to_i
+      end
+      # Live-frame format:
+      #   "<message>… (<elapsed>s · ↓ N tokens · reasoning <spinner>)"
+      # The "reasoning" tail only appears once tokens have started
+      # streaming AND the gap since the last chunk reaches the threshold
+      # — signalling the model is between tool_use blocks doing extended
+      # thinking. No seconds shown there to avoid duplicating elapsed;
+      # an animated spinner glyph provides the "still alive" cue.
+      # Metadata like { attempt:, total: } is appended to the message
+      # as "[i/N]" when present.
+      private def compose_frame(message, elapsed, metadata, idle = 0)
+        head = message.to_s
         if metadata && (attempt = metadata[:attempt]) && (total = metadata[:total])
-          parts << "[#{attempt}/#{total}]"
+          head = "#{head} [#{attempt}/#{total}]"
+        end
+        token_part = metadata && format_token_progress(metadata)
+        suffix_parts = []
+        suffix_parts << "#{elapsed}s" if elapsed > 0
+        suffix_parts << token_part if token_part
+        if token_part && idle >= IDLE_HINT_THRESHOLD_SECONDS
+          suffix_parts << "reasoning #{spinner_frame} "
+        end
+        return "#{head}…" if suffix_parts.empty?
+        "#{head}… (#{suffix_parts.join(" · ")})"
+      end
+      SPINNER_FRAMES = %w[⠋ ⠙ ⠹ ⠸ ⠼ ⠴ ⠦ ⠧ ⠇ ⠏].freeze
+      SPINNER_INTERVAL_MS = 250
+      private def spinner_frame
+        ms = (@clock.call.to_f * 1000).to_i
+        SPINNER_FRAMES[(ms / SPINNER_INTERVAL_MS) % SPINNER_FRAMES.length]
+      end
+      # Render the LLM streaming output-token count as "↓ 234 tokens"
+      # (or "↓ 1.2k tokens" for larger counts). Returns nil when no
+      # positive output count is available yet. Input tokens are not
+      # rendered here: with OpenAI-compat streaming, prompt_tokens only
+      # arrives in the final frame, so an "↑" column would flicker
+      # between absent / present.
+      private def format_token_progress(metadata)
+        output = metadata[:output_tokens]
+        return nil if output.nil? || output.to_i <= 0
+        "↓ #{compact_count(output.to_i)} tokens"
+      end
+      private def compact_count(n)
+        return n.to_s if n < 1000
+        if n < 1_000_000
+          k = n / 1000.0
+          k >= 10 ? "#{k.to_i}k" : "%.1fk" % k
+        else
+          m = n / 1_000_000.0
+          m >= 10 ? "#{m.to_i}M" : "%.1fM" % m
         end
-        head = parts.join(" ")
-        elapsed > 0 ? "#{head}… (#{elapsed}s)" : "#{head}…"
       end
       # Final frame (used by +finish+). Same as +compose_frame+ but we

data/lib/clacky/ui2/ui_controller.rb CHANGED Viewed

@@ -1384,8 +1384,10 @@ module Clacky
           # Add action buttons
           choices << { name: "─" * 50, disabled: true }
           choices << { name: "[+] Add New Model", value: { action: :add } }
-          choices << { name: "[*] Edit Current Model", value: { action: :edit } }
-          choices << { name: "[-] Delete Model", value: { action: :delete } } if current_config.models.length > 1
+          if current_config.models.length > 0
+            choices << { name: "[*] Edit Current Model", value: { action: :edit } }
+            choices << { name: "[-] Delete Model", value: { action: :delete } } if current_config.models.length > 1
+          end
           choices << { name: "[X] Close", value: { action: :close } }
           # Show menu