openclacky 1.0.0.beta.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: afc12c94c2b8b7580ca948625cc6c106004bbf385f341c783e36e1be9d93fd82
- data.tar.gz: 95508d829f02270b3fce4849b21e29b6766a46d9c663d47e37df817aed456da5
+ metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
+ data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
  SHA512:
- metadata.gz: 8f44be2b9d9bf26f97490f5ddf2525a6cad937c5152b8486bb2840a263ab104cacfa5838600236b3a38a6806e69cd717fbce982838f2c2a65664158b0b4ed238
- data.tar.gz: aecb14f4b6f345d190e52de0c0816f380b4e6c3213453c9e69a04b78944f757115e8a1ac042b0a78398e79d27de65190f4c0cb61d1efe3c224416b6a2f55f6c6
+ metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
+ data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
data/CHANGELOG.md CHANGED
@@ -5,7 +5,19 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
- ## [Unreleased]
+ ## [1.0.0] - 2026-04-30
+
+ ### Added
+ - **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
+ - **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
+ - **Default model changed to 4.5.** The new default model provides a better balance of speed, quality, and cost for most tasks.
+
+ ### Improved
+ - **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
+ - **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
+
+ ### Fixed
+ - **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
 
  ## [1.0.0.beta.6] - 2026-04-30
 
@@ -86,7 +86,45 @@ module Clacky
  # Successful response — if we were probing, confirm primary is healthy.
  handle_probe_success if @config.probing?
 
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
+ rescue Faraday::TimeoutError => e
+   # ── Read-timeout path (distinct from connection-level failures) ──
+   # Faraday::TimeoutError on our non-streaming POST almost always means
+   # the *response* took longer than the 300s read-timeout to come back —
+   # i.e. the model is trying to produce a huge output in one shot
+   # (e.g. "write me a 2000-line snake game"). Blindly retrying the same
+   # request with the same prompt reproduces the same timeout.
+   #
+   # Strategy:
+   #   1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
+   #      telling the model to break the work into smaller steps, then
+   #      retry. The history edit changes the prompt, so the retry is
+   #      materially different from the failed attempt.
+   #   2. On subsequent timeouts in the same task, fall back to the
+   #      generic "just retry" behaviour (the model may have ignored
+   #      the hint; don't pile on duplicate hints).
+   #   3. Probing-mode timeouts still go through handle_probe_failure.
+   retries += 1
+
+   if @config.probing?
+     handle_probe_failure
+     retry
+   end
+
+   if retries <= max_retries
+     inject_large_output_hint_if_first_timeout(e)
+     @ui&.show_progress(
+       "Response too slow (likely generating too much at once): #{e.message}",
+       progress_type: "retrying",
+       phase: "active",
+       metadata: { attempt: retries, total: max_retries }
+     )
+     sleep retry_delay
+     retry
+   else
+     raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
+   end
+
+ rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
  retries += 1
 
  # Probing failure: primary still down — renew cooling-off and retry with fallback.
@@ -95,9 +133,10 @@ module Clacky
    retry
  end
 
- # Network-level errors (timeouts, connection failures) are likely transient
- # infrastructure blips — do NOT trigger fallback. Just retry on the current
- # model (primary or already-active fallback) up to max_retries.
+ # Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
+ # transient infrastructure blips — do NOT trigger fallback, and do
+ # NOT inject the "break into steps" hint (the model did nothing wrong).
+ # Just retry on the current model up to max_retries.
  if retries <= max_retries
    @ui&.show_progress(
      "Network failed: #{e.message}",
@@ -229,6 +268,50 @@ module Clacky
  (msg.include?("thinking") || msg.include?("must be passed back") ||
   msg.include?("must be provided"))
  end
+
+ # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
+ # user message to the history instructing the model to break its work
+ # into smaller steps. Subsequent timeouts in the same task are ignored
+ # here (caller just retries) so we don't pollute history with duplicate
+ # hints.
+ #
+ # The injected message carries `system_injected: true` so it is:
+ #   - Hidden from UI replay (session_serializer / replay_history filters)
+ #   - Skipped by prompt-caching marker placement (client.rb)
+ #   - Skipped by message compression's "recent user turn" protection
+ #     (message_compressor_helper.rb)
+ #
+ # Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
+ private def inject_large_output_hint_if_first_timeout(err)
+   return if @task_timeout_hint_injected
+
+   @task_timeout_hint_injected = true
+
+   hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
+          "This usually means the model was trying to produce too much output in a single response. " \
+          "Please change your approach:\n" \
+          "- Break the task into multiple smaller steps, each producing a short response.\n" \
+          "- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
+          "then fill in each section with separate `edit` calls.\n" \
+          "- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
+          "- Do NOT attempt to output the entire deliverable in one response."
+
+   @history.append({
+     role: "user",
+     content: hint,
+     system_injected: true,
+     task_id: @current_task_id
+   })
+
+   Clacky::Logger.info(
+     "[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
+     "(error=#{err.class}: #{err.message})"
+   )
+
+   @ui&.show_warning(
+     "LLM response timed out — asking model to break the task into smaller steps and retrying..."
+   )
+ end
  end
  end
  end
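To make the `system_injected: true` contract concrete, here is a toy Ruby sketch of the replay-side filtering the comment describes. The message shape is inferred from this diff; the gem's actual serializer logic may differ.

```ruby
# Toy illustration: system-injected turns reach the model but never the screen.
# Hash shapes are assumptions based on this diff, not openclacky's exact schema.
messages = [
  { role: "user", content: "Build the snake game" },
  { role: "user", content: "[SYSTEM] The previous LLM response timed out...",
    system_injected: true },
  { role: "assistant", content: "Step 1: write the skeleton..." }
]

# UI replay drops injected hints; the API payload would keep them.
replayable = messages.reject { |m| m[:system_injected] }
replayable.each { |m| puts "#{m[:role]}: #{m[:content]}" }
```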
@@ -36,6 +36,15 @@ module Clacky
  # Restore previous_total_tokens for accurate delta calculation across sessions
  @previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
 
+ # Recover the latest latency metric from the most recent assistant message
+ # that carries a :latency field. This is the source of truth for the status-bar
+ # signal — no separate session-level field is needed. Older sessions (pre-feature)
+ # simply start with nil; the signal stays hidden until the next LLM call populates it.
+ last_assistant_with_latency = @history.to_a.reverse.find do |m|
+   m[:role].to_s == "assistant" && m[:latency]
+ end
+ @latest_latency = last_assistant_with_latency&.dig(:latency)
+
  # Restore Time Machine state
  @task_parents = session_data.dig(:time_machine, :task_parents) || {}
  @current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
@@ -178,8 +187,18 @@ module Clacky
  elsif current_round
    current_round[:events] << msg
  elsif msg[:compressed_summary] && msg[:chunk_path]
-   # Compressed summary sitting before any user rounds — expand it from chunk md
-   chunk_rounds = parse_chunk_md_to_rounds(msg[:chunk_path])
+   # Compressed summary sitting before any user rounds — expand ALL chunk
+   # MD files that belong to the same session (siblings of chunk_path),
+   # in chunk-index ascending order.
+   #
+   # Under the current "single summary + previous_chunks index" scheme,
+   # session.json only keeps the newest compressed_summary message (which
+   # points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
+   # referenced only as basenames inside the summary text. Expanding just
+   # msg[:chunk_path] would therefore lose all prior chunks on replay.
+   chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
+     parse_chunk_md_to_rounds(p)
+   }
    rounds.concat(chunk_rounds)
    # After expanding, treat the last chunk round as the current round so that
    # any orphaned assistant/tool messages that follow in session.json (belonging
@@ -243,6 +262,32 @@ module Clacky
    { has_more: has_more }
  end
 
+ # Return all chunk MD file paths that belong to the same session as
+ # +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
+ # Uses the filename convention "<base>-chunk-<N>.md".
+ #
+ # Handles path resolution the same way parse_chunk_md_to_rounds does:
+ # if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
+ # (cross-machine / cross-user session bundles).
+ private def sibling_chunks_of(chunk_path)
+   return [] unless chunk_path
+
+   resolved = chunk_path.to_s
+   unless File.exist?(resolved)
+     resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
+   end
+   return [] unless File.exist?(resolved)
+
+   dir = File.dirname(resolved)
+   base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
+   return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
+
+   Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
+     m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
+     m ? m[1].to_i : Float::INFINITY
+   end
+ end
+
  # Parse a chunk MD file into an array of rounds compatible with replay_history.
  # Each round is { user_msg: Hash, events: Array<Hash> }.
  # Timestamps are synthesised from the chunk's archived_at, spread backwards.
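The numeric `sort_by` in `sibling_chunks_of` matters once a session accumulates ten or more chunks. A quick standalone demonstration (file names invented):

```ruby
# Why extract the integer instead of sorting paths as strings:
# lexicographic order would slot chunk-10 between chunk-1 and chunk-2.
paths = %w[sess-chunk-2.md sess-chunk-10.md sess-chunk-1.md]

lexicographic = paths.sort
# => ["sess-chunk-1.md", "sess-chunk-10.md", "sess-chunk-2.md"]  (wrong replay order)

numeric = paths.sort_by { |p| p[/-chunk-(\d+)\.md\z/, 1].to_i }
# => ["sess-chunk-1.md", "sess-chunk-2.md", "sess-chunk-10.md"]  (correct)

puts numeric.inspect
```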
data/lib/clacky/agent.rb CHANGED
@@ -42,7 +42,8 @@ module Clacky
 
  attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
              :cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
-             :status, :error, :updated_at, :source
+             :status, :error, :updated_at, :source,
+             :latest_latency # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
  attr_accessor :pinned
 
  def permission_mode
@@ -78,6 +79,7 @@ module Clacky
  @task_cost_source = :estimated # Track cost source for current task
  @previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
  @interrupted = false # Flag for user interrupt
+ @latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
  @ui = ui # UIController for direct UI interaction
  @debug_logs = [] # Debug logs for troubleshooting
  @pending_injections = [] # Pending inline skill injections to flush after observe()
@@ -208,6 +210,7 @@ module Clacky
 
  @start_time = Time.now
  @task_truncation_count = 0 # Reset truncation counter for each task
+ @task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
  @task_cost_source = :estimated # Reset for new task
  # Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
  # across tasks to correctly calculate delta tokens in each iteration
@@ -681,6 +684,17 @@ module Clacky
  end
  # Store token_usage in the message so replay_history can re-emit it
  msg[:token_usage] = response[:token_usage] if response[:token_usage]
+ # Store per-message latency — this is the source of truth (session.json)
+ # for all time-to-first-token / duration / throughput info. The status
+ # bar signal reads the last assistant message's latency; no separate
+ # config file or top-level session field is introduced.
+ if response[:latency]
+   msg[:latency] = response[:latency]
+   @latest_latency = response[:latency]
+   # Push to UI so the status-bar signal updates immediately after the
+   # model finishes (before any tool execution delays the next event).
+   @ui&.update_sessionbar(latency: response[:latency])
+ end
  # Preserve reasoning_content from the real LLM response.
  # This is the authoritative signal used by MessageHistory#to_api to
  # detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
data/lib/clacky/client.rb CHANGED
@@ -89,18 +89,54 @@ module Clacky
  # ── Agent main path ───────────────────────────────────────────────────────
 
  # Send messages with tool-calling support.
- # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
+ # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
+ #
+ # Latency measurement:
+ #   Because the current HTTP path is *non-streaming* (plain POST, response
+ #   body read in one shot), TTFB (time to response headers) is not exposed
+ #   by Faraday's default adapter without extra plumbing. What we CAN measure
+ #   cheaply — and what users actually feel — is total request duration,
+ #   which for a non-streaming call equals the time from "hit Enter" to
+ #   "first token visible" (since we receive everything at once).
+ #
+ #   So we record `duration_ms` as the authoritative number and alias it to
+ #   `ttft_ms` for downstream consumers (the status bar uses ttft_ms as its
+ #   signal metric — see docs). When we migrate to streaming later, this
+ #   same `ttft_ms` field will start carrying the *actual* first-token
+ #   latency without any schema change.
  def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
    caching_enabled = enable_caching && supports_prompt_caching?(model)
    cloned = deep_clone(messages)
 
-   if bedrock?
-     send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
-   elsif anthropic_format?
-     send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
-   else
-     send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
-   end
+   t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   response =
+     if bedrock?
+       send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
+     elsif anthropic_format?
+       send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
+     else
+       send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
+     end
+   t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+   duration_ms = ((t1 - t0) * 1000).round
+   # Throughput is only meaningful with a reasonable output size; below ~10
+   # tokens the sample is too small to be informative (extrapolating 1 token
+   # in 50ms to "20 tok/s" says nothing about sustained throughput).
+   # Canonical usage hashes from message_format/* all use :completion_tokens.
+   output_tokens = response[:usage]&.dig(:completion_tokens).to_i
+   tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
+
+   response[:latency] = {
+     ttft_ms: duration_ms, # non-streaming: TTFT == full duration
+     duration_ms: duration_ms,
+     output_tokens: output_tokens,
+     tps: tps,
+     model: model,
+     measured_at: Time.now.to_f,
+     streaming: false # future flag — true when we migrate
+   }
+   response
  end
 
  # Format tool results into canonical messages ready to append to @messages.
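To make the arithmetic above concrete, a worked example of the latency hash for a hypothetical call (all numbers invented for illustration):

```ruby
# Worked example of the latency fields computed in send_messages_with_tools.
t0 = 0.0
t1 = 8.2            # pretend the POST took 8.2s of monotonic time
output_tokens = 246

duration_ms = ((t1 - t0) * 1000).round                 # => 8200
tps = (output_tokens * 1000.0 / duration_ms).round(1)  # => 30.0 tokens/sec

latency = {
  ttft_ms: duration_ms, # non-streaming: the whole body arrives at once
  duration_ms: duration_ms,
  output_tokens: output_tokens,
  tps: tps,
  streaming: false
}
puts latency.inspect
```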
@@ -134,12 +134,13 @@ module Clacky
 
  # === State updates ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    data = {}
    data[:tasks] = tasks if tasks
    data[:cost] = cost if cost
    data[:cost_source] = cost_source if cost_source
    data[:status] = status if status
+   data[:latency] = latency if latency
    emit("session_update", **data) unless data.empty?
  end
 
@@ -136,7 +136,7 @@ module Clacky
 
  # === State updates (no-ops) ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -22,7 +22,7 @@ module Clacky
  "name" => "OpenClacky",
  "base_url" => "https://api.openclacky.com",
  "api" => "bedrock",
- "default_model" => "abs-claude-sonnet-4-6",
+ "default_model" => "abs-claude-sonnet-4-5",
  "models" => [
    "abs-claude-opus-4-7",
    "abs-claude-opus-4-6",
@@ -131,7 +131,7 @@ module Clacky
  }.freeze,
 
  "clackyai-sea" => {
-   "name" => "ClackyAI( Sea )",
+   "name" => "ClackyAI(Sea)",
    "base_url" => "https://api.clacky.ai",
    "api" => "bedrock",
    "default_model" => "abs-claude-sonnet-4-5",
@@ -152,7 +152,7 @@ module Clacky
 
  # === State updates (no-ops for IM) ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -426,6 +426,9 @@ module Clacky
  elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/model$})
    session_id = path.sub("/api/sessions/", "").sub("/model", "")
    api_switch_session_model(session_id, req, res)
+ elsif method == "POST" && path.match?(%r{^/api/sessions/[^/]+/benchmark$})
+   session_id = path.sub("/api/sessions/", "").sub("/benchmark", "")
+   api_benchmark_session_models(session_id, req, res)
  elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/working_dir$})
    session_id = path.sub("/api/sessions/", "").sub("/working_dir", "")
    api_change_session_working_dir(session_id, req, res)
@@ -2333,6 +2336,97 @@ module Clacky
    json_response(res, 500, { error: e.message })
  end
 
+ # POST /api/sessions/:id/benchmark
+ #
+ # Speed-test every configured model in one shot so the user can pick the
+ # fastest available model for this session. We send a minimal one-token
+ # request to each model *in parallel* (one thread per model) and measure
+ # total HTTP duration — for non-streaming calls this equals the user's
+ # perceived time-to-first-token, so the field is named `ttft_ms` for
+ # forward-compatibility with a future streaming implementation.
+ #
+ # Cost note: each request is `max_tokens: 1` + a 2-byte prompt, so the
+ # total cost across a dozen models is well under one cent.
+ #
+ # Response shape:
+ #   {
+ #     ok: true,
+ #     results: [
+ #       { model_id: "...", model: "...", ttft_ms: 812, ok: true },
+ #       { model_id: "...", model: "...", ok: false, error: "timeout" },
+ #       ...
+ #     ]
+ #   }
+ def api_benchmark_session_models(session_id, _req, res)
+   return json_response(res, 404, { error: "Session not found" }) unless @registry.ensure(session_id)
+
+   # Snapshot the models list — @agent_config.models is a shared reference
+   # that the user might mutate from the settings panel during the test;
+   # a shallow dup is enough since we only read string fields below.
+   models = Array(@agent_config.models).dup
+   return json_response(res, 200, { ok: true, results: [] }) if models.empty?
+
+   # Kick off one thread per model. Each request's wall time is capped
+   # inside the thread (see benchmark_single_model) so a single dead model
+   # can't block the response. The outer join uses a generous ceiling
+   # (timeout + small buffer) as a last-resort safety net.
+   per_model_timeout = 15
+   threads = models.map do |m|
+     Thread.new do
+       Thread.current.report_on_exception = false
+       benchmark_single_model(m, per_model_timeout)
+     end
+   end
+
+   results = models.zip(threads).map do |m, t|
+     if t.join(per_model_timeout + 3)
+       t.value rescue { ok: false, error: "thread failed" }
+     else
+       # Join timed out — calling t.value now would block forever and defeat
+       # the safety net, so kill the straggler and synthesise a failure row.
+       t.kill
+       { model_id: m["id"].to_s, model: m["model"].to_s, ok: false, error: "benchmark thread hung" }
+     end
+   end
+
+   json_response(res, 200, { ok: true, results: results })
+ rescue => e
+   Clacky::Logger.error("[benchmark] #{e.class}: #{e.message}", error: e)
+   json_response(res, 500, { error: e.message })
+ end
+
+ # Runs one speed-test request against a single model config hash and
+ # returns a result row for api_benchmark_session_models. Pure function —
+ # no shared state — so it's safe to call from worker threads.
+ private def benchmark_single_model(model_cfg, timeout_sec)
+   model_id = model_cfg["id"].to_s
+   model_name = model_cfg["model"].to_s
+   base = { model_id: model_id, model: model_name }
+
+   client = Clacky::Client.new(
+     model_cfg["api_key"].to_s,
+     base_url: model_cfg["base_url"].to_s,
+     model: model_name,
+     anthropic_format: model_cfg["anthropic_format"] || false
+   )
+
+   # Overriding Faraday's timeouts via a short-lived env var isn't ideal;
+   # instead we rely on test_connection's own network path and wrap
+   # the call in Timeout as a last line of defence. Most providers
+   # respond within 2-3s for a reply this small.
+   t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   result = nil
+   begin
+     Timeout.timeout(timeout_sec) { result = client.test_connection(model: model_name) }
+   rescue Timeout::Error
+     return base.merge(ok: false, error: "timeout after #{timeout_sec}s")
+   end
+   t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+   if result && result[:success]
+     base.merge(ok: true, ttft_ms: ((t1 - t0) * 1000).round)
+   else
+     base.merge(ok: false, error: (result && result[:error]).to_s[0, 200])
+   end
+ rescue => e
+   base.merge(ok: false, error: "#{e.class}: #{e.message}"[0, 200])
+ end
+
  def api_change_session_working_dir(session_id, req, res)
    body = parse_json_body(req)
    new_dir = body["working_dir"].to_s.strip
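A hedged sketch of exercising the new endpoint from a script. The route and response shape come from the handler above; the host, port, and session id are placeholders.

```ruby
# Sketch: POST the benchmark endpoint and print one row per model.
# "localhost:4567" and SESSION_ID are placeholders, not openclacky defaults.
require "net/http"
require "json"

uri = URI("http://localhost:4567/api/sessions/SESSION_ID/benchmark")
res = Net::HTTP.post(uri, "") # empty body; the handler ignores it
data = JSON.parse(res.body)

data.fetch("results", []).each do |row|
  if row["ok"]
    puts "#{row['model']}: #{row['ttft_ms']}ms"
  else
    puts "#{row['model']}: FAILED (#{row['error']})"
  end
end
```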
@@ -169,7 +169,8 @@ module Clacky
  live_cost_source = s[:agent]&.cost_source
  { status: s[:status], error: s[:error], model: model_info&.dig(:model), name: live_name,
    total_tasks: s[:agent]&.total_tasks, total_cost: s[:agent]&.total_cost,
-   cost_source: live_cost_source }
+   cost_source: live_cost_source,
+   latest_latency: s[:agent]&.latest_latency }
  end
  end
 
@@ -234,6 +235,11 @@ module Clacky
  total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
  total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
  cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
+ # latest_latency is in-memory only (live sessions) — not persisted at the
+ # session level on disk. The on-disk source of truth is the per-assistant-
+ # message `latency` fields in messages[]. Reloaded sessions start with nil
+ # and get populated on the next LLM call.
+ latest_latency: ls&.dig(:latest_latency),
  pinned: s[:pinned] || false,
  }
@@ -311,6 +317,7 @@ module Clacky
  source: agent.source.to_s,
  agent_profile: agent.agent_profile.name,
  pinned: agent.pinned || false,
+ latest_latency: agent.latest_latency,
  }
  end
  end
@@ -302,14 +302,15 @@ module Clacky
 
  # === State updates ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    data = {}
    data[:tasks] = tasks if tasks
    data[:cost] = cost if cost
    data[:cost_source] = cost_source if cost_source
    data[:status] = status if status
+   data[:latency] = latency if latency
    emit("session_update", **data) unless data.empty?
-   forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status) }
+   forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status, latency: latency) }
  end
 
  def update_todos(todos)
@@ -108,7 +108,8 @@ module Clacky
  # @param cost [Float] Total cost (optional)
  # @param cost_source [Symbol, nil] :api / :price / :default (optional)
  # @param status [String] Workspace status ('idle' or 'working') (optional)
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ # @param latency [Hash, nil] Latency metrics; accepted but not displayed in the TUI.
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    @tasks_count = tasks if tasks
    @total_cost = cost if cost
    @input_area.update_sessionbar(
@@ -106,7 +106,7 @@ module Clacky
  end
 
  # === State updates ===
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Clacky
-   VERSION = "1.0.0.beta.6"
+   VERSION = "1.0.0"
  end
@@ -1524,11 +1524,15 @@ body {
  .msg-time {
    /* Rendered as a footnote *below* the bubble, floating inside the #messages
       flex gap (12px). Absolute-positioned so showing/hiding it on hover does
-      NOT reflow the message list — surrounding messages stay put. */
+      NOT reflow the message list — surrounding messages stay put.
+
+      Per-side anchoring (see .msg-user / .msg-assistant overrides below) is
+      critical: we must NOT set both left:0 and right:0, because with
+      white-space:nowrap a short bubble (e.g. just "1") would force the time
+      text to extend past the bubble edge and trigger horizontal page scroll.
+      Instead each variant anchors to one side and grows naturally inward. */
    position: absolute;
    top: 100%;
-   left: 0;
-   right: 0;
    margin-top: 2px;
    display: block;
    font-size: 10px;
@@ -1545,9 +1549,10 @@ body {
    opacity: 1;
    transform: translateY(0);
  }
- /* Time color / alignment: match the bubble's alignment in the column. */
- .msg-user .msg-time { color: var(--color-text-secondary); text-align: right; padding-right: 4px; }
- .msg-assistant .msg-time { color: var(--color-text-secondary); text-align: left; padding-left: 4px; }
+ /* Time color / alignment: anchor to the bubble's own side and let width be
+    driven by content, which prevents overflow on narrow bubbles. */
+ .msg-user .msg-time { color: var(--color-text-secondary); right: 0; left: auto; padding-right: 4px; }
+ .msg-assistant .msg-time { color: var(--color-text-secondary); left: 0; right: auto; padding-left: 4px; }
 
  .msg-user { background: var(--color-accent-primary); color: var(--color-button-primary-text); align-self: flex-end; }
  [data-theme="dark"] .msg-user { background: var(--color-accent-hover); }
@@ -2204,6 +2209,65 @@ body {
  #sib-tasks { opacity: 0.75; flex-shrink: 0; } /* tier 2 */
  #sib-cost { opacity: 0.45; flex-shrink: 0; } /* tier 3 */
 
+ /* ── Latency signal (right after model name) ──────────────────────────────
+    A compact 4-bar signal + TTFT value. Placed adjacent to #sib-model so the
+    user's mental mapping "this model is fast/slow" is immediate. Variant
+    classes (-ok/-warn/-bad) are applied by Sessions._renderSignal based on
+    TTFT thresholds; colours intentionally use CSS vars so the same palette
+    works in both light and dark themes. */
+ #sib-signal-wrap { position: relative; flex-shrink: 0; }
+ .sib-signal-clickable {
+   display: inline-flex;
+   align-items: center;
+   gap: 5px;
+   padding: 1px 6px;
+   cursor: default; /* no click handler yet — step 3/4 will add one */
+   border-radius: 3px;
+   opacity: 0.85;
+   transition: opacity 0.15s ease, background-color 0.15s ease;
+   font-variant-numeric: tabular-nums; /* prevents the text from jittering as values change */
+ }
+ .sib-signal-clickable:hover {
+   opacity: 1;
+   background: var(--color-bg-hover);
+ }
+ /* Bar stack: four 2-px wide vertical bars of increasing height, mimicking
+    a phone signal-strength icon. Each <i> is hollow by default; Sessions adds
+    .on to the ones that should light up for the current signal level. */
+ .sib-signal-clickable .sig-bars {
+   display: inline-flex;
+   align-items: flex-end;
+   gap: 1px;
+   height: 11px;
+ }
+ .sib-signal-clickable .sig-bars i {
+   display: inline-block;
+   width: 2px;
+   background: var(--color-text-secondary);
+   opacity: 0.25; /* dim "off" bar */
+   border-radius: 1px;
+   transition: background-color 0.15s, opacity 0.15s;
+ }
+ /* Individual bar heights — short→tall */
+ .sib-signal-clickable .sig-bars i:nth-child(1) { height: 3px; }
+ .sib-signal-clickable .sig-bars i:nth-child(2) { height: 5px; }
+ .sib-signal-clickable .sig-bars i:nth-child(3) { height: 8px; }
+ .sib-signal-clickable .sig-bars i:nth-child(4) { height: 11px; }
+ .sib-signal-clickable .sig-bars i.on { opacity: 1; }
+
+ /* Signal level → bar colour. Applied to .on bars only; "off" bars stay grey. */
+ .sib-signal-ok .sig-bars i.on { background: var(--color-accent-primary); } /* green / brand */
+ .sib-signal-warn .sig-bars i.on { background: #d39e00; } /* amber */
+ .sib-signal-bad .sig-bars i.on { background: #d9534f; } /* red */
+
+ .sib-signal-clickable .sig-text {
+   font-size: 11px;
+   color: var(--color-text-secondary);
+ }
+ .sib-signal-ok .sig-text { color: var(--color-text-primary); }
+ .sib-signal-warn .sig-text { color: #d39e00; }
+ .sib-signal-bad .sig-text { color: #d9534f; }
+
  /* Model name dropdown in session info bar */
  #sib-model-wrap {
    position: relative;
@@ -2266,6 +2330,94 @@ body {
    color: var(--color-accent-primary);
  }
 
+ /* ── Model switcher benchmark banner & latency column ──────────────────────
+    The banner sits at the top of the dropdown with a subtle border so it
+    visually separates from the scrollable model list below. The ⚡ button is
+    pushed to the RIGHT edge (where the eye naturally lands after scanning a
+    model name → latency row), while the optional hint ("done in 1.2s") sits
+    on the left. The per-row latency cell is right-aligned and uses
+    tabular-nums so numbers line up vertically regardless of width. */
+ .sib-model-bench {
+   display: flex;
+   align-items: center;
+   justify-content: space-between; /* hint on the left, button on the right */
+   gap: 8px;
+   padding: 4px 8px 4px 10px; /* compact: tighter top/bottom + tighter right side */
+   border-bottom: 1px solid var(--color-border-primary);
+   background: var(--color-bg-primary);
+   position: sticky; /* keep visible while scrolling a long model list */
+   top: 0;
+   z-index: 1;
+   min-height: 0;
+ }
+ .sib-bench-btn {
+   display: inline-flex;
+   align-items: center;
+   gap: 3px;
+   padding: 2px 8px;
+   font-size: 10px;
+   line-height: 1.4;
+   font-family: inherit;
+   background: var(--color-bg-secondary);
+   color: var(--color-text-secondary);
+   border: 1px solid var(--color-border-primary);
+   border-radius: 10px;
+   cursor: pointer;
+   transition: background-color 0.15s, border-color 0.15s, color 0.15s;
+   order: 2; /* force button to the right even if DOM order changes */
+   flex: 0 0 auto;
+ }
+ .sib-bench-btn:hover:not(:disabled) {
+   background: var(--color-bg-hover);
+   border-color: var(--color-accent-primary);
+   color: var(--color-accent-primary);
+ }
+ .sib-bench-btn:disabled {
+   opacity: 0.55;
+   cursor: progress;
+ }
+ .sib-bench-hint {
+   font-size: 10px;
+   color: var(--color-text-secondary);
+   font-variant-numeric: tabular-nums;
+   order: 1; /* hint stays on the left */
+   flex: 1 1 auto;
+   min-width: 0;
+   overflow: hidden;
+   text-overflow: ellipsis;
+   white-space: nowrap;
+ }
+
+ .sib-model-option .sib-model-name {
+   /* Keep long model names from pushing the latency cell offscreen. */
+   overflow: hidden;
+   text-overflow: ellipsis;
+   white-space: nowrap;
+   flex: 1 1 auto;
+   min-width: 0;
+ }
+ .sib-model-option .sib-model-right {
+   display: inline-flex;
+   align-items: center;
+   gap: 8px;
+   flex-shrink: 0;
+ }
+ .sib-model-option .sib-model-latency {
+   font-size: 10px;
+   font-variant-numeric: tabular-nums;
+   min-width: 44px; /* reserves space so rows don't jitter before benchmark */
+   text-align: right;
+   color: var(--color-text-secondary);
+ }
+ .sib-model-option .sib-model-latency.is-ok { color: var(--color-accent-primary); }
+ .sib-model-option .sib-model-latency.is-warn { color: #d39e00; }
+ .sib-model-option .sib-model-latency.is-bad { color: #d9534f; }
+ .sib-model-option .sib-model-latency.is-err { color: #d9534f; }
+ .sib-model-option .sib-model-latency.is-pending {
+   color: var(--color-text-secondary);
+   opacity: 0.7;
+ }
+
  /* ── Input area (wraps preview strip + input bar) ────────────────────────── */
  #ws-disconnect-hint {
    position: absolute;
@@ -410,9 +410,13 @@ WS.onEvent(ev => {
  // Shape (2): partial update — build patch from top-level fields
  sid = ev.session_id;
  patch = {};
- if (ev.cost !== undefined) patch.total_cost = ev.cost;
- if (ev.tasks !== undefined) patch.total_tasks = ev.tasks;
- if (ev.status !== undefined) patch.status = ev.status;
+ if (ev.cost !== undefined) patch.total_cost = ev.cost;
+ if (ev.tasks !== undefined) patch.total_tasks = ev.tasks;
+ if (ev.status !== undefined) patch.status = ev.status;
+ // Latency pushed by Agent after each LLM call (see update_sessionbar).
+ // Stored under latest_latency — same field name the HTTP /api/sessions
+ // list returns, so updateInfoBar doesn't need to branch on the source.
+ if (ev.latency !== undefined) patch.latest_latency = ev.latency;
  }
  if (!sid) break;
  Sessions.patch(sid, patch);
@@ -1637,6 +1641,13 @@ window.bootAfterBrand = async function() {
  // ── Session Info Bar Model Switcher ───────────────────────────────────────
  (function() {
    let _isOpen = false;
+   // Cache of the most recent benchmark results, keyed by model_id. Kept at
+   // closure scope so the numbers survive closing & reopening the dropdown —
+   // the user shouldn't have to re-run the test just to peek at results. We
+   // intentionally do NOT persist this to disk: latency is a point-in-time
+   // measurement, and yesterday's numbers are misleading.
+   let _benchCache = {}; // { [model_id]: { ttft_ms, ok, error, ts } }
+   let _benchInFlight = false; // prevent double-click spam
 
    // Toggle model dropdown when clicking on model name
    document.addEventListener("click", async (e) => {
@@ -1692,23 +1703,63 @@ window.bootAfterBrand = async function() {
 
  dropdown.innerHTML = "";
 
+ // ── Benchmark floating button (top-right of dropdown) ──────────────
+ // Tiny ⚡ button pinned to the dropdown's top-right corner. Runs one
+ // concurrent request per model and back-fills each row's latency cell.
+ // We deliberately avoid a full-width banner — it ate visual space that
+ // the model list needs, and most users open the dropdown to SWITCH,
+ // not to benchmark. The floating button is discoverable but unobtrusive.
+ const bench = document.createElement("div");
+ bench.className = "sib-model-bench";
+ const btnLabel = (typeof I18n !== "undefined") ? I18n.t("sib.bench.btn") : "Benchmark";
+ const btnTooltip = (typeof I18n !== "undefined") ? I18n.t("sib.bench.tooltip") : "Test response latency for every configured model";
+ bench.innerHTML = `
+   <button type="button" class="sib-bench-btn" title="${btnTooltip}">⚡ <span class="sib-bench-label">${btnLabel}</span></button>
+   <span class="sib-bench-hint"></span>
+ `;
+ dropdown.appendChild(bench);
+
+ const benchBtn = bench.querySelector(".sib-bench-btn");
+ const benchLabel = bench.querySelector(".sib-bench-label");
+ const benchHint = bench.querySelector(".sib-bench-hint");
+ benchBtn.addEventListener("click", (ev) => {
+   ev.stopPropagation();
+   _runBenchmark(sessionId, dropdown, benchBtn, benchLabel, benchHint);
+ });
+
+ // ── Model rows ─────────────────────────────────────────────────────
  models.forEach(m => {
    console.log("[Model Switcher] Adding model:", m.model, "id:", m.id, "current:", currentModel);
    const opt = document.createElement("div");
    opt.className = "sib-model-option";
+   opt.dataset.modelId = m.id;
    if (m.model === currentModel) opt.classList.add("current");
 
-   const modelName = document.createElement("span");
-   modelName.textContent = m.model;
-   opt.appendChild(modelName);
+   const left = document.createElement("span");
+   left.className = "sib-model-name";
+   left.textContent = m.model;
+   opt.appendChild(left);
+
+   const right = document.createElement("span");
+   right.className = "sib-model-right";
 
    if (m.type === "default") {
      const badge = document.createElement("span");
      badge.className = `model-badge ${m.type}`;
      badge.textContent = m.type;
-     opt.appendChild(badge);
+     right.appendChild(badge);
    }
 
+   // Latency cell — populated from _benchCache on open, updated live
+   // when a benchmark run completes. Empty slot keeps row heights stable
+   // so the list doesn't visually jump mid-benchmark.
+   const lat = document.createElement("span");
+   lat.className = "sib-model-latency";
+   _fillLatencyCell(lat, _benchCache[m.id]);
+   right.appendChild(lat);
+
+   opt.appendChild(right);
+
  // Switch by id (stable across reorders/edits). Keep model name for UI update.
  opt.addEventListener("click", () => _switchModel(sessionId, m.id, m.model));
  dropdown.appendChild(opt);
@@ -1720,6 +1771,105 @@ window.bootAfterBrand = async function() {
  }
  }
 
+ // Render one latency cell based on a cached result.
+ //   undefined        → empty slot (never tested / in-flight starts from here)
+ //   { ok:true }      → "812ms" in green/amber/red per threshold
+ //   { ok:false }     → "✕" with error in tooltip
+ //   { pending:true } → "…" spinner-ish marker
+ function _fillLatencyCell(el, entry) {
+   el.className = "sib-model-latency";
+   el.textContent = "";
+   el.removeAttribute("title");
+   if (!entry) return;
+   if (entry.pending) {
+     el.textContent = "…";
+     el.classList.add("is-pending");
+     return;
+   }
+   if (!entry.ok) {
+     el.textContent = "✕";
+     el.classList.add("is-err");
+     el.title = entry.error || "failed";
+     return;
+   }
+   const ms = entry.ttft_ms;
+   // Same thresholds as the sib-signal status bar — keep them aligned so
+   // "3 bars in the status bar" ≈ "green number in the picker".
+   // We measure full non-streaming response time (not real TTFT), so ≤60s is
+   // normal, ≤120s is slow, and beyond that is bad. ≤2s still gets the
+   // "feels instant" green treatment like the 4-bar signal.
+   let cls = "is-bad";
+   if (ms <= 2000) cls = "is-ok";
+   else if (ms <= 60000) cls = "is-ok";
+   else if (ms <= 120000) cls = "is-warn";
+   el.classList.add(cls);
+   el.textContent = ms >= 1000 ? (ms / 1000).toFixed(1) + "s" : ms + "ms";
+   if (typeof I18n !== "undefined") {
+     el.title = I18n.t("sib.bench.latencyTooltip", {
+       ttft: el.textContent,
+       time: new Date(entry.ts).toLocaleTimeString(),
+     });
+   } else {
+     el.title = `TTFT ${el.textContent} · tested ${new Date(entry.ts).toLocaleTimeString()}`;
+   }
+ }
+
+ async function _runBenchmark(sessionId, dropdown, btn, label, hint) {
+   if (_benchInFlight) return;
+   _benchInFlight = true;
+   btn.disabled = true;
+   const origLabel = label.textContent;
+   const _t = (key, vars) => (typeof I18n !== "undefined") ? I18n.t(key, vars) : key;
+   label.textContent = _t("sib.bench.running");
+   hint.textContent = "";
+
+   // Mark every row as pending so the user sees instant feedback instead of
+   // a silent button. _fillLatencyCell handles the visual treatment.
+   dropdown.querySelectorAll(".sib-model-option").forEach(opt => {
+     const id = opt.dataset.modelId;
+     if (!id) return;
+     _benchCache[id] = { pending: true };
+     _fillLatencyCell(opt.querySelector(".sib-model-latency"), _benchCache[id]);
+   });
+
+   const t0 = performance.now();
+   try {
+     const res = await fetch(`/api/sessions/${sessionId}/benchmark`, { method: "POST" });
+     const data = await res.json();
+     if (!res.ok || !data.ok) throw new Error(data.error || "benchmark failed");
+
+     const now = Date.now();
+     (data.results || []).forEach(r => {
+       _benchCache[r.model_id] = {
+         ok: !!r.ok,
+         ttft_ms: r.ttft_ms,
+         error: r.error,
+         ts: now,
+       };
+       const opt = dropdown.querySelector(`.sib-model-option[data-model-id="${CSS.escape(r.model_id)}"]`);
+       if (opt) _fillLatencyCell(opt.querySelector(".sib-model-latency"), _benchCache[r.model_id]);
+     });
+
+     const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
+     hint.textContent = _t("sib.bench.done", { t: elapsed });
+   } catch (e) {
+     console.error("Benchmark failed:", e);
+     hint.textContent = _t("sib.bench.failed", { msg: e.message });
+     // Clear pending markers so rows don't stay stuck on "…"
+     dropdown.querySelectorAll(".sib-model-option").forEach(opt => {
+       const id = opt.dataset.modelId;
+       if (id && _benchCache[id]?.pending) {
+         _benchCache[id] = undefined;
+         _fillLatencyCell(opt.querySelector(".sib-model-latency"), undefined);
+       }
+     });
+   } finally {
+     _benchInFlight = false;
+     btn.disabled = false;
+     label.textContent = origLabel;
+   }
+ }
+
  // Switch session model via API
  // modelId — stable runtime id (required by backend)
  // modelName — display name, used for optimistic UI update
@@ -394,6 +394,14 @@ const I18n = (() => {
 
  "header.owner.tooltip": "Creator — click to open Creator Hub",
 
+ // ── Session info bar / Model switcher benchmark ──
+ "sib.bench.btn": "Benchmark",
+ "sib.bench.tooltip": "Test response latency for every configured model",
+ "sib.bench.running": "Testing…",
+ "sib.bench.done": "done in {{t}}s",
+ "sib.bench.failed": "failed: {{msg}}",
+ "sib.bench.latencyTooltip": "TTFT {{ttft}} · tested {{time}}",
+
  "onboard.welcome": "Welcome to {{name}}",
  },
 
@@ -779,6 +787,14 @@ const I18n = (() => {
 
  "header.owner.tooltip": "创作者 — 点击进入创作者中心",
 
+ // ── Session info bar / Model switcher benchmark (zh) ──
+ "sib.bench.btn": "测速",
+ "sib.bench.tooltip": "测试所有已配置模型的响应延迟",
+ "sib.bench.running": "测速中…",
+ "sib.bench.done": "用时 {{t}} 秒",
+ "sib.bench.failed": "失败:{{msg}}",
+ "sib.bench.latencyTooltip": "TTFT {{ttft}} · 测试于 {{time}}",
+
  "onboard.welcome": "欢迎使用 {{name}}",
  }
  };
@@ -858,6 +874,11 @@ const I18n = (() => {
  })();
 
  // ── Thinking Verbs for Progress Animation ──────────────────────────────────
+ //
+ // The primary verb ("Thinking" / "思考中") is chosen 90% of the time inside
+ // getRandomThinkingVerb(). The lists below are ONLY the 10% flavor variants —
+ // do not include the primary verb here, and do not rely on duplicates for
+ // weighting (probability is controlled in code, not data).
  const THINKING_VERBS = {
    en: [
      "Cogitating",
@@ -882,38 +903,38 @@ const THINKING_VERBS = {
      "Reasoning"
    ],
    zh: [
-     "思考中", // 5x weight (appears 5 times for higher probability)
-     "思考中",
-     "思考中",
-     "思考中",
-     "思考中",
-     "琢磨中", // 2x weight
-     "琢磨中",
-     "思忖中",
-     "盘算中",
-     "酝酿中",
-     "捋一捋",
-     "理理头绪",
-     "掂量掂量",
-     "寻思中",
-     "琢磨琢磨",
-     "想想办法",
-     "推演中",
+     "推理中",
+     "深度思考中",
+     "分析中",
      "解析中",
      "拆解中",
-     "组装中",
+     "推演中",
      "梳理中",
-     "验证中",
+     "归纳中",
      "演算中",
-     "分析中",
-     "推理中",
-     "构思中"
+     "验证中",
+     "权衡中",
+     "构思中",
+     "酝酿中",
+     "思忖中",
+     "琢磨中"
    ]
  };
 
- // Get a random thinking verb based on current language
+ // Get a random thinking verb based on current language.
+ //
+ // Behavior: 90% of the time return the primary verb ("思考中" / "Thinking"),
+ // 10% of the time pick a random variant from the list for a bit of flavor.
+ // The primary is intentionally kept outside the list so tuning the probability
+ // is a single-number change here, independent of the list contents.
  function getRandomThinkingVerb() {
-   const lang = I18n.lang();
+   const lang = I18n.lang();
+   const primary = lang === "zh" ? "思考中" : "Thinking";
+
+   // 90% primary, 10% variant
+   if (Math.random() < 0.9) return primary;
+
    const verbs = THINKING_VERBS[lang] || THINKING_VERBS.en;
+   if (!verbs || verbs.length === 0) return primary;
    return verbs[Math.floor(Math.random() * verbs.length)];
  }
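The "probability in code, not data" pattern also reads cleanly in Ruby. A standalone sketch for illustration only; the verb strings and 0.9 split mirror the JS above, everything else is invented for the demo:

```ruby
# Probability lives in one constant instead of duplicated list entries,
# so retuning the 90/10 split never requires editing the variant list.
PRIMARY_VERB = "Thinking"
VARIANTS = ["Cogitating", "Reasoning"] # illustrative subset of the list above
PRIMARY_PROBABILITY = 0.9

def random_thinking_verb
  return PRIMARY_VERB if VARIANTS.empty? || rand < PRIMARY_PROBABILITY
  VARIANTS.sample
end

counts = Hash.new(0)
10_000.times { counts[random_thinking_verb] += 1 }
puts counts.inspect # PRIMARY_VERB should land near 9_000
```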
@@ -271,6 +271,16 @@
  <div id="sib-model-dropdown" class="sib-model-dropdown" style="display:none"></div>
  </span>
  <span class="sib-sep sib-sep-after-model">│</span>
+ <!-- Latency signal: 4-bar signal + TTFT number. Hidden until the first LLM
+      call completes (see updateInfoBar / Sessions._renderSignal). Click
+      opens a mini benchmark panel (see Step 3/4 — not yet implemented). -->
+ <span id="sib-signal-wrap" style="display:none">
+   <span id="sib-signal" class="sib-signal-clickable" title="Recent LLM latency">
+     <span class="sig-bars" aria-hidden="true"><i></i><i></i><i></i><i></i></span>
+     <span class="sig-text"></span>
+   </span>
+ </span>
+ <span class="sib-sep sib-sep-after-signal" style="display:none">│</span>
  <!-- Detail fields: mode, tasks, cost -->
  <span class="sib-detail">
  <span id="sib-mode"></span>
@@ -740,9 +740,18 @@ const Sessions = (() => {
 
  // Format a timestamp for display inside a message bubble.
  // Same-day: "HH:MM"; cross-day: "MM-DD HH:MM".
+ //
+ // Accepts:
+ //   - ISO string ("2026-04-30T21:45:00Z")
+ //   - JS millisecond epoch (number ≥ 1e12)
+ //   - Unix second epoch (number < 1e12) — what the Ruby backend emits via
+ //     Time.now.to_f; we multiply by 1000 before handing to Date(), otherwise
+ //     JS interprets 1.77e9 as ~1970-01-21 and we get bogus timestamps.
  function _formatMsgTime(dateOrStr) {
    if (!dateOrStr) return "";
-   const d = new Date(dateOrStr);
+   let input = dateOrStr;
+   if (typeof input === "number" && input < 1e12) input = input * 1000;
+   const d = new Date(input);
    if (isNaN(d)) return "";
    const now = new Date();
    const pad = n => String(n).padStart(2, "0");
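The seconds-vs-milliseconds cutoff is easy to sanity-check. A small Ruby sketch mirroring the JS heuristic (the 1e12 threshold and the sample epoch come from the comment above):

```ruby
# Mirror of the JS heuristic: numeric epochs below 1e12 are treated as
# seconds (what Ruby's Time.now.to_f emits) and scaled to milliseconds.
def normalize_epoch_ms(value)
  value < 1e12 ? (value * 1000).round : value.round
end

seconds_epoch = 1_770_000_000.0    # Time.now.to_f around early 2026
millis_epoch  = 1_770_000_000_000  # the same instant in JS milliseconds

puts Time.at(normalize_epoch_ms(seconds_epoch) / 1000).utc
puts Time.at(normalize_epoch_ms(millis_epoch) / 1000).utc
# Without the scaling, 1.77e9 read as milliseconds is ~21 days after the
# 1970 epoch: the bogus "1970-01-21" timestamps the comment mentions.
```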
@@ -1574,6 +1583,13 @@ const Sessions = (() => {
  }
  if (sibModelWrap) sibModelWrap.style.display = s.model ? "" : "none";
 
+ // Latency signal — read from s.latest_latency (populated by:
+ //   - HTTP /api/sessions → session_registry#list (from agent.latest_latency)
+ //   - WS session_update events patched by app.js).
+ // Hidden entirely when no latency has been recorded yet (fresh session, or
+ // old pre-feature sessions that have never made an LLM call this run).
+ this._renderSignal(s.latest_latency);
+
  // Tasks
  const sibTasks = $("sib-tasks");
  if (sibTasks) sibTasks.textContent = `${s.total_tasks || 0} tasks`;
@@ -1592,6 +1608,77 @@ const Sessions = (() => {
  if (bar) bar.style.display = "flex";
  },
 
+ /** Render the 4-bar latency signal next to the model name in the status bar.
+  *
+  * @param {Object|null} lat latency metrics from agent.latest_latency
+  *   shape: { ttft_ms, duration_ms, output_tokens, tps, model, streaming }
+  *
+  * Visibility: hidden whenever lat is falsy (no measurement yet). Never
+  * renders a "loading" state — we would rather show nothing than a stale or
+  * misleading number.
+  *
+  * Signal thresholds (TTFT):
+  *   Note: this is measured over the WHOLE non-streaming response (we
+  *   don't have a real TTFT yet — the server returns one completed body),
+  *   so for a large generation — "write me a 2000-line snake game" — the
+  *   number naturally balloons. Thresholds below are tuned to that reality:
+  *   60s is considered NORMAL, 120s is slow, beyond that we flag bad.
+  *
+  *   ≤   2000 ms → 4 bars, green, "⚡" fast
+  *   ≤  60000 ms → 3 bars, green, normal
+  *   ≤ 120000 ms → 2 bars, amber, slow
+  *   > 120000 ms → 1 bar,  red,   very slow
+  *
+  * Hover tooltip: built from the latency hash — full breakdown for power
+  * users; the compact inline text is just "1.2s" style for scannability.
+  */
+ _renderSignal(lat) {
+   const wrap = $("sib-signal-wrap");
+   const sep = document.querySelector(".sib-sep-after-signal");
+   const el = $("sib-signal");
+   if (!wrap || !el) return;
+
+   if (!lat || !lat.ttft_ms) {
+     wrap.style.display = "none";
+     if (sep) sep.style.display = "none";
+     return;
+   }
+
+   const ttft = Number(lat.ttft_ms) || 0;
+   let bars, level;
+   if (ttft <= 2000) { bars = 4; level = "ok"; }
+   else if (ttft <= 60000) { bars = 3; level = "ok"; }
+   else if (ttft <= 120000) { bars = 2; level = "warn"; }
+   else { bars = 1; level = "bad"; }
+
+   // Paint bars: active ones get .on, others stay dim
+   el.querySelectorAll(".sig-bars i").forEach((bar, i) => {
+     bar.classList.toggle("on", i < bars);
+   });
+   el.className = `sib-signal-clickable sib-signal-${level}`;
+
+   // Inline text: just the TTFT in human-friendly units
+   const ttftStr = ttft >= 1000 ? (ttft / 1000).toFixed(1) + "s" : ttft + "ms";
+   const text = el.querySelector(".sig-text");
+   if (text) text.textContent = ttftStr;
+
+   // Tooltip: full metrics breakdown
+   const parts = [`TTFT ${ttftStr}`];
+   if (lat.duration_ms && lat.duration_ms !== ttft) {
+     const durStr = lat.duration_ms >= 1000
+       ? (lat.duration_ms / 1000).toFixed(1) + "s"
+       : lat.duration_ms + "ms";
+     parts.push(`total ${durStr}`);
+   }
+   if (lat.tps) parts.push(`${lat.tps} tok/s`);
+   if (lat.output_tokens) parts.push(`${lat.output_tokens} tokens`);
+   if (lat.model) parts.push(`@ ${lat.model}`);
+   el.title = "Last LLM call — " + parts.join(" · ");
+
+   wrap.style.display = "";
+   if (sep) sep.style.display = "";
+ },
+
  // ── Message helpers ────────────────────────────────────────────────────
 
  // Live tool group state (one active group per session at a time)
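Since the same thresholds appear in three places (the status-bar signal, the picker's latency cells, and the docs above), the mapping is worth stating as a tiny pure function. A Ruby rendition for illustration only; it mirrors the JS logic, it is not part of the gem:

```ruby
# Pure-function rendition of the _renderSignal thresholds.
# Returns [bars, level] for a measured ttft in milliseconds.
def signal_for(ttft_ms)
  if ttft_ms <= 2_000     then [4, :ok]   # feels instant
  elsif ttft_ms <= 60_000 then [3, :ok]   # normal for a full non-streaming body
  elsif ttft_ms <= 120_000 then [2, :warn] # slow
  else [1, :bad]                           # very slow
  end
end

puts signal_for(812).inspect     # => [4, :ok]
puts signal_for(75_000).inspect  # => [2, :warn]
puts signal_for(180_000).inspect # => [1, :bad]
```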
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: openclacky
  version: !ruby/object:Gem::Version
-   version: 1.0.0.beta.6
+   version: 1.0.0
  platform: ruby
  authors:
  - windy