openclacky 1.0.0.beta.6 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: afc12c94c2b8b7580ca948625cc6c106004bbf385f341c783e36e1be9d93fd82
- data.tar.gz: 95508d829f02270b3fce4849b21e29b6766a46d9c663d47e37df817aed456da5
+ metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
+ data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
  SHA512:
- metadata.gz: 8f44be2b9d9bf26f97490f5ddf2525a6cad937c5152b8486bb2840a263ab104cacfa5838600236b3a38a6806e69cd717fbce982838f2c2a65664158b0b4ed238
- data.tar.gz: aecb14f4b6f345d190e52de0c0816f380b4e6c3213453c9e69a04b78944f757115e8a1ac042b0a78398e79d27de65190f4c0cb61d1efe3c224416b6a2f55f6c6
+ metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
+ data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
data/CHANGELOG.md CHANGED
@@ -5,7 +5,19 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
- ## [Unreleased]
+ ## [1.0.0] - 2026-04-30
+
+ ### Added
+ - **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
+ - **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
+ - **Default model changed to 4.5.** The new default model provides a better balance of speed, quality, and cost for most tasks.
+
+ ### Improved
+ - **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
+ - **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
+
+ ### Fixed
+ - **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
 
  ## [1.0.0.beta.6] - 2026-04-30
 
@@ -86,7 +86,45 @@ module Clacky
  # Successful response — if we were probing, confirm primary is healthy.
  handle_probe_success if @config.probing?
 
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
+ rescue Faraday::TimeoutError => e
+   # ── Read-timeout path (distinct from connection-level failures) ──
+   # Faraday::TimeoutError on our non-streaming POST almost always means
+   # the *response* took longer than the 300s read-timeout to come back —
+   # i.e. the model is trying to produce a huge output in one shot
+   # (e.g. "write me a 2000-line snake game"). Blindly retrying the same
+   # request with the same prompt reproduces the same timeout.
+   #
+   # Strategy:
+   #   1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
+   #      telling the model to break the work into smaller steps, then
+   #      retry. The history edit changes the prompt, so the retry is
+   #      materially different from the failed attempt.
+   #   2. On subsequent timeouts in the same task, fall back to the
+   #      generic "just retry" behaviour (the model may have ignored
+   #      the hint; don't pile on duplicate hints).
+   #   3. Probing-mode timeouts still go through handle_probe_failure.
+   retries += 1
+
+   if @config.probing?
+     handle_probe_failure
+     retry
+   end
+
+   if retries <= max_retries
+     inject_large_output_hint_if_first_timeout(e)
+     @ui&.show_progress(
+       "Response too slow (likely generating too much at once): #{e.message}",
+       progress_type: "retrying",
+       phase: "active",
+       metadata: { attempt: retries, total: max_retries }
+     )
+     sleep retry_delay
+     retry
+   else
+     raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
+   end
+
+ rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
  retries += 1
 
  # Probing failure: primary still down — renew cooling-off and retry with fallback.
@@ -95,9 +133,10 @@ module Clacky
    retry
  end
 
- # Network-level errors (timeouts, connection failures) are likely transient
- # infrastructure blips — do NOT trigger fallback. Just retry on the current
- # model (primary or already-active fallback) up to max_retries.
+ # Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
+ # transient infrastructure blips — do NOT trigger fallback, and do
+ # NOT inject the "break into steps" hint (the model did nothing wrong).
+ # Just retry on the current model up to max_retries.
  if retries <= max_retries
    @ui&.show_progress(
      "Network failed: #{e.message}",
@@ -229,6 +268,50 @@ module Clacky
  (msg.include?("thinking") || msg.include?("must be passed back") ||
   msg.include?("must be provided"))
  end
+
+ # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
+ # user message to the history instructing the model to break its work
+ # into smaller steps. Subsequent timeouts in the same task are ignored
+ # here (caller just retries) so we don't pollute history with duplicate
+ # hints.
+ #
+ # The injected message carries `system_injected: true` so it is:
+ #   - Hidden from UI replay (session_serializer / replay_history filters)
+ #   - Skipped by prompt-caching marker placement (client.rb)
+ #   - Skipped by message compression's "recent user turn" protection
+ #     (message_compressor_helper.rb)
+ #
+ # Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
+ private def inject_large_output_hint_if_first_timeout(err)
+   return if @task_timeout_hint_injected
+
+   @task_timeout_hint_injected = true
+
+   hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
+          "This usually means the model was trying to produce too much output in a single response. " \
+          "Please change your approach:\n" \
+          "- Break the task into multiple smaller steps, each producing a short response.\n" \
+          "- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
+          "then fill in each section with separate `edit` calls.\n" \
+          "- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
+          "- Do NOT attempt to output the entire deliverable in one response."
+
+   @history.append({
+     role: "user",
+     content: hint,
+     system_injected: true,
+     task_id: @current_task_id
+   })
+
+   Clacky::Logger.info(
+     "[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
+     "(error=#{err.class}: #{err.message})"
+   )
+
+   @ui&.show_warning(
+     "LLM response timed out — asking model to break the task into smaller steps and retrying..."
+   )
+ end
  end
  end
  end
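To make the `system_injected: true` contract concrete, here is a toy Ruby sketch of the replay-side filtering the comment describes. The message shape is inferred from this diff; the gem's actual serializer logic may differ.

```ruby
# Toy illustration: system-injected turns reach the model but never the screen.
# Hash shapes are assumptions based on this diff, not openclacky's exact schema.
messages = [
  { role: "user", content: "Build the snake game" },
  { role: "user", content: "[SYSTEM] The previous LLM response timed out...",
    system_injected: true },
  { role: "assistant", content: "Step 1: write the skeleton..." }
]

# UI replay drops injected hints; the API payload would keep them.
replayable = messages.reject { |m| m[:system_injected] }
replayable.each { |m| puts "#{m[:role]}: #{m[:content]}" }
```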
@@ -36,6 +36,15 @@ module Clacky
  # Restore previous_total_tokens for accurate delta calculation across sessions
  @previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
 
+ # Recover the latest latency metric from the most recent assistant message
+ # that carries a :latency field. This is the source of truth for the status-bar
+ # signal — no separate session-level field is needed. Older sessions (pre-feature)
+ # simply start with nil; the signal stays hidden until the next LLM call populates it.
+ last_assistant_with_latency = @history.to_a.reverse.find do |m|
+   m[:role].to_s == "assistant" && m[:latency]
+ end
+ @latest_latency = last_assistant_with_latency&.dig(:latency)
+
  # Restore Time Machine state
  @task_parents = session_data.dig(:time_machine, :task_parents) || {}
  @current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
@@ -178,8 +187,18 @@ module Clacky
  elsif current_round
    current_round[:events] << msg
  elsif msg[:compressed_summary] && msg[:chunk_path]
-   # Compressed summary sitting before any user rounds — expand it from chunk md
-   chunk_rounds = parse_chunk_md_to_rounds(msg[:chunk_path])
+   # Compressed summary sitting before any user rounds — expand ALL chunk
+   # MD files that belong to the same session (siblings of chunk_path),
+   # in chunk-index ascending order.
+   #
+   # Under the current "single summary + previous_chunks index" scheme,
+   # session.json only keeps the newest compressed_summary message (which
+   # points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
+   # referenced only as basenames inside the summary text. Expanding just
+   # msg[:chunk_path] would therefore lose all prior chunks on replay.
+   chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
+     parse_chunk_md_to_rounds(p)
+   }
    rounds.concat(chunk_rounds)
    # After expanding, treat the last chunk round as the current round so that
    # any orphaned assistant/tool messages that follow in session.json (belonging
@@ -243,6 +262,32 @@ module Clacky
    { has_more: has_more }
  end
 
+ # Return all chunk MD file paths that belong to the same session as
+ # +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
+ # Uses the filename convention "<base>-chunk-<N>.md".
+ #
+ # Handles path resolution the same way parse_chunk_md_to_rounds does:
+ # if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
+ # (cross-machine / cross-user session bundles).
+ private def sibling_chunks_of(chunk_path)
+   return [] unless chunk_path
+
+   resolved = chunk_path.to_s
+   unless File.exist?(resolved)
+     resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
+   end
+   return [] unless File.exist?(resolved)
+
+   dir = File.dirname(resolved)
+   base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
+   return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
+
+   Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
+     m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
+     m ? m[1].to_i : Float::INFINITY
+   end
+ end
+
  # Parse a chunk MD file into an array of rounds compatible with replay_history.
  # Each round is { user_msg: Hash, events: Array<Hash> }.
  # Timestamps are synthesised from the chunk's archived_at, spread backwards.
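The numeric `sort_by` in `sibling_chunks_of` matters once a session accumulates ten or more chunks. A quick standalone demonstration (file names invented):

```ruby
# Why extract the integer instead of sorting paths as strings:
# lexicographic order would slot chunk-10 between chunk-1 and chunk-2.
paths = %w[sess-chunk-2.md sess-chunk-10.md sess-chunk-1.md]

lexicographic = paths.sort
# => ["sess-chunk-1.md", "sess-chunk-10.md", "sess-chunk-2.md"]  (wrong replay order)

numeric = paths.sort_by { |p| p[/-chunk-(\d+)\.md\z/, 1].to_i }
# => ["sess-chunk-1.md", "sess-chunk-2.md", "sess-chunk-10.md"]  (correct)

puts numeric.inspect
```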
data/lib/clacky/agent.rb CHANGED
@@ -42,7 +42,8 @@ module Clacky
 
  attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
              :cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
-             :status, :error, :updated_at, :source
+             :status, :error, :updated_at, :source,
+             :latest_latency # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
  attr_accessor :pinned
 
  def permission_mode
@@ -78,6 +79,7 @@ module Clacky
  @task_cost_source = :estimated # Track cost source for current task
  @previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
  @interrupted = false # Flag for user interrupt
+ @latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
  @ui = ui # UIController for direct UI interaction
  @debug_logs = [] # Debug logs for troubleshooting
  @pending_injections = [] # Pending inline skill injections to flush after observe()
@@ -208,6 +210,7 @@ module Clacky
 
  @start_time = Time.now
  @task_truncation_count = 0 # Reset truncation counter for each task
+ @task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
  @task_cost_source = :estimated # Reset for new task
  # Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
  # across tasks to correctly calculate delta tokens in each iteration
@@ -681,6 +684,17 @@ module Clacky
  end
  # Store token_usage in the message so replay_history can re-emit it
  msg[:token_usage] = response[:token_usage] if response[:token_usage]
+ # Store per-message latency — this is the source of truth (session.json)
+ # for all time-to-first-token / duration / throughput info. The status
+ # bar signal reads the last assistant message's latency; no separate
+ # config file or top-level session field is introduced.
+ if response[:latency]
+   msg[:latency] = response[:latency]
+   @latest_latency = response[:latency]
+   # Push to UI so the status-bar signal updates immediately after the
+   # model finishes (before any tool execution delays the next event).
+   @ui&.update_sessionbar(latency: response[:latency])
+ end
  # Preserve reasoning_content from the real LLM response.
  # This is the authoritative signal used by MessageHistory#to_api to
  # detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
data/lib/clacky/client.rb CHANGED
@@ -89,18 +89,54 @@ module Clacky
  # ── Agent main path ───────────────────────────────────────────────────────
 
  # Send messages with tool-calling support.
- # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
+ # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
+ #
+ # Latency measurement:
+ #   Because the current HTTP path is *non-streaming* (plain POST, response
+ #   body read in one shot), TTFB (time to response headers) is not exposed
+ #   by Faraday's default adapter without extra plumbing. What we CAN measure
+ #   cheaply — and what users actually feel — is total request duration,
+ #   which for a non-streaming call equals the time from "hit Enter" to
+ #   "first token visible" (since we receive everything at once).
+ #
+ #   So we record `duration_ms` as the authoritative number and alias it to
+ #   `ttft_ms` for downstream consumers (the status bar uses ttft_ms as its
+ #   signal metric — see docs). When we migrate to streaming later, this
+ #   same `ttft_ms` field will start carrying the *actual* first-token
+ #   latency without any schema change.
  def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
    caching_enabled = enable_caching && supports_prompt_caching?(model)
    cloned = deep_clone(messages)
 
-   if bedrock?
-     send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
-   elsif anthropic_format?
-     send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
-   else
-     send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
-   end
+   t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   response =
+     if bedrock?
+       send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
+     elsif anthropic_format?
+       send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
+     else
+       send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
+     end
+   t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+   duration_ms = ((t1 - t0) * 1000).round
+   # Throughput is only meaningful with a reasonable output size; below ~10
+   # tokens the sample is too small to be informative (extrapolating 1 token
+   # in 50ms to "20 tok/s" says nothing about sustained throughput).
+   # Canonical usage hashes from message_format/* all use :completion_tokens.
+   output_tokens = response[:usage]&.dig(:completion_tokens).to_i
+   tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
+
+   response[:latency] = {
+     ttft_ms: duration_ms, # non-streaming: TTFT == full duration
+     duration_ms: duration_ms,
+     output_tokens: output_tokens,
+     tps: tps,
+     model: model,
+     measured_at: Time.now.to_f,
+     streaming: false # future flag — true when we migrate
+   }
+   response
  end
 
  # Format tool results into canonical messages ready to append to @messages.
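To make the arithmetic above concrete, a worked example of the latency hash for a hypothetical call (all numbers invented for illustration):

```ruby
# Worked example of the latency fields computed in send_messages_with_tools.
t0 = 0.0
t1 = 8.2            # pretend the POST took 8.2s of monotonic time
output_tokens = 246

duration_ms = ((t1 - t0) * 1000).round                 # => 8200
tps = (output_tokens * 1000.0 / duration_ms).round(1)  # => 30.0 tokens/sec

latency = {
  ttft_ms: duration_ms, # non-streaming: the whole body arrives at once
  duration_ms: duration_ms,
  output_tokens: output_tokens,
  tps: tps,
  streaming: false
}
puts latency.inspect
```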
@@ -134,12 +134,13 @@ module Clacky
 
  # === State updates ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    data = {}
    data[:tasks] = tasks if tasks
    data[:cost] = cost if cost
    data[:cost_source] = cost_source if cost_source
    data[:status] = status if status
+   data[:latency] = latency if latency
    emit("session_update", **data) unless data.empty?
  end
 
@@ -136,7 +136,7 @@ module Clacky
 
  # === State updates (no-ops) ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -22,7 +22,7 @@ module Clacky
  "name" => "OpenClacky",
  "base_url" => "https://api.openclacky.com",
  "api" => "bedrock",
- "default_model" => "abs-claude-sonnet-4-6",
+ "default_model" => "abs-claude-sonnet-4-5",
  "models" => [
    "abs-claude-opus-4-7",
    "abs-claude-opus-4-6",
@@ -131,7 +131,7 @@ module Clacky
  }.freeze,
 
  "clackyai-sea" => {
-   "name" => "ClackyAI( Sea )",
+   "name" => "ClackyAI(Sea)",
    "base_url" => "https://api.clacky.ai",
    "api" => "bedrock",
    "default_model" => "abs-claude-sonnet-4-5",
@@ -152,7 +152,7 @@ module Clacky
 
  # === State updates (no-ops for IM) ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -426,6 +426,9 @@ module Clacky
  elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/model$})
    session_id = path.sub("/api/sessions/", "").sub("/model", "")
    api_switch_session_model(session_id, req, res)
+ elsif method == "POST" && path.match?(%r{^/api/sessions/[^/]+/benchmark$})
+   session_id = path.sub("/api/sessions/", "").sub("/benchmark", "")
+   api_benchmark_session_models(session_id, req, res)
  elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/working_dir$})
    session_id = path.sub("/api/sessions/", "").sub("/working_dir", "")
    api_change_session_working_dir(session_id, req, res)
@@ -2333,6 +2336,97 @@ module Clacky
    json_response(res, 500, { error: e.message })
  end
 
+ # POST /api/sessions/:id/benchmark
+ #
+ # Speed-test every configured model in one shot so the user can pick the
+ # fastest available model for this session. We send a minimal one-token
+ # request to each model *in parallel* (one thread per model) and measure
+ # total HTTP duration — for non-streaming calls this equals the user's
+ # perceived time-to-first-token, so the field is named `ttft_ms` for
+ # forward-compatibility with a future streaming implementation.
+ #
+ # Cost note: each request is `max_tokens: 1` + a 2-byte prompt, so the
+ # total cost across a dozen models is well under one cent.
+ #
+ # Response shape:
+ #   {
+ #     ok: true,
+ #     results: [
+ #       { model_id: "...", model: "...", ttft_ms: 812, ok: true },
+ #       { model_id: "...", model: "...", ok: false, error: "timeout" },
+ #       ...
+ #     ]
+ #   }
+ def api_benchmark_session_models(session_id, _req, res)
+   return json_response(res, 404, { error: "Session not found" }) unless @registry.ensure(session_id)
+
+   # Snapshot the models list — @agent_config.models is a shared reference
+   # that the user might mutate from the settings panel during the test;
+   # a shallow dup is enough since we only read string fields below.
+   models = Array(@agent_config.models).dup
+   return json_response(res, 200, { ok: true, results: [] }) if models.empty?
+
+   # Kick off one thread per model. Each request's wall time is capped
+   # inside the thread (see benchmark_single_model) so a single dead model
+   # can't block the response. The outer join uses a generous ceiling
+   # (timeout + small buffer) as a last-resort safety net.
+   per_model_timeout = 15
+   threads = models.map do |m|
+     Thread.new do
+       Thread.current.report_on_exception = false
+       benchmark_single_model(m, per_model_timeout)
+     end
+   end
+
+   results = models.zip(threads).map do |m, t|
+     if t.join(per_model_timeout + 3)
+       t.value rescue { ok: false, error: "thread failed" }
+     else
+       # Join timed out — calling t.value now would block forever and defeat
+       # the safety net, so kill the straggler and synthesise a failure row.
+       t.kill
+       { model_id: m["id"].to_s, model: m["model"].to_s, ok: false, error: "benchmark thread hung" }
+     end
+   end
+
+   json_response(res, 200, { ok: true, results: results })
+ rescue => e
+   Clacky::Logger.error("[benchmark] #{e.class}: #{e.message}", error: e)
+   json_response(res, 500, { error: e.message })
+ end
+
+ # Runs one speed-test request against a single model config hash and
+ # returns a result row for api_benchmark_session_models. Pure function —
+ # no shared state — so it's safe to call from worker threads.
+ private def benchmark_single_model(model_cfg, timeout_sec)
+   model_id = model_cfg["id"].to_s
+   model_name = model_cfg["model"].to_s
+   base = { model_id: model_id, model: model_name }
+
+   client = Clacky::Client.new(
+     model_cfg["api_key"].to_s,
+     base_url: model_cfg["base_url"].to_s,
+     model: model_name,
+     anthropic_format: model_cfg["anthropic_format"] || false
+   )
+
+   # Overriding Faraday's timeouts via a short-lived env var isn't ideal;
+   # instead we rely on test_connection's own network path and wrap
+   # the call in Timeout as a last line of defence. Most providers
+   # respond within 2-3s for a reply this small.
+   t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+   result = nil
+   begin
+     Timeout.timeout(timeout_sec) { result = client.test_connection(model: model_name) }
+   rescue Timeout::Error
+     return base.merge(ok: false, error: "timeout after #{timeout_sec}s")
+   end
+   t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+   if result && result[:success]
+     base.merge(ok: true, ttft_ms: ((t1 - t0) * 1000).round)
+   else
+     base.merge(ok: false, error: (result && result[:error]).to_s[0, 200])
+   end
+ rescue => e
+   base.merge(ok: false, error: "#{e.class}: #{e.message}"[0, 200])
+ end
+
  def api_change_session_working_dir(session_id, req, res)
    body = parse_json_body(req)
    new_dir = body["working_dir"].to_s.strip
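A hedged sketch of exercising the new endpoint from a script. The route and response shape come from the handler above; the host, port, and session id are placeholders.

```ruby
# Sketch: POST the benchmark endpoint and print one row per model.
# "localhost:4567" and SESSION_ID are placeholders, not openclacky defaults.
require "net/http"
require "json"

uri = URI("http://localhost:4567/api/sessions/SESSION_ID/benchmark")
res = Net::HTTP.post(uri, "") # empty body; the handler ignores it
data = JSON.parse(res.body)

data.fetch("results", []).each do |row|
  if row["ok"]
    puts "#{row['model']}: #{row['ttft_ms']}ms"
  else
    puts "#{row['model']}: FAILED (#{row['error']})"
  end
end
```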
@@ -169,7 +169,8 @@ module Clacky
  live_cost_source = s[:agent]&.cost_source
  { status: s[:status], error: s[:error], model: model_info&.dig(:model), name: live_name,
    total_tasks: s[:agent]&.total_tasks, total_cost: s[:agent]&.total_cost,
-   cost_source: live_cost_source }
+   cost_source: live_cost_source,
+   latest_latency: s[:agent]&.latest_latency }
  end
  end
 
@@ -234,6 +235,11 @@ module Clacky
  total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
  total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
  cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
+ # latest_latency is in-memory only (live sessions) — not persisted at the
+ # session level on disk. The on-disk source of truth is the per-assistant-
+ # message `latency` fields in messages[]. Reloaded sessions start with nil
+ # and get populated on the next LLM call.
+ latest_latency: ls&.dig(:latest_latency),
  pinned: s[:pinned] || false,
  }
@@ -311,6 +317,7 @@ module Clacky
  source: agent.source.to_s,
  agent_profile: agent.agent_profile.name,
  pinned: agent.pinned || false,
+ latest_latency: agent.latest_latency,
  }
  end
  end
@@ -302,14 +302,15 @@ module Clacky
 
  # === State updates ===
 
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    data = {}
    data[:tasks] = tasks if tasks
    data[:cost] = cost if cost
    data[:cost_source] = cost_source if cost_source
    data[:status] = status if status
+   data[:latency] = latency if latency
    emit("session_update", **data) unless data.empty?
-   forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status) }
+   forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status, latency: latency) }
  end
 
  def update_todos(todos)
@@ -108,7 +108,8 @@ module Clacky
  # @param cost [Float] Total cost (optional)
  # @param cost_source [Symbol, nil] :api / :price / :default (optional)
  # @param status [String] Workspace status ('idle' or 'working') (optional)
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
+ # @param latency [Hash, nil] Latency metrics; accepted but not displayed in the TUI.
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
    @tasks_count = tasks if tasks
    @total_cost = cost if cost
    @input_area.update_sessionbar(
@@ -106,7 +106,7 @@ module Clacky
  end
 
  # === State updates ===
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
  def update_todos(todos); end
  def set_working_status; end
  def set_idle_status; end
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Clacky
-   VERSION = "1.0.0.beta.6"
+   VERSION = "1.0.0"
  end
@@ -1524,11 +1524,15 @@ body {
  .msg-time {
    /* Rendered as a footnote *below* the bubble, floating inside the #messages
       flex gap (12px). Absolute-positioned so showing/hiding it on hover does
-      NOT reflow the message list — surrounding messages stay put. */
+      NOT reflow the message list — surrounding messages stay put.
+
+      Per-side anchoring (see .msg-user / .msg-assistant overrides below) is
+      critical: we must NOT set both left:0 and right:0, because with
+      white-space:nowrap a short bubble (e.g. just "1") would force the time
+      text to extend past the bubble edge and trigger horizontal page scroll.
+      Instead each variant anchors to one side and grows naturally inward. */
    position: absolute;
    top: 100%;
-   left: 0;
-   right: 0;
    margin-top: 2px;
    display: block;
    font-size: 10px;
@@ -1545,9 +1549,10 @@ body {
    opacity: 1;
    transform: translateY(0);
  }
- /* Time color / alignment: match the bubble's alignment in the column. */
- .msg-user .msg-time { color: var(--color-text-secondary); text-align: right; padding-right: 4px; }
- .msg-assistant .msg-time { color: var(--color-text-secondary); text-align: left; padding-left: 4px; }
+ /* Time color / alignment: anchor to the bubble's own side and let width be
+    driven by content, which prevents overflow on narrow bubbles. */
+ .msg-user .msg-time { color: var(--color-text-secondary); right: 0; left: auto; padding-right: 4px; }
+ .msg-assistant .msg-time { color: var(--color-text-secondary); left: 0; right: auto; padding-left: 4px; }
 
  .msg-user { background: var(--color-accent-primary); color: var(--color-button-primary-text); align-self: flex-end; }
  [data-theme="dark"] .msg-user { background: var(--color-accent-hover); }
@@ -2204,6 +2209,65 @@ body {
  #sib-tasks { opacity: 0.75; flex-shrink: 0; } /* tier 2 */
  #sib-cost { opacity: 0.45; flex-shrink: 0; } /* tier 3 */
 
+ /* ── Latency signal (right after model name) ──────────────────────────────
+    A compact 4-bar signal + TTFT value. Placed adjacent to #sib-model so the
+    user's mental mapping "this model is fast/slow" is immediate. Variant
+    classes (-ok/-warn/-bad) are applied by Sessions._renderSignal based on
+    TTFT thresholds; colours intentionally use CSS vars so the same palette
+    works in both light and dark themes. */
+ #sib-signal-wrap { position: relative; flex-shrink: 0; }
+ .sib-signal-clickable {
+   display: inline-flex;
+   align-items: center;
+   gap: 5px;
+   padding: 1px 6px;
+   cursor: default; /* no click handler yet — step 3/4 will add one */
+   border-radius: 3px;
+   opacity: 0.85;
+   transition: opacity 0.15s ease, background-color 0.15s ease;
+   font-variant-numeric: tabular-nums; /* prevents the text from jittering as values change */
+ }
+ .sib-signal-clickable:hover {
+   opacity: 1;
+   background: var(--color-bg-hover);
+ }
+ /* Bar stack: four 2-px wide vertical bars of increasing height, mimicking
+    a phone signal-strength icon. Each <i> is hollow by default; Sessions adds
+    .on to the ones that should light up for the current signal level. */
+ .sib-signal-clickable .sig-bars {
+   display: inline-flex;
+   align-items: flex-end;
+   gap: 1px;
+   height: 11px;
+ }
+ .sib-signal-clickable .sig-bars i {
+   display: inline-block;
+   width: 2px;
+   background: var(--color-text-secondary);
+   opacity: 0.25; /* dim "off" bar */
+   border-radius: 1px;
+   transition: background-color 0.15s, opacity 0.15s;
+ }
+ /* Individual bar heights — short→tall */
+ .sib-signal-clickable .sig-bars i:nth-child(1) { height: 3px; }
+ .sib-signal-clickable .sig-bars i:nth-child(2) { height: 5px; }
+ .sib-signal-clickable .sig-bars i:nth-child(3) { height: 8px; }
+ .sib-signal-clickable .sig-bars i:nth-child(4) { height: 11px; }
+ .sib-signal-clickable .sig-bars i.on { opacity: 1; }
+
+ /* Signal level → bar colour. Applied to .on bars only; "off" bars stay grey. */
+ .sib-signal-ok .sig-bars i.on { background: var(--color-accent-primary); } /* green / brand */
+ .sib-signal-warn .sig-bars i.on { background: #d39e00; } /* amber */
+ .sib-signal-bad .sig-bars i.on { background: #d9534f; } /* red */
+
+ .sib-signal-clickable .sig-text {
+   font-size: 11px;
+   color: var(--color-text-secondary);
+ }
+ .sib-signal-ok .sig-text { color: var(--color-text-primary); }
+ .sib-signal-warn .sig-text { color: #d39e00; }
+ .sib-signal-bad .sig-text { color: #d9534f; }
+
  /* Model name dropdown in session info bar */
  #sib-model-wrap {
    position: relative;
@@ -2266,6 +2330,94 @@ body {
    color: var(--color-accent-primary);
  }
 
+ /* ── Model switcher benchmark banner & latency column ──────────────────────
+    The banner sits at the top of the dropdown with a subtle border so it
+    visually separates from the scrollable model list below. The ⚡ button is
+    pushed to the RIGHT edge (where the eye naturally lands after scanning a
+    model name → latency row), while the optional hint ("done in 1.2s") sits
+    on the left. The per-row latency cell is right-aligned and uses
+    tabular-nums so numbers line up vertically regardless of width. */
+ .sib-model-bench {
+   display: flex;
+   align-items: center;
+   justify-content: space-between; /* hint on the left, button on the right */
+   gap: 8px;
+   padding: 4px 8px 4px 10px; /* compact: tighter top/bottom + tighter right side */
+   border-bottom: 1px solid var(--color-border-primary);
+   background: var(--color-bg-primary);
+   position: sticky; /* keep visible while scrolling a long model list */
+   top: 0;
+   z-index: 1;
+   min-height: 0;
+ }
+ .sib-bench-btn {
+   display: inline-flex;
+   align-items: center;
+   gap: 3px;
+   padding: 2px 8px;
+   font-size: 10px;
+   line-height: 1.4;
+   font-family: inherit;
+   background: var(--color-bg-secondary);
+   color: var(--color-text-secondary);
+   border: 1px solid var(--color-border-primary);
+   border-radius: 10px;
+   cursor: pointer;
+   transition: background-color 0.15s, border-color 0.15s, color 0.15s;
+   order: 2; /* force button to the right even if DOM order changes */
+   flex: 0 0 auto;
+ }
+ .sib-bench-btn:hover:not(:disabled) {
+   background: var(--color-bg-hover);
+   border-color: var(--color-accent-primary);
+   color: var(--color-accent-primary);
+ }
+ .sib-bench-btn:disabled {
+   opacity: 0.55;
+   cursor: progress;
+ }
+ .sib-bench-hint {
+   font-size: 10px;
+   color: var(--color-text-secondary);
+   font-variant-numeric: tabular-nums;
+   order: 1; /* hint stays on the left */
+   flex: 1 1 auto;
+   min-width: 0;
+   overflow: hidden;
+   text-overflow: ellipsis;
+   white-space: nowrap;
+ }
+
+ .sib-model-option .sib-model-name {
+   /* Keep long model names from pushing the latency cell offscreen. */
+   overflow: hidden;
+   text-overflow: ellipsis;
+   white-space: nowrap;
+   flex: 1 1 auto;
+   min-width: 0;
+ }
+ .sib-model-option .sib-model-right {
+   display: inline-flex;
+   align-items: center;
+   gap: 8px;
+   flex-shrink: 0;
+ }
+ .sib-model-option .sib-model-latency {
+   font-size: 10px;
+   font-variant-numeric: tabular-nums;
+   min-width: 44px; /* reserves space so rows don't jitter before benchmark */
+   text-align: right;
+   color: var(--color-text-secondary);
+ }
+ .sib-model-option .sib-model-latency.is-ok { color: var(--color-accent-primary); }
+ .sib-model-option .sib-model-latency.is-warn { color: #d39e00; }
+ .sib-model-option .sib-model-latency.is-bad { color: #d9534f; }
+ .sib-model-option .sib-model-latency.is-err { color: #d9534f; }
+ .sib-model-option .sib-model-latency.is-pending {
+   color: var(--color-text-secondary);
+   opacity: 0.7;
+ }
+
  /* ── Input area (wraps preview strip + input bar) ────────────────────────── */
  #ws-disconnect-hint {
    position: absolute;
@@ -410,9 +410,13 @@ WS.onEvent(ev => {
  // Shape (2): partial update — build patch from top-level fields
  sid = ev.session_id;
  patch = {};
- if (ev.cost !== undefined) patch.total_cost = ev.cost;
- if (ev.tasks !== undefined) patch.total_tasks = ev.tasks;
- if (ev.status !== undefined) patch.status = ev.status;
+ if (ev.cost !== undefined) patch.total_cost = ev.cost;
+ if (ev.tasks !== undefined) patch.total_tasks = ev.tasks;
+ if (ev.status !== undefined) patch.status = ev.status;
+ // Latency pushed by Agent after each LLM call (see update_sessionbar).
+ // Stored under latest_latency — same field name the HTTP /api/sessions
+ // list returns, so updateInfoBar doesn't need to branch on the source.
+ if (ev.latency !== undefined) patch.latest_latency = ev.latency;
  }
  if (!sid) break;
  Sessions.patch(sid, patch);
@@ -1637,6 +1641,13 @@ window.bootAfterBrand = async function() {
  // ── Session Info Bar Model Switcher ───────────────────────────────────────
  (function() {
    let _isOpen = false;
+   // Cache of the most recent benchmark results, keyed by model_id. Kept at
+   // closure scope so the numbers survive closing & reopening the dropdown —
+   // the user shouldn't have to re-run the test just to peek at results. We
+   // intentionally do NOT persist this to disk: latency is a point-in-time
+   // measurement, and yesterday's numbers are misleading.
+   let _benchCache = {}; // { [model_id]: { ttft_ms, ok, error, ts } }
+   let _benchInFlight = false; // prevent double-click spam
 
    // Toggle model dropdown when clicking on model name
    document.addEventListener("click", async (e) => {
@@ -1692,23 +1703,63 @@ window.bootAfterBrand = async function() {
 
  dropdown.innerHTML = "";
 
+ // ── Benchmark floating button (top-right of dropdown) ──────────────
+ // Tiny ⚡ button pinned to the dropdown's top-right corner. Runs one
+ // concurrent request per model and back-fills each row's latency cell.
+ // We deliberately avoid a full-width banner — it ate visual space that
+ // the model list needs, and most users open the dropdown to SWITCH,
+ // not to benchmark. The floating button is discoverable but unobtrusive.
+ const bench = document.createElement("div");
+ bench.className = "sib-model-bench";
+ const btnLabel = (typeof I18n !== "undefined") ? I18n.t("sib.bench.btn") : "Benchmark";
+ const btnTooltip = (typeof I18n !== "undefined") ? I18n.t("sib.bench.tooltip") : "Test response latency for every configured model";
+ bench.innerHTML = `
+   <button type="button" class="sib-bench-btn" title="${btnTooltip}">⚡ <span class="sib-bench-label">${btnLabel}</span></button>
+   <span class="sib-bench-hint"></span>
+ `;
+ dropdown.appendChild(bench);
+
+ const benchBtn = bench.querySelector(".sib-bench-btn");
+ const benchLabel = bench.querySelector(".sib-bench-label");
+ const benchHint = bench.querySelector(".sib-bench-hint");
+ benchBtn.addEventListener("click", (ev) => {
+   ev.stopPropagation();
+   _runBenchmark(sessionId, dropdown, benchBtn, benchLabel, benchHint);
+ });
+
+ // ── Model rows ─────────────────────────────────────────────────────
  models.forEach(m => {
    console.log("[Model Switcher] Adding model:", m.model, "id:", m.id, "current:", currentModel);
    const opt = document.createElement("div");
    opt.className = "sib-model-option";
+   opt.dataset.modelId = m.id;
    if (m.model === currentModel) opt.classList.add("current");
 
-   const modelName = document.createElement("span");
-   modelName.textContent = m.model;
-   opt.appendChild(modelName);
+   const left = document.createElement("span");
+   left.className = "sib-model-name";
+   left.textContent = m.model;
+   opt.appendChild(left);
+
+   const right = document.createElement("span");
+   right.className = "sib-model-right";
 
    if (m.type === "default") {
      const badge = document.createElement("span");
      badge.className = `model-badge ${m.type}`;
      badge.textContent = m.type;
-     opt.appendChild(badge);
+     right.appendChild(badge);
    }
 
+   // Latency cell — populated from _benchCache on open, updated live
+   // when a benchmark run completes. Empty slot keeps row heights stable
+   // so the list doesn't visually jump mid-benchmark.
+   const lat = document.createElement("span");
+   lat.className = "sib-model-latency";
+   _fillLatencyCell(lat, _benchCache[m.id]);
+   right.appendChild(lat);
+
+   opt.appendChild(right);
+
  // Switch by id (stable across reorders/edits). Keep model name for UI update.
  opt.addEventListener("click", () => _switchModel(sessionId, m.id, m.model));
  dropdown.appendChild(opt);
@@ -1720,6 +1771,105 @@ window.bootAfterBrand = async function() {
  }
  }
 
+ // Render one latency cell based on a cached result.
+ //   undefined        → empty slot (never tested / in-flight starts from here)
+ //   { ok:true }      → "812ms" in green/amber/red per threshold
+ //   { ok:false }     → "✕" with error in tooltip
+ //   { pending:true } → "…" spinner-ish marker
+ function _fillLatencyCell(el, entry) {
+   el.className = "sib-model-latency";
+   el.textContent = "";
+   el.removeAttribute("title");
+   if (!entry) return;
+   if (entry.pending) {
+     el.textContent = "…";
+     el.classList.add("is-pending");
+     return;
+   }
+   if (!entry.ok) {
+     el.textContent = "✕";
+     el.classList.add("is-err");
+     el.title = entry.error || "failed";
+     return;
+   }
+   const ms = entry.ttft_ms;
+   // Same thresholds as the sib-signal status bar — keep them aligned so
+   // "3 bars in the status bar" ≈ "green number in the picker".
+   // We measure full non-streaming response time (not real TTFT), so ≤60s is
+   // normal, ≤120s is slow, and beyond that is bad. ≤2s still gets the
+   // "feels instant" green treatment like the 4-bar signal.
+   let cls = "is-bad";
+   if (ms <= 2000) cls = "is-ok";
+   else if (ms <= 60000) cls = "is-ok";
+   else if (ms <= 120000) cls = "is-warn";
+   el.classList.add(cls);
+   el.textContent = ms >= 1000 ? (ms / 1000).toFixed(1) + "s" : ms + "ms";
+   if (typeof I18n !== "undefined") {
+     el.title = I18n.t("sib.bench.latencyTooltip", {
+       ttft: el.textContent,
+       time: new Date(entry.ts).toLocaleTimeString(),
+     });
+   } else {
+     el.title = `TTFT ${el.textContent} · tested ${new Date(entry.ts).toLocaleTimeString()}`;
+   }
+ }
+
+ async function _runBenchmark(sessionId, dropdown, btn, label, hint) {
+   if (_benchInFlight) return;
+   _benchInFlight = true;
+   btn.disabled = true;
+   const origLabel = label.textContent;
+   const _t = (key, vars) => (typeof I18n !== "undefined") ? I18n.t(key, vars) : key;
+   label.textContent = _t("sib.bench.running");
+   hint.textContent = "";
+
+   // Mark every row as pending so the user sees instant feedback instead of
+   // a silent button. _fillLatencyCell handles the visual treatment.
+   dropdown.querySelectorAll(".sib-model-option").forEach(opt => {
+     const id = opt.dataset.modelId;
+     if (!id) return;
+     _benchCache[id] = { pending: true };
+     _fillLatencyCell(opt.querySelector(".sib-model-latency"), _benchCache[id]);
+   });
+
+   const t0 = performance.now();
+   try {
+     const res = await fetch(`/api/sessions/${sessionId}/benchmark`, { method: "POST" });
+     const data = await res.json();
+     if (!res.ok || !data.ok) throw new Error(data.error || "benchmark failed");
+
+     const now = Date.now();
+     (data.results || []).forEach(r => {
+       _benchCache[r.model_id] = {
+         ok: !!r.ok,
+         ttft_ms: r.ttft_ms,
+         error: r.error,
+         ts: now,
+       };
+       const opt = dropdown.querySelector(`.sib-model-option[data-model-id="${CSS.escape(r.model_id)}"]`);
+       if (opt) _fillLatencyCell(opt.querySelector(".sib-model-latency"), _benchCache[r.model_id]);
+     });
+
+     const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
+     hint.textContent = _t("sib.bench.done", { t: elapsed });
+   } catch (e) {
+     console.error("Benchmark failed:", e);
+     hint.textContent = _t("sib.bench.failed", { msg: e.message });
+     // Clear pending markers so rows don't stay stuck on "…"
+     dropdown.querySelectorAll(".sib-model-option").forEach(opt => {
+       const id = opt.dataset.modelId;
+       if (id && _benchCache[id]?.pending) {
+         _benchCache[id] = undefined;
+         _fillLatencyCell(opt.querySelector(".sib-model-latency"), undefined);
+       }
+     });
+   } finally {
+     _benchInFlight = false;
+     btn.disabled = false;
+     label.textContent = origLabel;
+   }
+ }
+
  // Switch session model via API
  // modelId — stable runtime id (required by backend)
  // modelName — display name, used for optimistic UI update
@@ -394,6 +394,14 @@ const I18n = (() => {
 
  "header.owner.tooltip": "Creator — click to open Creator Hub",
 
+ // ── Session info bar / Model switcher benchmark ──
+ "sib.bench.btn": "Benchmark",
+ "sib.bench.tooltip": "Test response latency for every configured model",
+ "sib.bench.running": "Testing…",
+ "sib.bench.done": "done in {{t}}s",
+ "sib.bench.failed": "failed: {{msg}}",
+ "sib.bench.latencyTooltip": "TTFT {{ttft}} · tested {{time}}",
+
  "onboard.welcome": "Welcome to {{name}}",
  },
 
@@ -779,6 +787,14 @@ const I18n = (() => {
 
  "header.owner.tooltip": "创作者 — 点击进入创作者中心",
 
+ // ── Session info bar / Model switcher benchmark (zh) ──
+ "sib.bench.btn": "测速",
+ "sib.bench.tooltip": "测试所有已配置模型的响应延迟",
+ "sib.bench.running": "测速中…",
+ "sib.bench.done": "用时 {{t}} 秒",
+ "sib.bench.failed": "失败:{{msg}}",
+ "sib.bench.latencyTooltip": "TTFT {{ttft}} · 测试于 {{time}}",
+
  "onboard.welcome": "欢迎使用 {{name}}",
  }
  };
@@ -858,6 +874,11 @@ const I18n = (() => {
  })();
 
  // ── Thinking Verbs for Progress Animation ──────────────────────────────────
+ //
+ // The primary verb ("Thinking" / "思考中") is chosen 90% of the time inside
+ // getRandomThinkingVerb(). The lists below are ONLY the 10% flavor variants —
+ // do not include the primary verb here, and do not rely on duplicates for
+ // weighting (probability is controlled in code, not data).
  const THINKING_VERBS = {
    en: [
      "Cogitating",
@@ -882,38 +903,38 @@ const THINKING_VERBS = {
      "Reasoning"
    ],
    zh: [
-     "思考中", // 5x weight (appears 5 times for higher probability)
-     "思考中",
-     "思考中",
-     "思考中",
-     "思考中",
-     "琢磨中", // 2x weight
-     "琢磨中",
-     "思忖中",
-     "盘算中",
-     "酝酿中",
-     "捋一捋",
-     "理理头绪",
-     "掂量掂量",
-     "寻思中",
-     "琢磨琢磨",
-     "想想办法",
-     "推演中",
+     "推理中",
+     "深度思考中",
+     "分析中",
      "解析中",
      "拆解中",
-     "组装中",
+     "推演中",
      "梳理中",
-     "验证中",
+     "归纳中",
      "演算中",
-     "分析中",
-     "推理中",
-     "构思中"
+     "验证中",
+     "权衡中",
+     "构思中",
+     "酝酿中",
+     "思忖中",
+     "琢磨中"
    ]
  };
 
- // Get a random thinking verb based on current language
+ // Get a random thinking verb based on current language.
+ //
+ // Behavior: 90% of the time return the primary verb ("思考中" / "Thinking"),
+ // 10% of the time pick a random variant from the list for a bit of flavor.
+ // The primary is intentionally kept outside the list so tuning the probability
+ // is a single-number change here, independent of the list contents.
  function getRandomThinkingVerb() {
-   const lang = I18n.lang();
+   const lang = I18n.lang();
+   const primary = lang === "zh" ? "思考中" : "Thinking";
+
+   // 90% primary, 10% variant
+   if (Math.random() < 0.9) return primary;
+
    const verbs = THINKING_VERBS[lang] || THINKING_VERBS.en;
+   if (!verbs || verbs.length === 0) return primary;
    return verbs[Math.floor(Math.random() * verbs.length)];
  }
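The "probability in code, not data" pattern also reads cleanly in Ruby. A standalone sketch for illustration only; the verb strings and 0.9 split mirror the JS above, everything else is invented for the demo:

```ruby
# Probability lives in one constant instead of duplicated list entries,
# so retuning the 90/10 split never requires editing the variant list.
PRIMARY_VERB = "Thinking"
VARIANTS = ["Cogitating", "Reasoning"] # illustrative subset of the list above
PRIMARY_PROBABILITY = 0.9

def random_thinking_verb
  return PRIMARY_VERB if VARIANTS.empty? || rand < PRIMARY_PROBABILITY
  VARIANTS.sample
end

counts = Hash.new(0)
10_000.times { counts[random_thinking_verb] += 1 }
puts counts.inspect # PRIMARY_VERB should land near 9_000
```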
@@ -271,6 +271,16 @@
  <div id="sib-model-dropdown" class="sib-model-dropdown" style="display:none"></div>
  </span>
  <span class="sib-sep sib-sep-after-model">│</span>
+ <!-- Latency signal: 4-bar signal + TTFT number. Hidden until the first LLM
+      call completes (see updateInfoBar / Sessions._renderSignal). Click
+      opens a mini benchmark panel (see Step 3/4 — not yet implemented). -->
+ <span id="sib-signal-wrap" style="display:none">
+   <span id="sib-signal" class="sib-signal-clickable" title="Recent LLM latency">
+     <span class="sig-bars" aria-hidden="true"><i></i><i></i><i></i><i></i></span>
+     <span class="sig-text"></span>
+   </span>
+ </span>
+ <span class="sib-sep sib-sep-after-signal" style="display:none">│</span>
  <!-- Detail fields: mode, tasks, cost -->
  <span class="sib-detail">
  <span id="sib-mode"></span>
@@ -740,9 +740,18 @@ const Sessions = (() => {
 
  // Format a timestamp for display inside a message bubble.
  // Same-day: "HH:MM"; cross-day: "MM-DD HH:MM".
+ //
+ // Accepts:
+ //   - ISO string ("2026-04-30T21:45:00Z")
+ //   - JS millisecond epoch (number ≥ 1e12)
+ //   - Unix second epoch (number < 1e12) — what the Ruby backend emits via
+ //     Time.now.to_f; we multiply by 1000 before handing to Date(), otherwise
+ //     JS interprets 1.77e9 as ~1970-01-21 and we get bogus timestamps.
  function _formatMsgTime(dateOrStr) {
    if (!dateOrStr) return "";
-   const d = new Date(dateOrStr);
+   let input = dateOrStr;
+   if (typeof input === "number" && input < 1e12) input = input * 1000;
+   const d = new Date(input);
    if (isNaN(d)) return "";
    const now = new Date();
    const pad = n => String(n).padStart(2, "0");
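The seconds-vs-milliseconds cutoff is easy to sanity-check. A small Ruby sketch mirroring the JS heuristic (the 1e12 threshold and the sample epoch come from the comment above):

```ruby
# Mirror of the JS heuristic: numeric epochs below 1e12 are treated as
# seconds (what Ruby's Time.now.to_f emits) and scaled to milliseconds.
def normalize_epoch_ms(value)
  value < 1e12 ? (value * 1000).round : value.round
end

seconds_epoch = 1_770_000_000.0    # Time.now.to_f around early 2026
millis_epoch  = 1_770_000_000_000  # the same instant in JS milliseconds

puts Time.at(normalize_epoch_ms(seconds_epoch) / 1000).utc
puts Time.at(normalize_epoch_ms(millis_epoch) / 1000).utc
# Without the scaling, 1.77e9 read as milliseconds is ~21 days after the
# 1970 epoch: the bogus "1970-01-21" timestamps the comment mentions.
```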
@@ -1574,6 +1583,13 @@ const Sessions = (() => {
  }
  if (sibModelWrap) sibModelWrap.style.display = s.model ? "" : "none";
 
+ // Latency signal — read from s.latest_latency (populated by:
+ //   - HTTP /api/sessions → session_registry#list (from agent.latest_latency)
+ //   - WS session_update events patched by app.js).
+ // Hidden entirely when no latency has been recorded yet (fresh session, or
+ // old pre-feature sessions that have never made an LLM call this run).
+ this._renderSignal(s.latest_latency);
+
  // Tasks
  const sibTasks = $("sib-tasks");
  if (sibTasks) sibTasks.textContent = `${s.total_tasks || 0} tasks`;
@@ -1592,6 +1608,77 @@ const Sessions = (() => {
  if (bar) bar.style.display = "flex";
  },
 
+ /** Render the 4-bar latency signal next to the model name in the status bar.
+  *
+  * @param {Object|null} lat latency metrics from agent.latest_latency
+  *   shape: { ttft_ms, duration_ms, output_tokens, tps, model, streaming }
+  *
+  * Visibility: hidden whenever lat is falsy (no measurement yet). Never
+  * renders a "loading" state — we would rather show nothing than a stale or
+  * misleading number.
+  *
+  * Signal thresholds (TTFT):
+  *   Note: this is measured over the WHOLE non-streaming response (we
+  *   don't have a real TTFT yet — the server returns one completed body),
+  *   so for a large generation — "write me a 2000-line snake game" — the
+  *   number naturally balloons. Thresholds below are tuned to that reality:
+  *   60s is considered NORMAL, 120s is slow, beyond that we flag bad.
+  *
+  *   ≤   2000 ms → 4 bars, green, "⚡" fast
+  *   ≤  60000 ms → 3 bars, green, normal
+  *   ≤ 120000 ms → 2 bars, amber, slow
+  *   > 120000 ms → 1 bar,  red,   very slow
+  *
+  * Hover tooltip: built from the latency hash — full breakdown for power
+  * users; the compact inline text is just "1.2s" style for scannability.
+  */
+ _renderSignal(lat) {
+   const wrap = $("sib-signal-wrap");
+   const sep = document.querySelector(".sib-sep-after-signal");
+   const el = $("sib-signal");
+   if (!wrap || !el) return;
+
+   if (!lat || !lat.ttft_ms) {
+     wrap.style.display = "none";
+     if (sep) sep.style.display = "none";
+     return;
+   }
+
+   const ttft = Number(lat.ttft_ms) || 0;
+   let bars, level;
+   if (ttft <= 2000) { bars = 4; level = "ok"; }
+   else if (ttft <= 60000) { bars = 3; level = "ok"; }
+   else if (ttft <= 120000) { bars = 2; level = "warn"; }
+   else { bars = 1; level = "bad"; }
+
+   // Paint bars: active ones get .on, others stay dim
+   el.querySelectorAll(".sig-bars i").forEach((bar, i) => {
+     bar.classList.toggle("on", i < bars);
+   });
+   el.className = `sib-signal-clickable sib-signal-${level}`;
+
+   // Inline text: just the TTFT in human-friendly units
+   const ttftStr = ttft >= 1000 ? (ttft / 1000).toFixed(1) + "s" : ttft + "ms";
+   const text = el.querySelector(".sig-text");
+   if (text) text.textContent = ttftStr;
+
+   // Tooltip: full metrics breakdown
+   const parts = [`TTFT ${ttftStr}`];
+   if (lat.duration_ms && lat.duration_ms !== ttft) {
+     const durStr = lat.duration_ms >= 1000
+       ? (lat.duration_ms / 1000).toFixed(1) + "s"
+       : lat.duration_ms + "ms";
+     parts.push(`total ${durStr}`);
+   }
+   if (lat.tps) parts.push(`${lat.tps} tok/s`);
+   if (lat.output_tokens) parts.push(`${lat.output_tokens} tokens`);
+   if (lat.model) parts.push(`@ ${lat.model}`);
+   el.title = "Last LLM call — " + parts.join(" · ");
+
+   wrap.style.display = "";
+   if (sep) sep.style.display = "";
+ },
+
  // ── Message helpers ────────────────────────────────────────────────────
 
  // Live tool group state (one active group per session at a time)
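Since the same thresholds appear in three places (the status-bar signal, the picker's latency cells, and the docs above), the mapping is worth stating as a tiny pure function. A Ruby rendition for illustration only; it mirrors the JS logic, it is not part of the gem:

```ruby
# Pure-function rendition of the _renderSignal thresholds.
# Returns [bars, level] for a measured ttft in milliseconds.
def signal_for(ttft_ms)
  if ttft_ms <= 2_000     then [4, :ok]   # feels instant
  elsif ttft_ms <= 60_000 then [3, :ok]   # normal for a full non-streaming body
  elsif ttft_ms <= 120_000 then [2, :warn] # slow
  else [1, :bad]                           # very slow
  end
end

puts signal_for(812).inspect     # => [4, :ok]
puts signal_for(75_000).inspect  # => [2, :warn]
puts signal_for(180_000).inspect # => [1, :bad]
```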
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: openclacky
  version: !ruby/object:Gem::Version
-   version: 1.0.0.beta.6
+   version: 1.0.0
  platform: ruby
  authors:
  - windy