openclacky 1.0.0.beta.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/lib/clacky/agent/llm_caller.rb +87 -4
- data/lib/clacky/agent/session_serializer.rb +47 -2
- data/lib/clacky/agent.rb +15 -1
- data/lib/clacky/client.rb +44 -8
- data/lib/clacky/json_ui_controller.rb +2 -1
- data/lib/clacky/plain_ui_controller.rb +1 -1
- data/lib/clacky/providers.rb +2 -2
- data/lib/clacky/server/channel/channel_ui_controller.rb +1 -1
- data/lib/clacky/server/http_server.rb +94 -0
- data/lib/clacky/server/session_registry.rb +8 -1
- data/lib/clacky/server/web_ui_controller.rb +3 -2
- data/lib/clacky/ui2/ui_controller.rb +2 -1
- data/lib/clacky/ui_interface.rb +1 -1
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +158 -6
- data/lib/clacky/web/app.js +157 -7
- data/lib/clacky/web/i18n.js +45 -24
- data/lib/clacky/web/index.html +10 -0
- data/lib/clacky/web/sessions.js +88 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
|
|
4
|
+
data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
|
|
7
|
+
data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
|
data/CHANGELOG.md
CHANGED
|
@@ -5,7 +5,19 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [
|
|
8
|
+
## [1.0.0] - 2026-04-30
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
|
|
12
|
+
- **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
|
|
13
|
+
- **Default model changed to Claude Sonnet 4.5.** The new default model (`abs-claude-sonnet-4-5`) provides a better balance of speed, quality, and cost for most tasks.
|
|
14
|
+
|
|
15
|
+
### Improved
|
|
16
|
+
- **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
|
|
17
|
+
- **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
|
|
9
21
|
|
|
10
22
|
## [1.0.0.beta.6] - 2026-04-30
|
|
11
23
|
|
|
@@ -86,7 +86,45 @@ module Clacky
|
|
|
86
86
|
# Successful response — if we were probing, confirm primary is healthy.
|
|
87
87
|
handle_probe_success if @config.probing?
|
|
88
88
|
|
|
89
|
-
rescue Faraday::
|
|
89
|
+
rescue Faraday::TimeoutError => e
|
|
90
|
+
# ── Read-timeout path (distinct from connection-level failures) ──
|
|
91
|
+
# Faraday::TimeoutError on our non-streaming POST almost always means
|
|
92
|
+
# the *response* took longer than the 300s read-timeout to come back —
|
|
93
|
+
# i.e. the model is trying to produce a huge output in one shot
|
|
94
|
+
# (e.g. "write me a 2000-line snake game"). Blindly retrying the same
|
|
95
|
+
# request with the same prompt reproduces the same timeout.
|
|
96
|
+
#
|
|
97
|
+
# Strategy:
|
|
98
|
+
# 1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
|
|
99
|
+
# telling the model to break the work into smaller steps, then
|
|
100
|
+
# retry. The history edit changes the prompt, so the retry is
|
|
101
|
+
# materially different from the failed attempt.
|
|
102
|
+
# 2. On subsequent timeouts in the same task, fall back to the
|
|
103
|
+
# generic "just retry" behaviour (the model may have ignored
|
|
104
|
+
# the hint; don't pile on duplicate hints).
|
|
105
|
+
# 3. Probing-mode timeouts still go through handle_probe_failure.
|
|
106
|
+
retries += 1
|
|
107
|
+
|
|
108
|
+
if @config.probing?
|
|
109
|
+
handle_probe_failure
|
|
110
|
+
retry
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
if retries <= max_retries
|
|
114
|
+
inject_large_output_hint_if_first_timeout(e)
|
|
115
|
+
@ui&.show_progress(
|
|
116
|
+
"Response too slow (likely generating too much at once): #{e.message}",
|
|
117
|
+
progress_type: "retrying",
|
|
118
|
+
phase: "active",
|
|
119
|
+
metadata: { attempt: retries, total: max_retries }
|
|
120
|
+
)
|
|
121
|
+
sleep retry_delay
|
|
122
|
+
retry
|
|
123
|
+
else
|
|
124
|
+
raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
|
|
90
128
|
retries += 1
|
|
91
129
|
|
|
92
130
|
# Probing failure: primary still down — renew cooling-off and retry with fallback.
|
|
@@ -95,9 +133,10 @@ module Clacky
|
|
|
95
133
|
retry
|
|
96
134
|
end
|
|
97
135
|
|
|
98
|
-
#
|
|
99
|
-
# infrastructure blips — do NOT trigger fallback
|
|
100
|
-
#
|
|
136
|
+
# Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
|
|
137
|
+
# transient infrastructure blips — do NOT trigger fallback, and do
|
|
138
|
+
# NOT inject the "break into steps" hint (the model did nothing wrong).
|
|
139
|
+
# Just retry on the current model up to max_retries.
|
|
101
140
|
if retries <= max_retries
|
|
102
141
|
@ui&.show_progress(
|
|
103
142
|
"Network failed: #{e.message}",
|
|
@@ -229,6 +268,50 @@ module Clacky
|
|
|
229
268
|
(msg.include?("thinking") || msg.include?("must be passed back") ||
|
|
230
269
|
msg.include?("must be provided"))
|
|
231
270
|
end
|
|
271
|
+
|
|
272
|
+
# On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
|
|
273
|
+
# user message to the history instructing the model to break its work
|
|
274
|
+
# into smaller steps. Subsequent timeouts in the same task are ignored
|
|
275
|
+
# here (caller just retries) so we don't pollute history with duplicate
|
|
276
|
+
# hints.
|
|
277
|
+
#
|
|
278
|
+
# The injected message carries `system_injected: true` so it is:
|
|
279
|
+
# - Hidden from UI replay (session_serializer / replay_history filters)
|
|
280
|
+
# - Skipped by prompt-caching marker placement (client.rb)
|
|
281
|
+
# - Skipped by message compression's "recent user turn" protection
|
|
282
|
+
# (message_compressor_helper.rb)
|
|
283
|
+
#
|
|
284
|
+
# Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
|
|
285
|
+
private def inject_large_output_hint_if_first_timeout(err)
|
|
286
|
+
return if @task_timeout_hint_injected
|
|
287
|
+
|
|
288
|
+
@task_timeout_hint_injected = true
|
|
289
|
+
|
|
290
|
+
hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
|
|
291
|
+
"This usually means the model was trying to produce too much output in a single response. " \
|
|
292
|
+
"Please change your approach:\n" \
|
|
293
|
+
"- Break the task into multiple smaller steps, each producing a short response.\n" \
|
|
294
|
+
"- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
|
|
295
|
+
"then fill in each section with separate `edit` calls.\n" \
|
|
296
|
+
"- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
|
|
297
|
+
"- Do NOT attempt to output the entire deliverable in one response."
|
|
298
|
+
|
|
299
|
+
@history.append({
|
|
300
|
+
role: "user",
|
|
301
|
+
content: hint,
|
|
302
|
+
system_injected: true,
|
|
303
|
+
task_id: @current_task_id
|
|
304
|
+
})
|
|
305
|
+
|
|
306
|
+
Clacky::Logger.info(
|
|
307
|
+
"[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
|
|
308
|
+
"(error=#{err.class}: #{err.message})"
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
@ui&.show_warning(
|
|
312
|
+
"LLM response timed out — asking model to break the task into smaller steps and retrying..."
|
|
313
|
+
)
|
|
314
|
+
end
|
|
232
315
|
end
|
|
233
316
|
end
|
|
234
317
|
end
|
|
@@ -36,6 +36,15 @@ module Clacky
|
|
|
36
36
|
# Restore previous_total_tokens for accurate delta calculation across sessions
|
|
37
37
|
@previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
|
|
38
38
|
|
|
39
|
+
# Recover the latest latency metric from the most recent assistant message
|
|
40
|
+
# that carries a :latency field. This is the source of truth for the status-bar
|
|
41
|
+
# signal — no separate session-level field is needed. Older sessions (pre-feature)
|
|
42
|
+
# simply start with nil; the signal stays hidden until the next LLM call populates it.
|
|
43
|
+
last_assistant_with_latency = @history.to_a.reverse.find do |m|
|
|
44
|
+
m[:role].to_s == "assistant" && m[:latency]
|
|
45
|
+
end
|
|
46
|
+
@latest_latency = last_assistant_with_latency&.dig(:latency)
|
|
47
|
+
|
|
39
48
|
# Restore Time Machine state
|
|
40
49
|
@task_parents = session_data.dig(:time_machine, :task_parents) || {}
|
|
41
50
|
@current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
|
|
@@ -178,8 +187,18 @@ module Clacky
|
|
|
178
187
|
elsif current_round
|
|
179
188
|
current_round[:events] << msg
|
|
180
189
|
elsif msg[:compressed_summary] && msg[:chunk_path]
|
|
181
|
-
# Compressed summary sitting before any user rounds — expand
|
|
182
|
-
|
|
190
|
+
# Compressed summary sitting before any user rounds — expand ALL chunk
|
|
191
|
+
# MD files that belong to the same session (siblings of chunk_path),
|
|
192
|
+
# in chunk-index ascending order.
|
|
193
|
+
#
|
|
194
|
+
# Under the current "single summary + previous_chunks index" scheme,
|
|
195
|
+
# session.json only keeps the newest compressed_summary message (which
|
|
196
|
+
# points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
|
|
197
|
+
# referenced only as basenames inside the summary text. Expanding just
|
|
198
|
+
# msg[:chunk_path] would therefore lose all prior chunks on replay.
|
|
199
|
+
chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
|
|
200
|
+
parse_chunk_md_to_rounds(p)
|
|
201
|
+
}
|
|
183
202
|
rounds.concat(chunk_rounds)
|
|
184
203
|
# After expanding, treat the last chunk round as the current round so that
|
|
185
204
|
# any orphaned assistant/tool messages that follow in session.json (belonging
|
|
@@ -243,6 +262,32 @@ module Clacky
|
|
|
243
262
|
{ has_more: has_more }
|
|
244
263
|
end
|
|
245
264
|
|
|
265
|
+
# Return all chunk MD file paths that belong to the same session as
|
|
266
|
+
# +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
|
|
267
|
+
# Uses the filename convention "<base>-chunk-<N>.md".
|
|
268
|
+
#
|
|
269
|
+
# Handles path resolution the same way parse_chunk_md_to_rounds does:
|
|
270
|
+
# if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
|
|
271
|
+
# (cross-machine / cross-user session bundles).
|
|
272
|
+
private def sibling_chunks_of(chunk_path)
|
|
273
|
+
return [] unless chunk_path
|
|
274
|
+
|
|
275
|
+
resolved = chunk_path.to_s
|
|
276
|
+
unless File.exist?(resolved)
|
|
277
|
+
resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
|
|
278
|
+
end
|
|
279
|
+
return [] unless File.exist?(resolved)
|
|
280
|
+
|
|
281
|
+
dir = File.dirname(resolved)
|
|
282
|
+
base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
|
|
283
|
+
return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
|
|
284
|
+
|
|
285
|
+
Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
|
|
286
|
+
m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
|
|
287
|
+
m ? m[1].to_i : Float::INFINITY
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
246
291
|
# Parse a chunk MD file into an array of rounds compatible with replay_history.
|
|
247
292
|
# Each round is { user_msg: Hash, events: Array<Hash> }.
|
|
248
293
|
# Timestamps are synthesised from the chunk's archived_at, spread backwards.
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -42,7 +42,8 @@ module Clacky
|
|
|
42
42
|
|
|
43
43
|
attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
|
|
44
44
|
:cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
|
|
45
|
-
:status, :error, :updated_at, :source
|
|
45
|
+
:status, :error, :updated_at, :source,
|
|
46
|
+
:latest_latency # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
|
|
46
47
|
attr_accessor :pinned
|
|
47
48
|
|
|
48
49
|
def permission_mode
|
|
@@ -78,6 +79,7 @@ module Clacky
|
|
|
78
79
|
@task_cost_source = :estimated # Track cost source for current task
|
|
79
80
|
@previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
|
|
80
81
|
@interrupted = false # Flag for user interrupt
|
|
82
|
+
@latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
|
|
81
83
|
@ui = ui # UIController for direct UI interaction
|
|
82
84
|
@debug_logs = [] # Debug logs for troubleshooting
|
|
83
85
|
@pending_injections = [] # Pending inline skill injections to flush after observe()
|
|
@@ -208,6 +210,7 @@ module Clacky
|
|
|
208
210
|
|
|
209
211
|
@start_time = Time.now
|
|
210
212
|
@task_truncation_count = 0 # Reset truncation counter for each task
|
|
213
|
+
@task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
|
|
211
214
|
@task_cost_source = :estimated # Reset for new task
|
|
212
215
|
# Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
|
|
213
216
|
# across tasks to correctly calculate delta tokens in each iteration
|
|
@@ -681,6 +684,17 @@ module Clacky
|
|
|
681
684
|
end
|
|
682
685
|
# Store token_usage in the message so replay_history can re-emit it
|
|
683
686
|
msg[:token_usage] = response[:token_usage] if response[:token_usage]
|
|
687
|
+
# Store per-message latency — this is the source of truth (session.json)
|
|
688
|
+
# for all time-to-first-token / duration / throughput info. The status
|
|
689
|
+
# bar signal reads the last assistant message's latency; no separate
|
|
690
|
+
# config file or top-level session field is introduced.
|
|
691
|
+
if response[:latency]
|
|
692
|
+
msg[:latency] = response[:latency]
|
|
693
|
+
@latest_latency = response[:latency]
|
|
694
|
+
# Push to UI so the status-bar signal updates immediately after the
|
|
695
|
+
# model finishes (before any tool execution delays the next event).
|
|
696
|
+
@ui&.update_sessionbar(latency: response[:latency])
|
|
697
|
+
end
|
|
684
698
|
# Preserve reasoning_content from the real LLM response.
|
|
685
699
|
# This is the authoritative signal used by MessageHistory#to_api to
|
|
686
700
|
# detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
|
data/lib/clacky/client.rb
CHANGED
|
@@ -89,18 +89,54 @@ module Clacky
|
|
|
89
89
|
# ── Agent main path ───────────────────────────────────────────────────────
|
|
90
90
|
|
|
91
91
|
# Send messages with tool-calling support.
|
|
92
|
-
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
|
|
92
|
+
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
|
|
93
|
+
#
|
|
94
|
+
# Latency measurement:
|
|
95
|
+
# Because the current HTTP path is *non-streaming* (plain POST, response
|
|
96
|
+
# body read in one shot), TTFB (time to response headers) is not exposed
|
|
97
|
+
# by Faraday's default adapter without extra plumbing. What we CAN measure
|
|
98
|
+
# cheaply — and what users actually feel — is total request duration,
|
|
99
|
+
# which for a non-streaming call equals the time from "hit Enter" to
|
|
100
|
+
# "first token visible" (since we receive everything at once).
|
|
101
|
+
#
|
|
102
|
+
# So we record `duration_ms` as the authoritative number and alias it to
|
|
103
|
+
# `ttft_ms` for downstream consumers (status bar uses ttft_ms as its
|
|
104
|
+
# signal metric — see docs). When we migrate to streaming later, this
|
|
105
|
+
# same `ttft_ms` field will start carrying the *actual* first-token
|
|
106
|
+
# latency without any schema change.
|
|
93
107
|
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
94
108
|
caching_enabled = enable_caching && supports_prompt_caching?(model)
|
|
95
109
|
cloned = deep_clone(messages)
|
|
96
110
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
111
|
+
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
112
|
+
response =
|
|
113
|
+
if bedrock?
|
|
114
|
+
send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
115
|
+
elsif anthropic_format?
|
|
116
|
+
send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
117
|
+
else
|
|
118
|
+
send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
119
|
+
end
|
|
120
|
+
t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
121
|
+
|
|
122
|
+
duration_ms = ((t1 - t0) * 1000).round
|
|
123
|
+
# Throughput is only meaningful with a reasonable output size; below ~10
|
|
124
|
+
# tokens the sample is too small to be informative and the result is
|
|
125
|
+
# wildly skewed (e.g. 1 token / 50ms → 20 tok/s is meaningless).
|
|
126
|
+
# Canonical usage hashes from message_format/* all use :completion_tokens.
|
|
127
|
+
output_tokens = response[:usage]&.dig(:completion_tokens).to_i
|
|
128
|
+
tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
|
|
129
|
+
|
|
130
|
+
response[:latency] = {
|
|
131
|
+
ttft_ms: duration_ms, # non-streaming: TTFT == full duration
|
|
132
|
+
duration_ms: duration_ms,
|
|
133
|
+
output_tokens: output_tokens,
|
|
134
|
+
tps: tps,
|
|
135
|
+
model: model,
|
|
136
|
+
measured_at: Time.now.to_f,
|
|
137
|
+
streaming: false # future flag — true when we migrate
|
|
138
|
+
}
|
|
139
|
+
response
|
|
104
140
|
end
|
|
105
141
|
|
|
106
142
|
# Format tool results into canonical messages ready to append to @messages.
|
|
@@ -134,12 +134,13 @@ module Clacky
|
|
|
134
134
|
|
|
135
135
|
# === State updates ===
|
|
136
136
|
|
|
137
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
|
|
137
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
|
|
138
138
|
data = {}
|
|
139
139
|
data[:tasks] = tasks if tasks
|
|
140
140
|
data[:cost] = cost if cost
|
|
141
141
|
data[:cost_source] = cost_source if cost_source
|
|
142
142
|
data[:status] = status if status
|
|
143
|
+
data[:latency] = latency if latency
|
|
143
144
|
emit("session_update", **data) unless data.empty?
|
|
144
145
|
end
|
|
145
146
|
|
|
@@ -136,7 +136,7 @@ module Clacky
|
|
|
136
136
|
|
|
137
137
|
# === State updates (no-ops) ===
|
|
138
138
|
|
|
139
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
|
|
139
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
|
|
140
140
|
def update_todos(todos); end
|
|
141
141
|
def set_working_status; end
|
|
142
142
|
def set_idle_status; end
|
data/lib/clacky/providers.rb
CHANGED
|
@@ -22,7 +22,7 @@ module Clacky
|
|
|
22
22
|
"name" => "OpenClacky",
|
|
23
23
|
"base_url" => "https://api.openclacky.com",
|
|
24
24
|
"api" => "bedrock",
|
|
25
|
-
"default_model" => "abs-claude-sonnet-4-
|
|
25
|
+
"default_model" => "abs-claude-sonnet-4-5",
|
|
26
26
|
"models" => [
|
|
27
27
|
"abs-claude-opus-4-7",
|
|
28
28
|
"abs-claude-opus-4-6",
|
|
@@ -131,7 +131,7 @@ module Clacky
|
|
|
131
131
|
}.freeze,
|
|
132
132
|
|
|
133
133
|
"clackyai-sea" => {
|
|
134
|
-
"name" => "ClackyAI(
|
|
134
|
+
"name" => "ClackyAI(Sea)",
|
|
135
135
|
"base_url" => "https://api.clacky.ai",
|
|
136
136
|
"api" => "bedrock",
|
|
137
137
|
"default_model" => "abs-claude-sonnet-4-5",
|
|
@@ -152,7 +152,7 @@ module Clacky
|
|
|
152
152
|
|
|
153
153
|
# === State updates (no-ops for IM) ===
|
|
154
154
|
|
|
155
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
|
|
155
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
|
|
156
156
|
def update_todos(todos); end
|
|
157
157
|
def set_working_status; end
|
|
158
158
|
def set_idle_status; end
|
|
@@ -426,6 +426,9 @@ module Clacky
|
|
|
426
426
|
elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/model$})
|
|
427
427
|
session_id = path.sub("/api/sessions/", "").sub("/model", "")
|
|
428
428
|
api_switch_session_model(session_id, req, res)
|
|
429
|
+
elsif method == "POST" && path.match?(%r{^/api/sessions/[^/]+/benchmark$})
|
|
430
|
+
session_id = path.sub("/api/sessions/", "").sub("/benchmark", "")
|
|
431
|
+
api_benchmark_session_models(session_id, req, res)
|
|
429
432
|
elsif method == "PATCH" && path.match?(%r{^/api/sessions/[^/]+/working_dir$})
|
|
430
433
|
session_id = path.sub("/api/sessions/", "").sub("/working_dir", "")
|
|
431
434
|
api_change_session_working_dir(session_id, req, res)
|
|
@@ -2333,6 +2336,97 @@ module Clacky
|
|
|
2333
2336
|
json_response(res, 500, { error: e.message })
|
|
2334
2337
|
end
|
|
2335
2338
|
|
|
2339
|
+
# POST /api/sessions/:id/benchmark
|
|
2340
|
+
#
|
|
2341
|
+
# Speed-test every configured model in one shot so the user can pick the
|
|
2342
|
+
# fastest available model for this session. We send a minimal one-token
|
|
2343
|
+
# request to each model *in parallel* (one thread per model) and measure
|
|
2344
|
+
# total HTTP duration — for non-streaming calls this equals the user's
|
|
2345
|
+
# perceived time-to-first-token, so the field is named `ttft_ms` for
|
|
2346
|
+
# forward-compatibility with a future streaming implementation.
|
|
2347
|
+
#
|
|
2348
|
+
# Cost note: each request is `max_tokens: 1` + a 2-byte prompt, so the
|
|
2349
|
+
# total cost across a dozen models is well under one cent.
|
|
2350
|
+
#
|
|
2351
|
+
# Response shape:
|
|
2352
|
+
# {
|
|
2353
|
+
# ok: true,
|
|
2354
|
+
# results: [
|
|
2355
|
+
# { model_id: "...", model: "...", ttft_ms: 812, ok: true },
|
|
2356
|
+
# { model_id: "...", model: "...", ok: false, error: "timeout" },
|
|
2357
|
+
# ...
|
|
2358
|
+
# ]
|
|
2359
|
+
# }
|
|
2360
|
+
def api_benchmark_session_models(session_id, _req, res)
|
|
2361
|
+
return json_response(res, 404, { error: "Session not found" }) unless @registry.ensure(session_id)
|
|
2362
|
+
|
|
2363
|
+
# Snapshot the models list — @agent_config.models is a shared reference
|
|
2364
|
+
# that the user might mutate from the settings panel during the test;
|
|
2365
|
+
# a shallow dup is enough since we only read string fields below.
|
|
2366
|
+
models = Array(@agent_config.models).dup
|
|
2367
|
+
return json_response(res, 200, { ok: true, results: [] }) if models.empty?
|
|
2368
|
+
|
|
2369
|
+
# Kick off one thread per model. We deliberately cap per-request wall
|
|
2370
|
+
# time inside each thread via Timeout.timeout so a single dead model
|
|
2371
|
+
# can't block the response. The outer join uses a generous ceiling
|
|
2372
|
+
# (timeout + small buffer) as a last-resort safety net.
|
|
2373
|
+
per_model_timeout = 15
|
|
2374
|
+
threads = models.map do |m|
|
|
2375
|
+
Thread.new do
|
|
2376
|
+
Thread.current.report_on_exception = false
|
|
2377
|
+
benchmark_single_model(m, per_model_timeout)
|
|
2378
|
+
end
|
|
2379
|
+
end
|
|
2380
|
+
|
|
2381
|
+
results = threads.map do |t|
|
|
2382
|
+
t.join(per_model_timeout + 3)
|
|
2383
|
+
t.value rescue { ok: false, error: "thread failed" }
|
|
2384
|
+
end
|
|
2385
|
+
|
|
2386
|
+
json_response(res, 200, { ok: true, results: results })
|
|
2387
|
+
rescue => e
|
|
2388
|
+
Clacky::Logger.error("[benchmark] #{e.class}: #{e.message}", error: e)
|
|
2389
|
+
json_response(res, 500, { error: e.message })
|
|
2390
|
+
end
|
|
2391
|
+
|
|
2392
|
+
# Runs one speed-test request against a single model config hash and
|
|
2393
|
+
# returns a result row for api_benchmark_session_models. Self-contained —
|
|
2394
|
+
# no shared state — so it's safe to call from worker threads.
|
|
2395
|
+
private def benchmark_single_model(model_cfg, timeout_sec)
|
|
2396
|
+
model_id = model_cfg["id"].to_s
|
|
2397
|
+
model_name = model_cfg["model"].to_s
|
|
2398
|
+
base = { model_id: model_id, model: model_name }
|
|
2399
|
+
|
|
2400
|
+
client = Clacky::Client.new(
|
|
2401
|
+
model_cfg["api_key"].to_s,
|
|
2402
|
+
base_url: model_cfg["base_url"].to_s,
|
|
2403
|
+
model: model_name,
|
|
2404
|
+
anthropic_format: model_cfg["anthropic_format"] || false
|
|
2405
|
+
)
|
|
2406
|
+
|
|
2407
|
+
# Overriding Faraday timeouts via a short-lived env var isn't ideal;
|
|
2408
|
+
# instead we rely on test_connection's own network path and wrap
|
|
2409
|
+
# the call in Timeout as a last line of defence. Most providers
|
|
2410
|
+
# respond within 2-3s for a 16-token reply.
|
|
2411
|
+
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
2412
|
+
result = nil
|
|
2413
|
+
begin
|
|
2414
|
+
Timeout.timeout(timeout_sec) { result = client.test_connection(model: model_name) }
|
|
2415
|
+
rescue Timeout::Error
|
|
2416
|
+
return base.merge(ok: false, error: "timeout after #{timeout_sec}s")
|
|
2417
|
+
end
|
|
2418
|
+
t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
2419
|
+
|
|
2420
|
+
if result && result[:success]
|
|
2421
|
+
base.merge(ok: true, ttft_ms: ((t1 - t0) * 1000).round)
|
|
2422
|
+
else
|
|
2423
|
+
base.merge(ok: false, error: (result && result[:error]).to_s[0, 200])
|
|
2424
|
+
end
|
|
2425
|
+
rescue => e
|
|
2426
|
+
base.merge(ok: false, error: "#{e.class}: #{e.message}"[0, 200])
|
|
2427
|
+
end
|
|
2428
|
+
|
|
2429
|
+
|
|
2336
2430
|
def api_change_session_working_dir(session_id, req, res)
|
|
2337
2431
|
body = parse_json_body(req)
|
|
2338
2432
|
new_dir = body["working_dir"].to_s.strip
|
|
@@ -169,7 +169,8 @@ module Clacky
|
|
|
169
169
|
live_cost_source = s[:agent]&.cost_source
|
|
170
170
|
{ status: s[:status], error: s[:error], model: model_info&.dig(:model), name: live_name,
|
|
171
171
|
total_tasks: s[:agent]&.total_tasks, total_cost: s[:agent]&.total_cost,
|
|
172
|
-
cost_source: live_cost_source
|
|
172
|
+
cost_source: live_cost_source,
|
|
173
|
+
latest_latency: s[:agent]&.latest_latency }
|
|
173
174
|
end
|
|
174
175
|
end
|
|
175
176
|
|
|
@@ -234,6 +235,11 @@ module Clacky
|
|
|
234
235
|
total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
|
|
235
236
|
total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
|
|
236
237
|
cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
|
|
238
|
+
# latest_latency is in-memory only (live sessions) — not persisted
|
|
239
|
+
# at the session-level on disk. The on-disk source of truth is
|
|
240
|
+
# per-assistant-message `latency` fields in messages[]. Reloaded
|
|
241
|
+
# sessions recover it from those fields (nil if no message has one yet).
|
|
242
|
+
latest_latency: ls&.dig(:latest_latency),
|
|
237
243
|
pinned: s[:pinned] || false,
|
|
238
244
|
}
|
|
239
245
|
end
|
|
@@ -311,6 +317,7 @@ module Clacky
|
|
|
311
317
|
source: agent.source.to_s,
|
|
312
318
|
agent_profile: agent.agent_profile.name,
|
|
313
319
|
pinned: agent.pinned || false,
|
|
320
|
+
latest_latency: agent.latest_latency,
|
|
314
321
|
}
|
|
315
322
|
end
|
|
316
323
|
end
|
|
@@ -302,14 +302,15 @@ module Clacky
|
|
|
302
302
|
|
|
303
303
|
# === State updates ===
|
|
304
304
|
|
|
305
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
|
|
305
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
|
|
306
306
|
data = {}
|
|
307
307
|
data[:tasks] = tasks if tasks
|
|
308
308
|
data[:cost] = cost if cost
|
|
309
309
|
data[:cost_source] = cost_source if cost_source
|
|
310
310
|
data[:status] = status if status
|
|
311
|
+
data[:latency] = latency if latency
|
|
311
312
|
emit("session_update", **data) unless data.empty?
|
|
312
|
-
forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status) }
|
|
313
|
+
forward_to_subscribers { |sub| sub.update_sessionbar(tasks: tasks, cost: cost, cost_source: cost_source, status: status, latency: latency) }
|
|
313
314
|
end
|
|
314
315
|
|
|
315
316
|
def update_todos(todos)
|
|
@@ -108,7 +108,8 @@ module Clacky
|
|
|
108
108
|
# @param cost [Float] Total cost (optional)
|
|
109
109
|
# @param cost_source [Symbol, nil] :api / :price / :default (optional)
|
|
110
110
|
# @param status [String] Workspace status ('idle' or 'working') (optional)
|
|
111
|
-
|
|
111
|
+
# @param latency [Hash, nil] Latency metrics; accepted but not displayed in the TUI.
|
|
112
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
|
|
112
113
|
@tasks_count = tasks if tasks
|
|
113
114
|
@total_cost = cost if cost
|
|
114
115
|
@input_area.update_sessionbar(
|
data/lib/clacky/ui_interface.rb
CHANGED
|
@@ -106,7 +106,7 @@ module Clacky
|
|
|
106
106
|
end
|
|
107
107
|
|
|
108
108
|
# === State updates ===
|
|
109
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
|
|
109
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
|
|
110
110
|
def update_todos(todos); end
|
|
111
111
|
def set_working_status; end
|
|
112
112
|
def set_idle_status; end
|
data/lib/clacky/version.rb
CHANGED
data/lib/clacky/web/app.css
CHANGED
|
@@ -1524,11 +1524,15 @@ body {
|
|
|
1524
1524
|
.msg-time {
|
|
1525
1525
|
/* Rendered as a footnote *below* the bubble, floating inside the #messages
|
|
1526
1526
|
flex gap (12px). Absolute-positioned so showing/hiding it on hover does
|
|
1527
|
-
NOT reflow the message list — surrounding messages stay put.
|
|
1527
|
+
NOT reflow the message list — surrounding messages stay put.
|
|
1528
|
+
|
|
1529
|
+
Per-side anchoring (see .msg-user / .msg-assistant overrides below) is
|
|
1530
|
+
critical: we must NOT set both left:0 and right:0, because with
|
|
1531
|
+
white-space:nowrap a short bubble (e.g. just "1") would force the time
|
|
1532
|
+
text to extend past the bubble edge and trigger horizontal page scroll.
|
|
1533
|
+
Instead each variant anchors to one side and grows naturally inward. */
|
|
1528
1534
|
position: absolute;
|
|
1529
1535
|
top: 100%;
|
|
1530
|
-
left: 0;
|
|
1531
|
-
right: 0;
|
|
1532
1536
|
margin-top: 2px;
|
|
1533
1537
|
display: block;
|
|
1534
1538
|
font-size: 10px;
|
|
@@ -1545,9 +1549,10 @@ body {
|
|
|
1545
1549
|
opacity: 1;
|
|
1546
1550
|
transform: translateY(0);
|
|
1547
1551
|
}
|
|
1548
|
-
/* Time color / alignment:
|
|
1549
|
-
|
|
1550
|
-
.msg-
|
|
1552
|
+
/* Time color / alignment: anchor to the bubble's own side, let width be
|
|
1553
|
+
driven by content — prevents overflow on narrow bubbles. */
|
|
1554
|
+
.msg-user .msg-time { color: var(--color-text-secondary); right: 0; left: auto; padding-right: 4px; }
|
|
1555
|
+
.msg-assistant .msg-time { color: var(--color-text-secondary); left: 0; right: auto; padding-left: 4px; }
|
|
1551
1556
|
|
|
1552
1557
|
.msg-user { background: var(--color-accent-primary); color: var(--color-button-primary-text); align-self: flex-end; }
|
|
1553
1558
|
[data-theme="dark"] .msg-user { background: var(--color-accent-hover); }
|
|
@@ -2204,6 +2209,65 @@ body {
|
|
|
2204
2209
|
#sib-tasks { opacity: 0.75; flex-shrink: 0; } /* tier 2 */
|
|
2205
2210
|
#sib-cost { opacity: 0.45; flex-shrink: 0; } /* tier 3 */
|
|
2206
2211
|
|
|
2212
|
+
/* ── Latency signal (right after model name) ──────────────────────────────
|
|
2213
|
+
A compact 4-bar signal + TTFT value. Placed adjacent to #sib-model so the
|
|
2214
|
+
user's mental mapping "this model is fast/slow" is immediate. Variant
|
|
2215
|
+
classes (-ok/-warn/-bad) are applied by Sessions._renderSignal based on
|
|
2216
|
+
TTFT thresholds; the "ok" and neutral colours come from CSS vars so they
|
|
2217
|
+
follow the light/dark theme, while warn (amber) and bad (red) are fixed hex. */
|
|
2218
|
+
#sib-signal-wrap { position: relative; flex-shrink: 0; }
|
|
2219
|
+
.sib-signal-clickable {
|
|
2220
|
+
display: inline-flex;
|
|
2221
|
+
align-items: center;
|
|
2222
|
+
gap: 5px;
|
|
2223
|
+
padding: 1px 6px;
|
|
2224
|
+
cursor: default; /* no click handler yet — step 3/4 will add one */
|
|
2225
|
+
border-radius: 3px;
|
|
2226
|
+
opacity: 0.85;
|
|
2227
|
+
transition: opacity 0.15s ease, background-color 0.15s ease;
|
|
2228
|
+
font-variant-numeric: tabular-nums; /* prevents the text from jittering as values change */
|
|
2229
|
+
}
|
|
2230
|
+
.sib-signal-clickable:hover {
|
|
2231
|
+
opacity: 1;
|
|
2232
|
+
background: var(--color-bg-hover);
|
|
2233
|
+
}
|
|
2234
|
+
/* Bar stack: four 2-px wide vertical bars of increasing height, mimicking
|
|
2235
|
+
a phone signal-strength icon. Each <i> is hollow by default; Sessions adds
|
|
2236
|
+
.on to the ones that should light up for the current signal level. */
|
|
2237
|
+
.sib-signal-clickable .sig-bars {
|
|
2238
|
+
display: inline-flex;
|
|
2239
|
+
align-items: flex-end;
|
|
2240
|
+
gap: 1px;
|
|
2241
|
+
height: 11px;
|
|
2242
|
+
}
|
|
2243
|
+
.sib-signal-clickable .sig-bars i {
|
|
2244
|
+
display: inline-block;
|
|
2245
|
+
width: 2px;
|
|
2246
|
+
background: var(--color-text-secondary);
|
|
2247
|
+
opacity: 0.25; /* dim "off" bar */
|
|
2248
|
+
border-radius: 1px;
|
|
2249
|
+
transition: background-color 0.15s, opacity 0.15s;
|
|
2250
|
+
}
|
|
2251
|
+
/* Individual bar heights — short→tall */
|
|
2252
|
+
.sib-signal-clickable .sig-bars i:nth-child(1) { height: 3px; }
|
|
2253
|
+
.sib-signal-clickable .sig-bars i:nth-child(2) { height: 5px; }
|
|
2254
|
+
.sib-signal-clickable .sig-bars i:nth-child(3) { height: 8px; }
|
|
2255
|
+
.sib-signal-clickable .sig-bars i:nth-child(4) { height: 11px; }
|
|
2256
|
+
.sib-signal-clickable .sig-bars i.on { opacity: 1; }
|
|
2257
|
+
|
|
2258
|
+
/* Signal level → bar colour. Applied to .on bars only; "off" bars stay grey. */
|
|
2259
|
+
.sib-signal-ok .sig-bars i.on { background: var(--color-accent-primary); } /* green / brand */
|
|
2260
|
+
.sib-signal-warn .sig-bars i.on { background: #d39e00; } /* amber */
|
|
2261
|
+
.sib-signal-bad .sig-bars i.on { background: #d9534f; } /* red */
|
|
2262
|
+
|
|
2263
|
+
.sib-signal-clickable .sig-text {
|
|
2264
|
+
font-size: 11px;
|
|
2265
|
+
color: var(--color-text-secondary);
|
|
2266
|
+
}
|
|
2267
|
+
.sib-signal-ok .sig-text { color: var(--color-text-primary); }
|
|
2268
|
+
.sib-signal-warn .sig-text { color: #d39e00; }
|
|
2269
|
+
.sib-signal-bad .sig-text { color: #d9534f; }
|
|
2270
|
+
|
|
2207
2271
|
/* Model name dropdown in session info bar */
|
|
2208
2272
|
#sib-model-wrap {
|
|
2209
2273
|
position: relative;
|
|
@@ -2266,6 +2330,94 @@ body {
|
|
|
2266
2330
|
color: var(--color-accent-primary);
|
|
2267
2331
|
}
|
|
2268
2332
|
|
|
2333
|
+
/* ── Model switcher benchmark banner & latency column ──────────────────────
|
|
2334
|
+
The banner sits at the top of the dropdown with a subtle border so it
|
|
2335
|
+
visually separates from the scrollable model list below. The ⚡ button is
|
|
2336
|
+
pushed to the RIGHT edge (where the eye naturally lands after scanning a
|
|
2337
|
+
model name → latency row), while the optional hint ("done in 1.2s") sits
|
|
2338
|
+
on the left. The per-row latency cell is right-aligned and uses
|
|
2339
|
+
tabular-nums so numbers line up vertically regardless of width. */
|
|
2340
|
+
.sib-model-bench {
|
|
2341
|
+
display: flex;
|
|
2342
|
+
align-items: center;
|
|
2343
|
+
justify-content: space-between; /* hint on the left, button on the right */
|
|
2344
|
+
gap: 8px;
|
|
2345
|
+
padding: 4px 8px 4px 10px; /* compact: tighter top/bottom + tighter right side */
|
|
2346
|
+
border-bottom: 1px solid var(--color-border-primary);
|
|
2347
|
+
background: var(--color-bg-primary);
|
|
2348
|
+
position: sticky; /* keep visible while scrolling a long model list */
|
|
2349
|
+
top: 0;
|
|
2350
|
+
z-index: 1;
|
|
2351
|
+
min-height: 0;
|
|
2352
|
+
}
|
|
2353
|
+
.sib-bench-btn {
|
|
2354
|
+
display: inline-flex;
|
|
2355
|
+
align-items: center;
|
|
2356
|
+
gap: 3px;
|
|
2357
|
+
padding: 2px 8px;
|
|
2358
|
+
font-size: 10px;
|
|
2359
|
+
line-height: 1.4;
|
|
2360
|
+
font-family: inherit;
|
|
2361
|
+
background: var(--color-bg-secondary);
|
|
2362
|
+
color: var(--color-text-secondary);
|
|
2363
|
+
border: 1px solid var(--color-border-primary);
|
|
2364
|
+
border-radius: 10px;
|
|
2365
|
+
cursor: pointer;
|
|
2366
|
+
transition: background-color 0.15s, border-color 0.15s, color 0.15s;
|
|
2367
|
+
order: 2; /* force button to the right even if DOM order changes */
|
|
2368
|
+
flex: 0 0 auto;
|
|
2369
|
+
}
|
|
2370
|
+
.sib-bench-btn:hover:not(:disabled) {
|
|
2371
|
+
background: var(--color-bg-hover);
|
|
2372
|
+
border-color: var(--color-accent-primary);
|
|
2373
|
+
color: var(--color-accent-primary);
|
|
2374
|
+
}
|
|
2375
|
+
.sib-bench-btn:disabled {
|
|
2376
|
+
opacity: 0.55;
|
|
2377
|
+
cursor: progress;
|
|
2378
|
+
}
|
|
2379
|
+
.sib-bench-hint {
|
|
2380
|
+
font-size: 10px;
|
|
2381
|
+
color: var(--color-text-secondary);
|
|
2382
|
+
font-variant-numeric: tabular-nums;
|
|
2383
|
+
order: 1; /* hint stays on the left */
|
|
2384
|
+
flex: 1 1 auto;
|
|
2385
|
+
min-width: 0;
|
|
2386
|
+
overflow: hidden;
|
|
2387
|
+
text-overflow: ellipsis;
|
|
2388
|
+
white-space: nowrap;
|
|
2389
|
+
}
|
|
2390
|
+
|
|
2391
|
+
.sib-model-option .sib-model-name {
|
|
2392
|
+
/* Keep long model names from pushing the latency cell offscreen. */
|
|
2393
|
+
overflow: hidden;
|
|
2394
|
+
text-overflow: ellipsis;
|
|
2395
|
+
white-space: nowrap;
|
|
2396
|
+
flex: 1 1 auto;
|
|
2397
|
+
min-width: 0;
|
|
2398
|
+
}
|
|
2399
|
+
.sib-model-option .sib-model-right {
|
|
2400
|
+
display: inline-flex;
|
|
2401
|
+
align-items: center;
|
|
2402
|
+
gap: 8px;
|
|
2403
|
+
flex-shrink: 0;
|
|
2404
|
+
}
|
|
2405
|
+
.sib-model-option .sib-model-latency {
|
|
2406
|
+
font-size: 10px;
|
|
2407
|
+
font-variant-numeric: tabular-nums;
|
|
2408
|
+
min-width: 44px; /* reserves space so rows don't jitter before benchmark */
|
|
2409
|
+
text-align: right;
|
|
2410
|
+
color: var(--color-text-secondary);
|
|
2411
|
+
}
|
|
2412
|
+
.sib-model-option .sib-model-latency.is-ok { color: var(--color-accent-primary); }
|
|
2413
|
+
.sib-model-option .sib-model-latency.is-warn { color: #d39e00; }
|
|
2414
|
+
.sib-model-option .sib-model-latency.is-bad { color: #d9534f; }
|
|
2415
|
+
.sib-model-option .sib-model-latency.is-err { color: #d9534f; }
|
|
2416
|
+
.sib-model-option .sib-model-latency.is-pending {
|
|
2417
|
+
color: var(--color-text-secondary);
|
|
2418
|
+
opacity: 0.7;
|
|
2419
|
+
}
|
|
2420
|
+
|
|
2269
2421
|
/* ── Input area (wraps preview strip + input bar) ────────────────────────── */
|
|
2270
2422
|
#ws-disconnect-hint {
|
|
2271
2423
|
position: absolute;
|
data/lib/clacky/web/app.js
CHANGED
|
@@ -410,9 +410,13 @@ WS.onEvent(ev => {
|
|
|
410
410
|
// Shape (2): partial update — build patch from top-level fields
|
|
411
411
|
sid = ev.session_id;
|
|
412
412
|
patch = {};
|
|
413
|
-
if (ev.cost
|
|
414
|
-
if (ev.tasks
|
|
415
|
-
if (ev.status
|
|
413
|
+
if (ev.cost !== undefined) patch.total_cost = ev.cost;
|
|
414
|
+
if (ev.tasks !== undefined) patch.total_tasks = ev.tasks;
|
|
415
|
+
if (ev.status !== undefined) patch.status = ev.status;
|
|
416
|
+
// Latency pushed by Agent after each LLM call (see update_sessionbar).
|
|
417
|
+
// Stored under latest_latency — same field name the HTTP /api/sessions
|
|
418
|
+
// list returns, so updateInfoBar doesn't need to branch on the source.
|
|
419
|
+
if (ev.latency !== undefined) patch.latest_latency = ev.latency;
|
|
416
420
|
}
|
|
417
421
|
if (!sid) break;
|
|
418
422
|
Sessions.patch(sid, patch);
|
|
@@ -1637,6 +1641,13 @@ window.bootAfterBrand = async function() {
|
|
|
1637
1641
|
// ── Session Info Bar Model Switcher ───────────────────────────────────────
|
|
1638
1642
|
(function() {
|
|
1639
1643
|
let _isOpen = false;
|
|
1644
|
+
// Cache of the most recent benchmark results, keyed by model_id. Kept at
|
|
1645
|
+
// closure scope so the numbers survive closing & reopening the dropdown —
|
|
1646
|
+
// the user shouldn't have to re-run the test just to peek at results. We
|
|
1647
|
+
// intentionally do NOT persist this to disk: latency is a point-in-time
|
|
1648
|
+
// measurement, and yesterday's numbers are misleading.
|
|
1649
|
+
let _benchCache = {}; // { [model_id]: { ttft_ms, ok, error, ts } }
|
|
1650
|
+
let _benchInFlight = false; // prevent double-click spam
|
|
1640
1651
|
|
|
1641
1652
|
// Toggle model dropdown when clicking on model name
|
|
1642
1653
|
document.addEventListener("click", async (e) => {
|
|
@@ -1692,23 +1703,63 @@ window.bootAfterBrand = async function() {
|
|
|
1692
1703
|
|
|
1693
1704
|
dropdown.innerHTML = "";
|
|
1694
1705
|
|
|
1706
|
+
// ── Benchmark header row (sticky, top of dropdown) ─────────────────
|
|
1707
|
+
// Compact row holding a ⚡ button (right) and a result hint (left). One
|
|
1708
|
+
// click sends a single POST and back-fills each row's latency cell.
|
|
1709
|
+
// The row is deliberately kept slim — a tall banner would eat space that
|
|
1710
|
+
// the model list needs, and most users open the dropdown to SWITCH,
|
|
1711
|
+
// not to benchmark. The button is discoverable but unobtrusive.
|
|
1712
|
+
const bench = document.createElement("div");
|
|
1713
|
+
bench.className = "sib-model-bench";
|
|
1714
|
+
const btnLabel = (typeof I18n !== "undefined") ? I18n.t("sib.bench.btn") : "Benchmark";
|
|
1715
|
+
const btnTooltip = (typeof I18n !== "undefined") ? I18n.t("sib.bench.tooltip") : "Test response latency for every configured model";
|
|
1716
|
+
bench.innerHTML = `
|
|
1717
|
+
<button type="button" class="sib-bench-btn" title="${btnTooltip}">⚡ <span class="sib-bench-label">${btnLabel}</span></button>
|
|
1718
|
+
<span class="sib-bench-hint"></span>
|
|
1719
|
+
`;
|
|
1720
|
+
dropdown.appendChild(bench);
|
|
1721
|
+
|
|
1722
|
+
const benchBtn = bench.querySelector(".sib-bench-btn");
|
|
1723
|
+
const benchLabel = bench.querySelector(".sib-bench-label");
|
|
1724
|
+
const benchHint = bench.querySelector(".sib-bench-hint");
|
|
1725
|
+
benchBtn.addEventListener("click", (ev) => {
|
|
1726
|
+
ev.stopPropagation();
|
|
1727
|
+
_runBenchmark(sessionId, dropdown, benchBtn, benchLabel, benchHint);
|
|
1728
|
+
});
|
|
1729
|
+
|
|
1730
|
+
// ── Model rows ─────────────────────────────────────────────────────
|
|
1695
1731
|
models.forEach(m => {
|
|
1696
1732
|
console.log("[Model Switcher] Adding model:", m.model, "id:", m.id, "current:", currentModel);
|
|
1697
1733
|
const opt = document.createElement("div");
|
|
1698
1734
|
opt.className = "sib-model-option";
|
|
1735
|
+
opt.dataset.modelId = m.id;
|
|
1699
1736
|
if (m.model === currentModel) opt.classList.add("current");
|
|
1700
1737
|
|
|
1701
|
-
const
|
|
1702
|
-
|
|
1703
|
-
|
|
1738
|
+
const left = document.createElement("span");
|
|
1739
|
+
left.className = "sib-model-name";
|
|
1740
|
+
left.textContent = m.model;
|
|
1741
|
+
opt.appendChild(left);
|
|
1742
|
+
|
|
1743
|
+
const right = document.createElement("span");
|
|
1744
|
+
right.className = "sib-model-right";
|
|
1704
1745
|
|
|
1705
1746
|
if (m.type === "default") {
|
|
1706
1747
|
const badge = document.createElement("span");
|
|
1707
1748
|
badge.className = `model-badge ${m.type}`;
|
|
1708
1749
|
badge.textContent = m.type;
|
|
1709
|
-
|
|
1750
|
+
right.appendChild(badge);
|
|
1710
1751
|
}
|
|
1711
1752
|
|
|
1753
|
+
// Latency cell — populated from _benchCache on open, updated live
|
|
1754
|
+
// when a benchmark run completes. Empty slot keeps row heights stable
|
|
1755
|
+
// so the list doesn't visually jump mid-benchmark.
|
|
1756
|
+
const lat = document.createElement("span");
|
|
1757
|
+
lat.className = "sib-model-latency";
|
|
1758
|
+
_fillLatencyCell(lat, _benchCache[m.id]);
|
|
1759
|
+
right.appendChild(lat);
|
|
1760
|
+
|
|
1761
|
+
opt.appendChild(right);
|
|
1762
|
+
|
|
1712
1763
|
// Switch by id (stable across reorders/edits). Keep model name for UI update.
|
|
1713
1764
|
opt.addEventListener("click", () => _switchModel(sessionId, m.id, m.model));
|
|
1714
1765
|
dropdown.appendChild(opt);
|
|
@@ -1720,6 +1771,105 @@ window.bootAfterBrand = async function() {
|
|
|
1720
1771
|
}
|
|
1721
1772
|
}
|
|
1722
1773
|
|
|
1774
|
+
/**
 * Render one latency cell of the model picker from a cached benchmark entry.
 *
 * Entry shapes:
 *   undefined / null            → empty slot (never tested)
 *   { pending: true }           → "…" while a benchmark run is in flight
 *   { ok: false, error }        → "✕", with the error text as tooltip
 *   { ok: true, ttft_ms, ts }   → formatted latency, coloured by threshold
 *
 * A "successful" entry without a usable numeric ttft_ms leaves the cell empty
 * instead of printing "undefinedms" / "NaNs".
 *
 * @param {HTMLElement} el    the `.sib-model-latency` element to repaint
 * @param {Object} [entry]    cached benchmark result for this model
 */
function _fillLatencyCell(el, entry) {
  // Reset first so a repaint never inherits the previous state's class/tooltip.
  el.className = "sib-model-latency";
  el.textContent = "";
  el.removeAttribute("title");
  if (!entry) return;

  if (entry.pending) {
    el.textContent = "…";
    el.classList.add("is-pending");
    return;
  }

  if (!entry.ok) {
    el.textContent = "✕";
    el.classList.add("is-err");
    el.title = entry.error || "failed";
    return;
  }

  // parseFloat (not Number) so null / "" become NaN rather than a fake 0 ms.
  const raw = entry.ttft_ms;
  const ms = typeof raw === "number" ? raw : Number.parseFloat(raw);
  if (!Number.isFinite(ms) || ms < 0) return;

  // Same thresholds as the sib-signal status bar. The value is the full
  // non-streaming response time (not a real TTFT), so ≤60s counts as normal,
  // ≤120s as slow, and anything beyond as bad.
  let cls = "is-bad";
  if (ms <= 60000) cls = "is-ok";
  else if (ms <= 120000) cls = "is-warn";
  el.classList.add(cls);

  // Round sub-second values so fractional timings don't print long decimals;
  // anything that rounds up to 1000 switches to the seconds format.
  const rounded = Math.round(ms);
  el.textContent = rounded >= 1000 ? `${(ms / 1000).toFixed(1)}s` : `${rounded}ms`;

  const testedAt = new Date(entry.ts).toLocaleTimeString();
  if (typeof I18n !== "undefined") {
    el.title = I18n.t("sib.bench.latencyTooltip", {
      ttft: el.textContent,
      time: testedAt,
    });
  } else {
    el.title = `TTFT ${el.textContent} · tested ${testedAt}`;
  }
}
|
|
1816
|
+
|
|
1817
|
+
/**
 * Run the latency benchmark for a session and paint the results into the
 * model dropdown.
 *
 * Sends one POST to /api/sessions/<sessionId>/benchmark and reads a JSON body
 * of the form { ok, error?, results: [{ model_id, ok, ttft_ms, error }] }.
 * Results are stored in the closure-level _benchCache and rendered through
 * _fillLatencyCell. Re-entrant calls are ignored while a run is in flight.
 *
 * @param {string} sessionId        session whose models are benchmarked
 * @param {HTMLElement} dropdown    container holding the .sib-model-option rows
 * @param {HTMLButtonElement} btn   trigger button (disabled during the run)
 * @param {HTMLElement} label       button label element (shows "running" text)
 * @param {HTMLElement} hint        element receiving the done/failed summary
 */
async function _runBenchmark(sessionId, dropdown, btn, label, hint) {
  if (_benchInFlight) return;
  _benchInFlight = true;
  btn.disabled = true;
  const origLabel = label.textContent;
  const _t = (key, vars) => (typeof I18n !== "undefined") ? I18n.t(key, vars) : key;
  label.textContent = _t("sib.bench.running");
  hint.textContent = "";

  // Repaint the latency cell of one model row, if that row is currently rendered.
  const repaint = (id) => {
    const cell = dropdown.querySelector(
      `.sib-model-option[data-model-id="${CSS.escape(String(id))}"] .sib-model-latency`
    );
    if (cell) _fillLatencyCell(cell, _benchCache[id]);
  };

  // Mark every row as pending for instant feedback, but remember what each
  // row showed before so a failed run (or a model missing from the response)
  // can fall back to the last real measurement instead of losing it.
  const previous = new Map();
  dropdown.querySelectorAll(".sib-model-option").forEach(opt => {
    const id = opt.dataset.modelId;
    if (!id) return;
    const prior = _benchCache[id];
    previous.set(id, prior && !prior.pending ? prior : undefined);
    _benchCache[id] = { pending: true };
    repaint(id);
  });

  const t0 = performance.now();
  try {
    const res = await fetch(`/api/sessions/${sessionId}/benchmark`, { method: "POST" });

    let data = null;
    try {
      data = await res.json();
    } catch (parseErr) {
      // Non-JSON body: report it as a generic benchmark failure below rather
      // than surfacing a raw SyntaxError message to the user.
      console.error("Benchmark response was not valid JSON:", parseErr);
    }
    if (!res.ok || !data || !data.ok) throw new Error(data?.error || "benchmark failed");

    const now = Date.now();
    (data.results || []).forEach(r => {
      _benchCache[r.model_id] = {
        ok: !!r.ok,
        ttft_ms: r.ttft_ms,
        error: r.error,
        ts: now,
      };
      repaint(r.model_id);
    });

    const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
    hint.textContent = _t("sib.bench.done", { t: elapsed });
  } catch (e) {
    console.error("Benchmark failed:", e);
    hint.textContent = _t("sib.bench.failed", { msg: e.message });
  } finally {
    // Anything still pending got no result (request failed, or the server
    // omitted that model): restore the earlier entry or clear the slot so no
    // row stays stuck on "…".
    previous.forEach((prior, id) => {
      if (!_benchCache[id]?.pending) return;
      if (prior) _benchCache[id] = prior;
      else delete _benchCache[id];
      repaint(id);
    });
    _benchInFlight = false;
    btn.disabled = false;
    label.textContent = origLabel;
  }
}
|
|
1872
|
+
|
|
1723
1873
|
// Switch session model via API
|
|
1724
1874
|
// modelId — stable runtime id (required by backend)
|
|
1725
1875
|
// modelName — display name, used for optimistic UI update
|
data/lib/clacky/web/i18n.js
CHANGED
|
@@ -394,6 +394,14 @@ const I18n = (() => {
|
|
|
394
394
|
|
|
395
395
|
"header.owner.tooltip": "Creator — click to open Creator Hub",
|
|
396
396
|
|
|
397
|
+
// ── Session info bar / Model switcher benchmark ──
|
|
398
|
+
"sib.bench.btn": "Benchmark",
|
|
399
|
+
"sib.bench.tooltip": "Test response latency for every configured model",
|
|
400
|
+
"sib.bench.running": "Testing…",
|
|
401
|
+
"sib.bench.done": "done in {{t}}s",
|
|
402
|
+
"sib.bench.failed": "failed: {{msg}}",
|
|
403
|
+
"sib.bench.latencyTooltip": "TTFT {{ttft}} · tested {{time}}",
|
|
404
|
+
|
|
397
405
|
"onboard.welcome": "Welcome to {{name}}",
|
|
398
406
|
},
|
|
399
407
|
|
|
@@ -779,6 +787,14 @@ const I18n = (() => {
|
|
|
779
787
|
|
|
780
788
|
"header.owner.tooltip": "创作者 — 点击进入创作者中心",
|
|
781
789
|
|
|
790
|
+
// ── 会话信息栏 / 模型切换器 测速 ──
|
|
791
|
+
"sib.bench.btn": "测速",
|
|
792
|
+
"sib.bench.tooltip": "测试所有已配置模型的响应延迟",
|
|
793
|
+
"sib.bench.running": "测速中…",
|
|
794
|
+
"sib.bench.done": "用时 {{t}} 秒",
|
|
795
|
+
"sib.bench.failed": "失败:{{msg}}",
|
|
796
|
+
"sib.bench.latencyTooltip": "TTFT {{ttft}} · 测试于 {{time}}",
|
|
797
|
+
|
|
782
798
|
"onboard.welcome": "欢迎使用 {{name}}",
|
|
783
799
|
}
|
|
784
800
|
};
|
|
@@ -858,6 +874,11 @@ const I18n = (() => {
|
|
|
858
874
|
})();
|
|
859
875
|
|
|
860
876
|
// ── Thinking Verbs for Progress Animation ──────────────────────────────────
|
|
877
|
+
//
|
|
878
|
+
// The primary verb ("Thinking" / "思考中") is chosen 90% of the time inside
|
|
879
|
+
// getRandomThinkingVerb(). The lists below are ONLY the 10% flavor variants —
|
|
880
|
+
// do not include the primary verb here, and do not rely on duplicates for
|
|
881
|
+
// weighting (probability is controlled in code, not data).
|
|
861
882
|
const THINKING_VERBS = {
|
|
862
883
|
en: [
|
|
863
884
|
"Cogitating",
|
|
@@ -882,38 +903,38 @@ const THINKING_VERBS = {
|
|
|
882
903
|
"Reasoning"
|
|
883
904
|
],
|
|
884
905
|
zh: [
|
|
885
|
-
"
|
|
886
|
-
"
|
|
887
|
-
"
|
|
888
|
-
"思考中",
|
|
889
|
-
"思考中",
|
|
890
|
-
"琢磨中", // 2x weight
|
|
891
|
-
"琢磨中",
|
|
892
|
-
"思忖中",
|
|
893
|
-
"盘算中",
|
|
894
|
-
"酝酿中",
|
|
895
|
-
"捋一捋",
|
|
896
|
-
"理理头绪",
|
|
897
|
-
"掂量掂量",
|
|
898
|
-
"寻思中",
|
|
899
|
-
"琢磨琢磨",
|
|
900
|
-
"想想办法",
|
|
901
|
-
"推演中",
|
|
906
|
+
"推理中",
|
|
907
|
+
"深度思考中",
|
|
908
|
+
"分析中",
|
|
902
909
|
"解析中",
|
|
903
910
|
"拆解中",
|
|
904
|
-
"
|
|
911
|
+
"推演中",
|
|
905
912
|
"梳理中",
|
|
906
|
-
"
|
|
913
|
+
"归纳中",
|
|
907
914
|
"演算中",
|
|
908
|
-
"
|
|
909
|
-
"
|
|
910
|
-
"构思中"
|
|
915
|
+
"验证中",
|
|
916
|
+
"权衡中",
|
|
917
|
+
"构思中",
|
|
918
|
+
"酝酿中",
|
|
919
|
+
"思忖中",
|
|
920
|
+
"琢磨中"
|
|
911
921
|
]
|
|
912
922
|
};
|
|
913
923
|
|
|
914
|
-
// Get a random thinking verb based on current language
|
|
924
|
+
// Get a random thinking verb based on current language.
|
|
925
|
+
//
|
|
926
|
+
// Behavior: 90% of the time return the primary verb ("思考中" / "Thinking"),
|
|
927
|
+
// 10% of the time pick a random variant from the list for a bit of flavor.
|
|
928
|
+
// The primary is intentionally kept outside the list so tuning the probability
|
|
929
|
+
// is a single-number change here, independent of the list contents.
|
|
915
930
|
/**
 * Pick the verb shown in the "thinking" progress animation.
 *
 * Returns the primary verb for the current language whenever the first random
 * draw is below 0.9; otherwise returns a random entry from that language's
 * THINKING_VERBS list (English list if the language has none). An empty or
 * missing list also falls back to the primary verb.
 *
 * @returns {string} verb to display
 */
function getRandomThinkingVerb() {
  const currentLang = I18n.lang();
  const fallbackVerb = currentLang === "zh" ? "思考中" : "Thinking";

  // First draw decides primary vs. flavor variant.
  const wantsVariant = !(Math.random() < 0.9);
  if (!wantsVariant) {
    return fallbackVerb;
  }

  const pool = THINKING_VERBS[currentLang] || THINKING_VERBS.en;
  if (!pool || pool.length === 0) {
    return fallbackVerb;
  }

  // Second draw selects the variant.
  const pick = Math.floor(Math.random() * pool.length);
  return pool[pick];
}
|
data/lib/clacky/web/index.html
CHANGED
|
@@ -271,6 +271,16 @@
|
|
|
271
271
|
<div id="sib-model-dropdown" class="sib-model-dropdown" style="display:none"></div>
|
|
272
272
|
</span>
|
|
273
273
|
<span class="sib-sep sib-sep-after-model">│</span>
|
|
274
|
+
<!-- Latency signal: 4-bar signal + TTFT number. Hidden until the first LLM
|
|
275
|
+
call completes (see updateInfoBar / Sessions._renderSignal). Click
|
|
276
|
+
opens a mini benchmark panel (see Step 3/4 — not yet implemented). -->
|
|
277
|
+
<span id="sib-signal-wrap" style="display:none">
|
|
278
|
+
<span id="sib-signal" class="sib-signal-clickable" title="Recent LLM latency">
|
|
279
|
+
<span class="sig-bars" aria-hidden="true"><i></i><i></i><i></i><i></i></span>
|
|
280
|
+
<span class="sig-text"></span>
|
|
281
|
+
</span>
|
|
282
|
+
</span>
|
|
283
|
+
<span class="sib-sep sib-sep-after-signal" style="display:none">│</span>
|
|
274
284
|
<!-- Detail fields: mode, tasks, cost -->
|
|
275
285
|
<span class="sib-detail">
|
|
276
286
|
<span id="sib-mode"></span>
|
data/lib/clacky/web/sessions.js
CHANGED
|
@@ -740,9 +740,18 @@ const Sessions = (() => {
|
|
|
740
740
|
|
|
741
741
|
// Format a timestamp for display inside a message bubble.
|
|
742
742
|
// Same-day: "HH:MM"; cross-day: "MM-DD HH:MM".
|
|
743
|
+
//
|
|
744
|
+
// Accepts:
|
|
745
|
+
// - ISO string ("2026-04-30T21:45:00Z")
|
|
746
|
+
// - JS millisecond epoch (number ≥ 1e12)
|
|
747
|
+
// - Unix second epoch (number < 1e12) — what the Ruby backend emits via
|
|
748
|
+
// Time.now.to_f; we multiply by 1000 before handing to Date(), otherwise
|
|
749
|
+
// JS interprets 1.77e9 as ~1970-01-21 and we get bogus timestamps.
|
|
743
750
|
function _formatMsgTime(dateOrStr) {
|
|
744
751
|
if (!dateOrStr) return "";
|
|
745
|
-
|
|
752
|
+
let input = dateOrStr;
|
|
753
|
+
if (typeof input === "number" && input < 1e12) input = input * 1000;
|
|
754
|
+
const d = new Date(input);
|
|
746
755
|
if (isNaN(d)) return "";
|
|
747
756
|
const now = new Date();
|
|
748
757
|
const pad = n => String(n).padStart(2, "0");
|
|
@@ -1574,6 +1583,13 @@ const Sessions = (() => {
|
|
|
1574
1583
|
}
|
|
1575
1584
|
if (sibModelWrap) sibModelWrap.style.display = s.model ? "" : "none";
|
|
1576
1585
|
|
|
1586
|
+
// Latency signal — read from s.latest_latency (populated by:
|
|
1587
|
+
// - HTTP /api/sessions → session_registry#list (from agent.latest_latency)
|
|
1588
|
+
// - WS session_update events patched by app.js
|
|
1589
|
+
// Hidden entirely when no latency recorded yet (fresh session, or old
|
|
1590
|
+
// pre-feature sessions that have never made an LLM call this run).
|
|
1591
|
+
this._renderSignal(s.latest_latency);
|
|
1592
|
+
|
|
1577
1593
|
// Tasks
|
|
1578
1594
|
const sibTasks = $("sib-tasks");
|
|
1579
1595
|
if (sibTasks) sibTasks.textContent = `${s.total_tasks || 0} tasks`;
|
|
@@ -1592,6 +1608,77 @@ const Sessions = (() => {
|
|
|
1592
1608
|
if (bar) bar.style.display = "flex";
|
|
1593
1609
|
},
|
|
1594
1610
|
|
|
1611
|
+
/** Render the 4-bar latency signal next to the model name in the status bar.
|
|
1612
|
+
*
|
|
1613
|
+
* @param {Object|null} lat latency metrics from agent.latest_latency
|
|
1614
|
+
* shape: { ttft_ms, duration_ms, output_tokens, tps, model, streaming }
|
|
1615
|
+
*
|
|
1616
|
+
* Visibility: hidden whenever lat is falsy (no measurement yet). Never
|
|
1617
|
+
* renders a "loading" state — we would rather show nothing than a stale or
|
|
1618
|
+
* misleading number.
|
|
1619
|
+
*
|
|
1620
|
+
* Signal thresholds (TTFT):
|
|
1621
|
+
* Note: this is measured over the WHOLE non-streaming response (we
|
|
1622
|
+
* don't have a real TTFT yet — the server returns one completed body),
|
|
1623
|
+
* so for a large generation — "write me a 2000-line snake game" — the
|
|
1624
|
+
* number naturally balloons. Thresholds below are tuned to that reality:
|
|
1625
|
+
* 60s is considered NORMAL, 120s is slow, beyond that we flag bad.
|
|
1626
|
+
*
|
|
1627
|
+
* ≤ 2000 ms → 4 bars, green, "⚡" fast
|
|
1628
|
+
* ≤ 60000 ms → 3 bars, green, normal
|
|
1629
|
+
* ≤ 120000 ms → 2 bars, amber, slow
|
|
1630
|
+
* > 120000 ms → 1 bar, red, very slow
|
|
1631
|
+
*
|
|
1632
|
+
* Hover tooltip: built from the latency hash — full breakdown for power
|
|
1633
|
+
* users; the compact inline text is just "1.2s" style for scannability.
|
|
1634
|
+
*/
|
|
1635
|
+
_renderSignal(lat) {
|
|
1636
|
+
const wrap = $("sib-signal-wrap");
|
|
1637
|
+
const sep = document.querySelector(".sib-sep-after-signal");
|
|
1638
|
+
const el = $("sib-signal");
|
|
1639
|
+
if (!wrap || !el) return;
|
|
1640
|
+
|
|
1641
|
+
if (!lat || !lat.ttft_ms) {
|
|
1642
|
+
wrap.style.display = "none";
|
|
1643
|
+
if (sep) sep.style.display = "none";
|
|
1644
|
+
return;
|
|
1645
|
+
}
|
|
1646
|
+
|
|
1647
|
+
const ttft = Number(lat.ttft_ms) || 0;
|
|
1648
|
+
let bars, level;
|
|
1649
|
+
if (ttft <= 2000) { bars = 4; level = "ok"; }
|
|
1650
|
+
else if (ttft <= 60000) { bars = 3; level = "ok"; }
|
|
1651
|
+
else if (ttft <= 120000) { bars = 2; level = "warn"; }
|
|
1652
|
+
else { bars = 1; level = "bad"; }
|
|
1653
|
+
|
|
1654
|
+
// Paint bars: active ones get .on, others stay dim
|
|
1655
|
+
el.querySelectorAll(".sig-bars i").forEach((bar, i) => {
|
|
1656
|
+
bar.classList.toggle("on", i < bars);
|
|
1657
|
+
});
|
|
1658
|
+
el.className = `sib-signal-clickable sib-signal-${level}`;
|
|
1659
|
+
|
|
1660
|
+
// Inline text: just the TTFT in human-friendly units
|
|
1661
|
+
const ttftStr = ttft >= 1000 ? (ttft / 1000).toFixed(1) + "s" : ttft + "ms";
|
|
1662
|
+
const text = el.querySelector(".sig-text");
|
|
1663
|
+
if (text) text.textContent = ttftStr;
|
|
1664
|
+
|
|
1665
|
+
// Tooltip: full metrics breakdown
|
|
1666
|
+
const parts = [`TTFT ${ttftStr}`];
|
|
1667
|
+
if (lat.duration_ms && lat.duration_ms !== ttft) {
|
|
1668
|
+
const durStr = lat.duration_ms >= 1000
|
|
1669
|
+
? (lat.duration_ms / 1000).toFixed(1) + "s"
|
|
1670
|
+
: lat.duration_ms + "ms";
|
|
1671
|
+
parts.push(`total ${durStr}`);
|
|
1672
|
+
}
|
|
1673
|
+
if (lat.tps) parts.push(`${lat.tps} tok/s`);
|
|
1674
|
+
if (lat.output_tokens) parts.push(`${lat.output_tokens} tokens`);
|
|
1675
|
+
if (lat.model) parts.push(`@ ${lat.model}`);
|
|
1676
|
+
el.title = "Last LLM call — " + parts.join(" · ");
|
|
1677
|
+
|
|
1678
|
+
wrap.style.display = "";
|
|
1679
|
+
if (sep) sep.style.display = "";
|
|
1680
|
+
},
|
|
1681
|
+
|
|
1595
1682
|
// ── Message helpers ────────────────────────────────────────────────────
|
|
1596
1683
|
|
|
1597
1684
|
// Live tool group state (one active group per session at a time)
|