openclacky 1.0.0.beta.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39e25cd04a3d01fdacbb0382c2c367a1e72e8d2be88408e7fb29f804b3af1ba6
4
- data.tar.gz: 492ca66bcfb55a6cfc3f2cf38f171ce983f142a7a4b0f8655e5aafa317b79a69
3
+ metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
4
+ data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
5
5
  SHA512:
6
- metadata.gz: 014eeb8227bcc4cd94104a1da3bb2877083a1c70c4baaaf408233eec57ef684cbc2bcbac632ca52a771e2f1a8f436f2a09d89b697a165f1147891cabfe3708a0
7
- data.tar.gz: cc54f77d960bfd2db73906b713a84d0da6465fc18c65d9ec3ceb75d250bf426adaf4d9ba42c71900beab889bb6acf6a6472fa3843420fec8bbd3460a13f00088
6
+ metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
7
+ data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
data/CHANGELOG.md CHANGED
@@ -5,7 +5,28 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [Unreleased]
8
+ ## [1.0.0] - 2026-04-30
9
+
10
+ ### Added
11
+ - **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
12
+ - **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
13
+ - **Default model changed to 4.5.** The new default model provides a better balance of speed, quality, and cost for most tasks.
14
+
15
+ ### Improved
16
+ - **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
17
+ - **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
18
+
19
+ ### Fixed
20
+ - **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
21
+
22
+ ## [1.0.0.beta.6] - 2026-04-30
23
+
24
+ ### Fixed
25
+ - **Compression chunk indexing now uses disk-based discovery.** Chunk files are no longer incorrectly overwritten after the second compression. Previously, chunk index was counted from compressed_summary messages in history — which caps at 1 after rebuild — causing chunk-2.md to be overwritten on every subsequent compression. Now uses durable disk-based chunk discovery via SessionManager, ensuring all compressed chunks are preserved.
26
+ - **Skill evolution no longer creates duplicate skills.** The reflect and auto-create scenarios in skill evolution are now mutually exclusive: when a skill was just used, only reflection runs; when no skill was used, only auto-creation is considered. This prevents near-duplicate "auto-*" skills from being extracted from tasks already served by an existing skill.
27
+
28
+ ### Improved
29
+ - **Slash commands no longer misinterpret filesystem paths.** Pasted paths like `/Users/alice/foo` or `/tmp/bar` are no longer mistaken for slash commands, avoiding confusing "skill not found" notices.
9
30
 
10
31
  ## [1.0.0.beta.5] - 2026-04-29
11
32
 
@@ -86,7 +86,45 @@ module Clacky
86
86
  # Successful response — if we were probing, confirm primary is healthy.
87
87
  handle_probe_success if @config.probing?
88
88
 
89
- rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
89
+ rescue Faraday::TimeoutError => e
90
+ # ── Read-timeout path (distinct from connection-level failures) ──
91
+ # Faraday::TimeoutError on our non-streaming POST almost always means
92
+ # the *response* took longer than the 300s read-timeout to come back —
93
+ # i.e. the model is trying to produce a huge output in one shot
94
+ # (e.g. "write me a 2000-line snake game"). Blindly retrying the same
95
+ # request with the same prompt reproduces the same timeout.
96
+ #
97
+ # Strategy:
98
+ # 1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
99
+ # telling the model to break the work into smaller steps, then
100
+ # retry. The history edit changes the prompt, so the retry is
101
+ # materially different from the failed attempt.
102
+ # 2. On subsequent timeouts in the same task, fall back to the
103
+ # generic "just retry" behaviour (the model may have ignored
104
+ # the hint; don't pile on duplicate hints).
105
+ # 3. Probing-mode timeouts still go through handle_probe_failure.
106
+ retries += 1
107
+
108
+ if @config.probing?
109
+ handle_probe_failure
110
+ retry
111
+ end
112
+
113
+ if retries <= max_retries
114
+ inject_large_output_hint_if_first_timeout(e)
115
+ @ui&.show_progress(
116
+ "Response too slow (likely generating too much at once): #{e.message}",
117
+ progress_type: "retrying",
118
+ phase: "active",
119
+ metadata: { attempt: retries, total: max_retries }
120
+ )
121
+ sleep retry_delay
122
+ retry
123
+ else
124
+ raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
125
+ end
126
+
127
+ rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
90
128
  retries += 1
91
129
 
92
130
  # Probing failure: primary still down — renew cooling-off and retry with fallback.
@@ -95,9 +133,10 @@ module Clacky
95
133
  retry
96
134
  end
97
135
 
98
- # Network-level errors (timeouts, connection failures) are likely transient
99
- # infrastructure blips — do NOT trigger fallback. Just retry on the current
100
- # model (primary or already-active fallback) up to max_retries.
136
+ # Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
137
+ # likely transient infrastructure blips — do NOT trigger fallback, and do
138
+ # NOT inject the "break into steps" hint (the model did nothing wrong).
139
+ # Just retry on the current model up to max_retries.
101
140
  if retries <= max_retries
102
141
  @ui&.show_progress(
103
142
  "Network failed: #{e.message}",
@@ -229,6 +268,50 @@ module Clacky
229
268
  (msg.include?("thinking") || msg.include?("must be passed back") ||
230
269
  msg.include?("must be provided"))
231
270
  end
271
+
272
+ # On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
273
+ # user message to the history instructing the model to break its work
274
+ # into smaller steps. Subsequent timeouts in the same task are ignored
275
+ # here (caller just retries) so we don't pollute history with duplicate
276
+ # hints.
277
+ #
278
+ # The injected message carries `system_injected: true` so it is:
279
+ # - Hidden from UI replay (session_serializer / replay_history filters)
280
+ # - Skipped by prompt-caching marker placement (client.rb)
281
+ # - Skipped by message compression's "recent user turn" protection
282
+ # (message_compressor_helper.rb)
283
+ #
284
+ # Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
285
+ private def inject_large_output_hint_if_first_timeout(err)
286
+ return if @task_timeout_hint_injected
287
+
288
+ @task_timeout_hint_injected = true
289
+
290
+ hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
291
+ "This usually means the model was trying to produce too much output in a single response. " \
292
+ "Please change your approach:\n" \
293
+ "- Break the task into multiple smaller steps, each producing a short response.\n" \
294
+ "- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
295
+ "then fill in each section with separate `edit` calls.\n" \
296
+ "- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
297
+ "- Do NOT attempt to output the entire deliverable in one response."
298
+
299
+ @history.append({
300
+ role: "user",
301
+ content: hint,
302
+ system_injected: true,
303
+ task_id: @current_task_id
304
+ })
305
+
306
+ Clacky::Logger.info(
307
+ "[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
308
+ "(error=#{err.class}: #{err.message})"
309
+ )
310
+
311
+ @ui&.show_warning(
312
+ "LLM response timed out — asking model to break the task into smaller steps and retrying..."
313
+ )
314
+ end
232
315
  end
233
316
  end
234
317
  end
@@ -154,12 +154,22 @@ module Clacky
154
154
  # Note: we need to remove the compression instruction message we just added
155
155
  original_messages = @history.to_a[0..-2] # All except the last (compression instruction)
156
156
 
157
- # Archive compressed messages to a chunk MD file before discarding them
158
- # Count existing compressed_summary messages in history to determine the next chunk index.
159
- # Using @compressed_summaries.size would reset to 0 on process restart and overwrite existing
160
- # chunk files, creating circular chunk references. Counting from history is always accurate.
161
- existing_chunk_count = original_messages.count { |m| m[:compressed_summary] }
162
- chunk_index = existing_chunk_count + 1
157
+ # Archive compressed messages to a chunk MD file before discarding them.
158
+ #
159
+ # IMPORTANT: chunk_index and previous_chunks MUST come from disk, not from
160
+ # message history. Each compression's rebuild_with_compression keeps only
161
+ # ONE compressed_summary message (the new one), dropping older summaries
162
+ # and embedding their references into the new summary's content. So
163
+ # counting compressed_summary messages in history caps at 1 from the
164
+ # second compression onward — causing chunk-2.md to be overwritten on
165
+ # every subsequent compression, and losing references to chunk-1.md.
166
+ #
167
+ # Disk is the only durable source of truth: chunk files survive process
168
+ # restarts, session reloads, and message rebuilds. SessionManager owns
169
+ # all chunk file I/O (naming, writing, discovery) — we just ask it.
170
+ sm = session_manager
171
+ existing_chunks = sm.chunks_for_current(@session_id, @created_at)
172
+ chunk_index = sm.next_chunk_index(@session_id, @created_at)
163
173
 
164
174
  # Extract topics from the LLM response to store in both the chunk MD front
165
175
  # matter and the compressed_summary message hash (for future chunk indexing).
@@ -173,14 +183,13 @@ module Clacky
173
183
  topics: topics
174
184
  )
175
185
 
176
- # Collect previous chunk references so the new summary carries a complete
177
- # index of all older archives. Without this, each new compression would
178
- # lose all prior chunk references leaving only the newest chunk reachable
179
- # via replay_history. The AI can still access older chunks via file_reader
180
- # using the embedded basenames and topics.
181
- previous_chunks = original_messages
182
- .select { |m| m[:compressed_summary] && m[:chunk_path] }
183
- .map { |m| { basename: File.basename(m[:chunk_path]), path: m[:chunk_path], topics: m[:topics] } }
186
+ # Build previous_chunks index from the disk-discovered chunks (already
187
+ # sorted by index ascending). This gives the new summary a complete
188
+ # chronological index of all older archives so the AI can recall any
189
+ # past chunk via file_reader, not just the most recent one.
190
+ previous_chunks = existing_chunks.map do |c|
191
+ { basename: c[:basename], path: c[:path], topics: c[:topics] }
192
+ end
184
193
 
185
194
  @history.replace_all(@message_compressor.rebuild_with_compression(
186
195
  compressed_content,
@@ -348,8 +357,22 @@ module Clacky
348
357
  end
349
358
  end
350
359
 
351
- # Save the messages being compressed to a chunk MD file for future recall
352
- # File path: ~/.clacky/sessions/{datetime}-{short_id}-chunk-{n}.md
360
+ # Lazy accessor for a SessionManager instance used by compression chunk I/O.
361
+ # We keep this local to the helper rather than threading a manager instance
362
+ # through the Agent constructor — Agent itself doesn't persist sessions
363
+ # (CLI / HTTP server do that), but the compression archive lives in the
364
+ # same directory under SessionManager's ownership.
365
+ #
366
+ # NOTE: Uses Clacky::SessionManager::SESSIONS_DIR by default. Tests can
367
+ # stub that constant to point at a tmpdir.
368
+ private def session_manager
369
+ @session_manager ||= Clacky::SessionManager.new
370
+ end
371
+
372
+ # Save the messages being compressed to a chunk MD file for future recall.
373
+ # The filesystem concerns (path, write, chmod) are delegated to SessionManager;
374
+ # this method is responsible only for the business rules of WHAT gets archived.
375
+ #
353
376
  # @param original_messages [Array<Hash>] All messages before compression (excluding compression instruction)
354
377
  # @param recent_messages [Array<Hash>] Recent messages being kept (to exclude from chunk)
355
378
  # @param chunk_index [Integer] Sequential chunk number
@@ -373,19 +396,14 @@ module Clacky
373
396
 
374
397
  return nil if messages_to_archive.empty?
375
398
 
376
- sessions_dir = Clacky::SessionManager::SESSIONS_DIR
377
- datetime = Time.parse(@created_at).strftime("%Y-%m-%d-%H-%M-%S")
378
- short_id = @session_id[0..7]
379
- base_name = "#{datetime}-#{short_id}"
380
- chunk_filename = "#{base_name}-chunk-#{chunk_index}.md"
381
- chunk_path = File.join(sessions_dir, chunk_filename)
382
-
383
- md_content = build_chunk_md(messages_to_archive, chunk_index: chunk_index, compression_level: compression_level, topics: topics)
384
-
385
- File.write(chunk_path, md_content)
386
- FileUtils.chmod(0o600, chunk_path)
399
+ md_content = build_chunk_md(messages_to_archive,
400
+ chunk_index: chunk_index,
401
+ compression_level: compression_level,
402
+ topics: topics)
387
403
 
388
- chunk_path
404
+ # Delegate filesystem concerns (path assembly, write, chmod) to SessionManager —
405
+ # it owns the on-disk layout for sessions and their chunk archives.
406
+ session_manager.write_chunk(@session_id, @created_at, chunk_index, md_content)
389
407
  rescue => e
390
408
  @ui&.log("Failed to save chunk MD: #{e.message}", level: :warn)
391
409
  nil
@@ -36,6 +36,15 @@ module Clacky
36
36
  # Restore previous_total_tokens for accurate delta calculation across sessions
37
37
  @previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
38
38
 
39
+ # Recover the latest latency metric from the most recent assistant message
40
+ # that carries a :latency field. This is the source of truth for the status-bar
41
+ # signal — no separate session-level field is needed. Older sessions (pre-feature)
42
+ # simply start with nil; the signal stays hidden until the next LLM call populates it.
43
+ last_assistant_with_latency = @history.to_a.reverse.find do |m|
44
+ m[:role].to_s == "assistant" && m[:latency]
45
+ end
46
+ @latest_latency = last_assistant_with_latency&.dig(:latency)
47
+
39
48
  # Restore Time Machine state
40
49
  @task_parents = session_data.dig(:time_machine, :task_parents) || {}
41
50
  @current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
@@ -178,8 +187,18 @@ module Clacky
178
187
  elsif current_round
179
188
  current_round[:events] << msg
180
189
  elsif msg[:compressed_summary] && msg[:chunk_path]
181
- # Compressed summary sitting before any user rounds — expand it from chunk md
182
- chunk_rounds = parse_chunk_md_to_rounds(msg[:chunk_path])
190
+ # Compressed summary sitting before any user rounds — expand ALL chunk
191
+ # MD files that belong to the same session (siblings of chunk_path),
192
+ # in chunk-index ascending order.
193
+ #
194
+ # Under the current "single summary + previous_chunks index" scheme,
195
+ # session.json only keeps the newest compressed_summary message (which
196
+ # points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
197
+ # referenced only as basenames inside the summary text. Expanding just
198
+ # msg[:chunk_path] would therefore lose all prior chunks on replay.
199
+ chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
200
+ parse_chunk_md_to_rounds(p)
201
+ }
183
202
  rounds.concat(chunk_rounds)
184
203
  # After expanding, treat the last chunk round as the current round so that
185
204
  # any orphaned assistant/tool messages that follow in session.json (belonging
@@ -243,6 +262,32 @@ module Clacky
243
262
  { has_more: has_more }
244
263
  end
245
264
 
265
+ # Return all chunk MD file paths that belong to the same session as
266
+ # +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
267
+ # Uses the filename convention "<base>-chunk-<N>.md".
268
+ #
269
+ # Handles path resolution the same way parse_chunk_md_to_rounds does:
270
+ # if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
271
+ # (cross-machine / cross-user session bundles).
272
+ private def sibling_chunks_of(chunk_path)
273
+ return [] unless chunk_path
274
+
275
+ resolved = chunk_path.to_s
276
+ unless File.exist?(resolved)
277
+ resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
278
+ end
279
+ return [] unless File.exist?(resolved)
280
+
281
+ dir = File.dirname(resolved)
282
+ base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
283
+ return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
284
+
285
+ Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
286
+ m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
287
+ m ? m[1].to_i : Float::INFINITY
288
+ end
289
+ end
290
+
246
291
  # Parse a chunk MD file into an array of rounds compatible with replay_history.
247
292
  # Each round is { user_msg: Hash, events: Array<Hash> }.
248
293
  # Timestamps are synthesised from the chunk's archived_at, spread backwards.
@@ -10,16 +10,31 @@ module Clacky
10
10
  # Triggered at the end of Agent#run (post-run hooks), only for main agents.
11
11
  module SkillEvolution
12
12
  # Main entry point - runs all skill evolution checks
13
- # Called from Agent#run after the main loop completes
13
+ # Called from Agent#run after the main loop completes.
14
+ #
15
+ # The two scenarios are mutually exclusive by design:
16
+ #
17
+ # * If a skill just ran (@skill_execution_context is set), the user's
18
+ # need was already served by an existing skill. Run Scenario 2
19
+ # (reflect + possibly improve that skill) and skip Scenario 1 —
20
+ # otherwise we would auto-extract a near-duplicate "auto-*" skill
21
+ # from the same task, polluting the skills directory.
22
+ #
23
+ # * If no skill ran, the task was solved with raw tools. That is the
24
+ # signal for Scenario 1: if the pattern is complex/repeatable enough,
25
+ # consider extracting it into a new skill.
14
26
  def run_skill_evolution_hooks
15
27
  return unless skill_evolution_enabled?
16
28
  return if @is_subagent
17
29
 
18
- # Scenario 2: Reflect on executed skill (if one just ran)
19
- maybe_reflect_on_skill if @skill_execution_context
20
-
21
- # Scenario 1: Auto-create new skill from complex task
22
- maybe_create_skill_from_task
30
+ if @skill_execution_context
31
+ # Scenario 2: Reflect on executed skill (may invoke skill-creator
32
+ # to UPDATE the existing skill, but will not create a new one).
33
+ maybe_reflect_on_skill
34
+ else
35
+ # Scenario 1: Auto-create new skill from complex task.
36
+ maybe_create_skill_from_task
37
+ end
23
38
  end
24
39
 
25
40
  # Check if skill evolution is enabled in config
@@ -33,12 +33,46 @@ module Clacky
33
33
  def parse_skill_command(input)
34
34
  return { matched: false } unless input.start_with?("/")
35
35
 
36
- match = input.match(%r{^/(\S+)(?:\s+(.*))?$})
36
+ # Split off the first whitespace-delimited token after the leading "/".
37
+ # Shape of a slash command:
38
+ # /<command>
39
+ # /<command> <arguments...>
40
+ #
41
+ # The key distinction we need to make is "slash command" vs. "filesystem
42
+ # path starting with /". Paths look like "/xxx/yyy", "/Users/alice/foo",
43
+ # "/tmp/bar" — what they all share is a *second* "/" inside the first
44
+ # token. Slash commands, on the other hand, may legitimately contain
45
+ # non-slug characters like ':' or '.' (e.g. "/guizang-ppt-skill:create"),
46
+ # so we deliberately DO NOT require the command to be a clean slug here —
47
+ # find_by_command handles the lookup, and a pilot-error like "/foo.bar"
48
+ # should still surface a friendly "skill not found" notice.
49
+ #
50
+ # Rejected as slash commands (treated as plain user messages):
51
+ # - "/", "//", "/*.rb" — token is empty or begins with a separator/glob
52
+ # - "/ leading space" — whitespace immediately after /
53
+ # - "/Users/alice/foo" — second "/" inside the first token ⇒ a path
54
+ # - "/xxxx/zzzz/" — same
55
+ #
56
+ # Accepted (routed to find_by_command, may yield :not_found notice):
57
+ # - "/commit"
58
+ # - "/skill-add https://…" — "/" appears only in arguments, fine
59
+ # - "/guizang-ppt-skill:create", "/foo.bar" — non-slug but no path shape
60
+ match = input.match(%r{^/(\S+?)(?:\s+(.*))?$})
37
61
  return { matched: false } unless match
38
62
 
39
63
  skill_name = match[1]
40
64
  arguments = match[2] || ""
41
65
 
66
+ # Reject path-like first tokens: anything containing a "/" after the
67
+ # leading one belongs to the filesystem, not the command namespace.
68
+ # This also naturally rejects "" (from "/" alone) and "*…" / ".…" style
69
+ # tokens because they won't be registered as a command — but those edge
70
+ # cases fall through to :not_found which is acceptable. The main goal is
71
+ # to stop pasted paths like "/Users/foo/bar" from producing a bogus
72
+ # "skill /Users/foo/bar not found" reply.
73
+ return { matched: false } if skill_name.include?("/")
74
+ return { matched: false } if skill_name.empty?
75
+
42
76
  skill = @skill_loader.find_by_command("/#{skill_name}")
43
77
  return { matched: true, found: false, skill_name: skill_name, reason: :not_found } unless skill
44
78
 
data/lib/clacky/agent.rb CHANGED
@@ -42,7 +42,8 @@ module Clacky
42
42
 
43
43
  attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
44
44
  :cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
45
- :status, :error, :updated_at, :source
45
+ :status, :error, :updated_at, :source,
46
+ :latest_latency # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
46
47
  attr_accessor :pinned
47
48
 
48
49
  def permission_mode
@@ -78,6 +79,7 @@ module Clacky
78
79
  @task_cost_source = :estimated # Track cost source for current task
79
80
  @previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
80
81
  @interrupted = false # Flag for user interrupt
82
+ @latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
81
83
  @ui = ui # UIController for direct UI interaction
82
84
  @debug_logs = [] # Debug logs for troubleshooting
83
85
  @pending_injections = [] # Pending inline skill injections to flush after observe()
@@ -208,6 +210,7 @@ module Clacky
208
210
 
209
211
  @start_time = Time.now
210
212
  @task_truncation_count = 0 # Reset truncation counter for each task
213
+ @task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
211
214
  @task_cost_source = :estimated # Reset for new task
212
215
  # Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
213
216
  # across tasks to correctly calculate delta tokens in each iteration
@@ -681,6 +684,17 @@ module Clacky
681
684
  end
682
685
  # Store token_usage in the message so replay_history can re-emit it
683
686
  msg[:token_usage] = response[:token_usage] if response[:token_usage]
687
+ # Store per-message latency — this is the source of truth (session.json)
688
+ # for all time-to-first-token / duration / throughput info. The status
689
+ # bar signal reads the last assistant message's latency; no separate
690
+ # config file or top-level session field is introduced.
691
+ if response[:latency]
692
+ msg[:latency] = response[:latency]
693
+ @latest_latency = response[:latency]
694
+ # Push to UI so the status-bar signal updates immediately after the
695
+ # model finishes (before any tool execution delays the next event).
696
+ @ui&.update_sessionbar(latency: response[:latency])
697
+ end
684
698
  # Preserve reasoning_content from the real LLM response.
685
699
  # This is the authoritative signal used by MessageHistory#to_api to
686
700
  # detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
data/lib/clacky/client.rb CHANGED
@@ -89,18 +89,54 @@ module Clacky
89
89
  # ── Agent main path ───────────────────────────────────────────────────────
90
90
 
91
91
  # Send messages with tool-calling support.
92
- # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
92
+ # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
93
+ #
94
+ # Latency measurement:
95
+ # Because the current HTTP path is *non-streaming* (plain POST, response
96
+ # body read in one shot), TTFB (time to response headers) is not exposed
97
+ # by Faraday's default adapter without extra plumbing. What we CAN measure
98
+ # cheaply — and what users actually feel — is total request duration,
99
+ # which for a non-streaming call equals the time from "hit Enter" to
100
+ # "first token visible" (since we receive everything at once).
101
+ #
102
+ # So we record `duration_ms` as the authoritative number and alias it to
103
+ # `ttft_ms` for downstream consumers (status bar uses ttft_ms as its
104
+ # signal metric — see docs). When we migrate to streaming later, this
105
+ # same `ttft_ms` field will start carrying the *actual* first-token
106
+ # latency without any schema change.
93
107
  def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
94
108
  caching_enabled = enable_caching && supports_prompt_caching?(model)
95
109
  cloned = deep_clone(messages)
96
110
 
97
- if bedrock?
98
- send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
99
- elsif anthropic_format?
100
- send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
101
- else
102
- send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
103
- end
111
+ t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
112
+ response =
113
+ if bedrock?
114
+ send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
115
+ elsif anthropic_format?
116
+ send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
117
+ else
118
+ send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
119
+ end
120
+ t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
121
+
122
+ duration_ms = ((t1 - t0) * 1000).round
123
+ # Throughput is only meaningful with a reasonable output size; below ~10
124
+ # tokens the sample is too small to be informative and the result is
125
+ # wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
126
+ # Canonical usage hashes from message_format/* all use :completion_tokens.
127
+ output_tokens = response[:usage]&.dig(:completion_tokens).to_i
128
+ tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
129
+
130
+ response[:latency] = {
131
+ ttft_ms: duration_ms, # non-streaming: TTFT == full duration
132
+ duration_ms: duration_ms,
133
+ output_tokens: output_tokens,
134
+ tps: tps,
135
+ model: model,
136
+ measured_at: Time.now.to_f,
137
+ streaming: false # future flag — true when we migrate
138
+ }
139
+ response
104
140
  end
105
141
 
106
142
  # Format tool results into canonical messages ready to append to @messages.
@@ -134,12 +134,13 @@ module Clacky
134
134
 
135
135
  # === State updates ===
136
136
 
137
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
137
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
138
138
  data = {}
139
139
  data[:tasks] = tasks if tasks
140
140
  data[:cost] = cost if cost
141
141
  data[:cost_source] = cost_source if cost_source
142
142
  data[:status] = status if status
143
+ data[:latency] = latency if latency
143
144
  emit("session_update", **data) unless data.empty?
144
145
  end
145
146
 
@@ -136,7 +136,7 @@ module Clacky
136
136
 
137
137
  # === State updates (no-ops) ===
138
138
 
139
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
139
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
140
140
  def update_todos(todos); end
141
141
  def set_working_status; end
142
142
  def set_idle_status; end
@@ -22,7 +22,7 @@ module Clacky
22
22
  "name" => "OpenClacky",
23
23
  "base_url" => "https://api.openclacky.com",
24
24
  "api" => "bedrock",
25
- "default_model" => "abs-claude-sonnet-4-6",
25
+ "default_model" => "abs-claude-sonnet-4-5",
26
26
  "models" => [
27
27
  "abs-claude-opus-4-7",
28
28
  "abs-claude-opus-4-6",
@@ -131,7 +131,7 @@ module Clacky
131
131
  }.freeze,
132
132
 
133
133
  "clackyai-sea" => {
134
- "name" => "ClackyAI( Sea )",
134
+ "name" => "ClackyAI(Sea)",
135
135
  "base_url" => "https://api.clacky.ai",
136
136
  "api" => "bedrock",
137
137
  "default_model" => "abs-claude-sonnet-4-5",
@@ -152,7 +152,7 @@ module Clacky
152
152
 
153
153
  # === State updates (no-ops for IM) ===
154
154
 
155
- def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
155
+ def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
156
156
  def update_todos(todos); end
157
157
  def set_working_status; end
158
158
  def set_idle_status; end