openclacky 1.0.0.beta.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/lib/clacky/agent/llm_caller.rb +87 -4
- data/lib/clacky/agent/message_compressor_helper.rb +46 -28
- data/lib/clacky/agent/session_serializer.rb +47 -2
- data/lib/clacky/agent/skill_evolution.rb +21 -6
- data/lib/clacky/agent/skill_manager.rb +35 -1
- data/lib/clacky/agent.rb +15 -1
- data/lib/clacky/client.rb +44 -8
- data/lib/clacky/json_ui_controller.rb +2 -1
- data/lib/clacky/plain_ui_controller.rb +1 -1
- data/lib/clacky/providers.rb +2 -2
- data/lib/clacky/server/channel/channel_ui_controller.rb +1 -1
- data/lib/clacky/server/http_server.rb +94 -0
- data/lib/clacky/server/session_registry.rb +8 -1
- data/lib/clacky/server/web_ui_controller.rb +3 -2
- data/lib/clacky/session_manager.rb +105 -1
- data/lib/clacky/ui2/ui_controller.rb +2 -1
- data/lib/clacky/ui_interface.rb +1 -1
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +158 -6
- data/lib/clacky/web/app.js +157 -7
- data/lib/clacky/web/i18n.js +45 -24
- data/lib/clacky/web/index.html +10 -0
- data/lib/clacky/web/sessions.js +88 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 49800afa935670c288d9f421595df4246b61e76ed0f2a74e1a7a754e85e26162
|
|
4
|
+
data.tar.gz: dba09cac5a79485b743aaad4568ce2e4fe2e13772d6b8c43a360ec11eca7c762
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2b723771f71d880d99582f6bfd4d23a66f54ee3caa87f7ed228360f015cadb52a20be9d6869c6e35612740ddb889ceb762efa541a41bc25810f5897d47a333e1
|
|
7
|
+
data.tar.gz: 5c425e94d2bf4c4d68175b740d840b9cd6270ef91f2e68e6d8403fbb6fbc5336b07bd65308907dbb8d8c3cd1cb906c4c5f64ae7710a7e0619ab2aaae0ddc278b
|
data/CHANGELOG.md
CHANGED
|
@@ -5,7 +5,28 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
-
## [
|
|
8
|
+
## [1.0.0] - 2026-04-30
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Speed test tool in Web UI.** Test API response latency for different models and providers directly from the settings panel, making it easy to find the fastest endpoint for your region.
|
|
12
|
+
- **History chunk loading.** Previously compressed conversation chunks can now be loaded back into the session when needed, so long-running conversations don't lose context.
|
|
13
|
+
- **Default model changed to Claude Sonnet 4.5.** The new default model (`abs-claude-sonnet-4-5`) provides a better balance of speed, quality, and cost for most tasks.
|
|
14
|
+
|
|
15
|
+
### Improved
|
|
16
|
+
- **Thinking indicator now visible for more steps.** The "thinking..." indicator stays visible longer during complex operations, giving better feedback about what the agent is doing.
|
|
17
|
+
- **Message timestamps display correctly in Web UI.** User message times now show properly without layout issues, and the scroll behavior is smoother.
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- **Scroll position no longer jumps unexpectedly** in the Web UI when loading session history.
|
|
21
|
+
|
|
22
|
+
## [1.0.0.beta.6] - 2026-04-30
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- **Compression chunk indexing now uses disk-based discovery.** Chunk files are no longer incorrectly overwritten after the second compression. Previously, the chunk index was counted from `compressed_summary` messages in history — a count that caps at 1 after rebuild — causing `chunk-2.md` to be overwritten on every subsequent compression. The index now comes from durable disk-based chunk discovery via SessionManager, ensuring all compressed chunks are preserved.
|
|
26
|
+
- **Skill evolution no longer creates duplicate skills.** The reflect and auto-create scenarios in skill evolution are now mutually exclusive: when a skill was just used, only reflection runs; when no skill was used, only auto-creation is considered. This prevents near-duplicate "auto-*" skills from being extracted from tasks already served by an existing skill.
|
|
27
|
+
|
|
28
|
+
### Improved
|
|
29
|
+
- **Slash commands no longer misinterpret filesystem paths.** Pasted paths like `/Users/alice/foo` or `/tmp/bar` are no longer mistaken for slash commands, avoiding confusing "skill not found" notices.
|
|
9
30
|
|
|
10
31
|
## [1.0.0.beta.5] - 2026-04-29
|
|
11
32
|
|
|
@@ -86,7 +86,45 @@ module Clacky
|
|
|
86
86
|
# Successful response — if we were probing, confirm primary is healthy.
|
|
87
87
|
handle_probe_success if @config.probing?
|
|
88
88
|
|
|
89
|
-
rescue Faraday::
|
|
89
|
+
rescue Faraday::TimeoutError => e
|
|
90
|
+
# ── Read-timeout path (distinct from connection-level failures) ──
|
|
91
|
+
# Faraday::TimeoutError on our non-streaming POST almost always means
|
|
92
|
+
# the *response* took longer than the 300s read-timeout to come back —
|
|
93
|
+
# i.e. the model is trying to produce a huge output in one shot
|
|
94
|
+
# (e.g. "write me a 2000-line snake game"). Blindly retrying the same
|
|
95
|
+
# request with the same prompt reproduces the same timeout.
|
|
96
|
+
#
|
|
97
|
+
# Strategy:
|
|
98
|
+
# 1. On the FIRST timeout in a task, inject a `[SYSTEM]` user message
|
|
99
|
+
# telling the model to break the work into smaller steps, then
|
|
100
|
+
# retry. The history edit changes the prompt, so the retry is
|
|
101
|
+
# materially different from the failed attempt.
|
|
102
|
+
# 2. On subsequent timeouts in the same task, fall back to the
|
|
103
|
+
# generic "just retry" behaviour (the model may have ignored
|
|
104
|
+
# the hint; don't pile on duplicate hints).
|
|
105
|
+
# 3. Probing-mode timeouts still go through handle_probe_failure.
|
|
106
|
+
retries += 1
|
|
107
|
+
|
|
108
|
+
if @config.probing?
|
|
109
|
+
handle_probe_failure
|
|
110
|
+
retry
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
if retries <= max_retries
|
|
114
|
+
inject_large_output_hint_if_first_timeout(e)
|
|
115
|
+
@ui&.show_progress(
|
|
116
|
+
"Response too slow (likely generating too much at once): #{e.message}",
|
|
117
|
+
progress_type: "retrying",
|
|
118
|
+
phase: "active",
|
|
119
|
+
metadata: { attempt: retries, total: max_retries }
|
|
120
|
+
)
|
|
121
|
+
sleep retry_delay
|
|
122
|
+
retry
|
|
123
|
+
else
|
|
124
|
+
raise AgentError, "[LLM] Request timed out after #{max_retries} retries: #{e.message}"
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
rescue Faraday::ConnectionFailed, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
|
|
90
128
|
retries += 1
|
|
91
129
|
|
|
92
130
|
# Probing failure: primary still down — renew cooling-off and retry with fallback.
|
|
@@ -95,9 +133,10 @@ module Clacky
|
|
|
95
133
|
retry
|
|
96
134
|
end
|
|
97
135
|
|
|
98
|
-
#
|
|
99
|
-
# infrastructure blips — do NOT trigger fallback
|
|
100
|
-
#
|
|
136
|
+
# Connection-level errors (DNS, TCP refused, open-timeout, TLS) are
|
|
137
|
+
# transient infrastructure blips — do NOT trigger fallback, and do
|
|
138
|
+
# NOT inject the "break into steps" hint (the model did nothing wrong).
|
|
139
|
+
# Just retry on the current model up to max_retries.
|
|
101
140
|
if retries <= max_retries
|
|
102
141
|
@ui&.show_progress(
|
|
103
142
|
"Network failed: #{e.message}",
|
|
@@ -229,6 +268,50 @@ module Clacky
|
|
|
229
268
|
(msg.include?("thinking") || msg.include?("must be passed back") ||
|
|
230
269
|
msg.include?("must be provided"))
|
|
231
270
|
end
|
|
271
|
+
|
|
272
|
+
# On the FIRST Faraday::TimeoutError within a task, append a [SYSTEM]
|
|
273
|
+
# user message to the history instructing the model to break its work
|
|
274
|
+
# into smaller steps. Subsequent timeouts in the same task are ignored
|
|
275
|
+
# here (caller just retries) so we don't pollute history with duplicate
|
|
276
|
+
# hints.
|
|
277
|
+
#
|
|
278
|
+
# The injected message carries `system_injected: true` so it is:
|
|
279
|
+
# - Hidden from UI replay (session_serializer / replay_history filters)
|
|
280
|
+
# - Skipped by prompt-caching marker placement (client.rb)
|
|
281
|
+
# - Skipped by message compression's "recent user turn" protection
|
|
282
|
+
# (message_compressor_helper.rb)
|
|
283
|
+
#
|
|
284
|
+
# Reset per-task via Agent#run (see @task_timeout_hint_injected = false).
|
|
285
|
+
private def inject_large_output_hint_if_first_timeout(err)
|
|
286
|
+
return if @task_timeout_hint_injected
|
|
287
|
+
|
|
288
|
+
@task_timeout_hint_injected = true
|
|
289
|
+
|
|
290
|
+
hint = "[SYSTEM] The previous LLM response timed out (read timeout after ~300s). " \
|
|
291
|
+
"This usually means the model was trying to produce too much output in a single response. " \
|
|
292
|
+
"Please change your approach:\n" \
|
|
293
|
+
"- Break the task into multiple smaller steps, each producing a short response.\n" \
|
|
294
|
+
"- For long files: first create a skeleton with `write` (structure + placeholder comments only), " \
|
|
295
|
+
"then fill in each section with separate `edit` calls.\n" \
|
|
296
|
+
"- Keep each single tool-call argument (especially file content) well under ~500 lines.\n" \
|
|
297
|
+
"- Do NOT attempt to output the entire deliverable in one response."
|
|
298
|
+
|
|
299
|
+
@history.append({
|
|
300
|
+
role: "user",
|
|
301
|
+
content: hint,
|
|
302
|
+
system_injected: true,
|
|
303
|
+
task_id: @current_task_id
|
|
304
|
+
})
|
|
305
|
+
|
|
306
|
+
Clacky::Logger.info(
|
|
307
|
+
"[llm_caller] Read-timeout detected — injected 'break into smaller steps' hint " \
|
|
308
|
+
"(error=#{err.class}: #{err.message})"
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
@ui&.show_warning(
|
|
312
|
+
"LLM response timed out — asking model to break the task into smaller steps and retrying..."
|
|
313
|
+
)
|
|
314
|
+
end
|
|
232
315
|
end
|
|
233
316
|
end
|
|
234
317
|
end
|
|
@@ -154,12 +154,22 @@ module Clacky
|
|
|
154
154
|
# Note: we need to remove the compression instruction message we just added
|
|
155
155
|
original_messages = @history.to_a[0..-2] # All except the last (compression instruction)
|
|
156
156
|
|
|
157
|
-
# Archive compressed messages to a chunk MD file before discarding them
|
|
158
|
-
#
|
|
159
|
-
#
|
|
160
|
-
#
|
|
161
|
-
|
|
162
|
-
|
|
157
|
+
# Archive compressed messages to a chunk MD file before discarding them.
|
|
158
|
+
#
|
|
159
|
+
# IMPORTANT: chunk_index and previous_chunks MUST come from disk, not from
|
|
160
|
+
# message history. Each compression's rebuild_with_compression keeps only
|
|
161
|
+
# ONE compressed_summary message (the new one), dropping older summaries
|
|
162
|
+
# and embedding their references into the new summary's content. So
|
|
163
|
+
# counting compressed_summary messages in history caps at 1 from the
|
|
164
|
+
# second compression onward — causing chunk-2.md to be overwritten on
|
|
165
|
+
# every subsequent compression, and losing references to chunk-1.md.
|
|
166
|
+
#
|
|
167
|
+
# Disk is the only durable source of truth: chunk files survive process
|
|
168
|
+
# restarts, session reloads, and message rebuilds. SessionManager owns
|
|
169
|
+
# all chunk file I/O (naming, writing, discovery) — we just ask it.
|
|
170
|
+
sm = session_manager
|
|
171
|
+
existing_chunks = sm.chunks_for_current(@session_id, @created_at)
|
|
172
|
+
chunk_index = sm.next_chunk_index(@session_id, @created_at)
|
|
163
173
|
|
|
164
174
|
# Extract topics from the LLM response to store in both the chunk MD front
|
|
165
175
|
# matter and the compressed_summary message hash (for future chunk indexing).
|
|
@@ -173,14 +183,13 @@ module Clacky
|
|
|
173
183
|
topics: topics
|
|
174
184
|
)
|
|
175
185
|
|
|
176
|
-
#
|
|
177
|
-
#
|
|
178
|
-
#
|
|
179
|
-
#
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
.map { |m| { basename: File.basename(m[:chunk_path]), path: m[:chunk_path], topics: m[:topics] } }
|
|
186
|
+
# Build previous_chunks index from the disk-discovered chunks (already
|
|
187
|
+
# sorted by index ascending). This gives the new summary a complete
|
|
188
|
+
# chronological index of all older archives so the AI can recall any
|
|
189
|
+
# past chunk via file_reader, not just the most recent one.
|
|
190
|
+
previous_chunks = existing_chunks.map do |c|
|
|
191
|
+
{ basename: c[:basename], path: c[:path], topics: c[:topics] }
|
|
192
|
+
end
|
|
184
193
|
|
|
185
194
|
@history.replace_all(@message_compressor.rebuild_with_compression(
|
|
186
195
|
compressed_content,
|
|
@@ -348,8 +357,22 @@ module Clacky
|
|
|
348
357
|
end
|
|
349
358
|
end
|
|
350
359
|
|
|
351
|
-
#
|
|
352
|
-
#
|
|
360
|
+
# Lazy accessor for a SessionManager instance used by compression chunk I/O.
|
|
361
|
+
# We keep this local to the helper rather than threading a manager instance
|
|
362
|
+
# through the Agent constructor — Agent itself doesn't persist sessions
|
|
363
|
+
# (CLI / HTTP server do that), but the compression archive lives in the
|
|
364
|
+
# same directory under SessionManager's ownership.
|
|
365
|
+
#
|
|
366
|
+
# NOTE: Uses Clacky::SessionManager::SESSIONS_DIR by default. Tests can
|
|
367
|
+
# stub that constant to point at a tmpdir.
|
|
368
|
+
private def session_manager
|
|
369
|
+
@session_manager ||= Clacky::SessionManager.new
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
# Save the messages being compressed to a chunk MD file for future recall.
|
|
373
|
+
# The filesystem concerns (path, write, chmod) are delegated to SessionManager;
|
|
374
|
+
# this method is responsible only for the business rules of WHAT gets archived.
|
|
375
|
+
#
|
|
353
376
|
# @param original_messages [Array<Hash>] All messages before compression (excluding compression instruction)
|
|
354
377
|
# @param recent_messages [Array<Hash>] Recent messages being kept (to exclude from chunk)
|
|
355
378
|
# @param chunk_index [Integer] Sequential chunk number
|
|
@@ -373,19 +396,14 @@ module Clacky
|
|
|
373
396
|
|
|
374
397
|
return nil if messages_to_archive.empty?
|
|
375
398
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
chunk_filename = "#{base_name}-chunk-#{chunk_index}.md"
|
|
381
|
-
chunk_path = File.join(sessions_dir, chunk_filename)
|
|
382
|
-
|
|
383
|
-
md_content = build_chunk_md(messages_to_archive, chunk_index: chunk_index, compression_level: compression_level, topics: topics)
|
|
384
|
-
|
|
385
|
-
File.write(chunk_path, md_content)
|
|
386
|
-
FileUtils.chmod(0o600, chunk_path)
|
|
399
|
+
md_content = build_chunk_md(messages_to_archive,
|
|
400
|
+
chunk_index: chunk_index,
|
|
401
|
+
compression_level: compression_level,
|
|
402
|
+
topics: topics)
|
|
387
403
|
|
|
388
|
-
|
|
404
|
+
# Delegate filesystem concerns (path assembly, write, chmod) to SessionManager —
|
|
405
|
+
# it owns the on-disk layout for sessions and their chunk archives.
|
|
406
|
+
session_manager.write_chunk(@session_id, @created_at, chunk_index, md_content)
|
|
389
407
|
rescue => e
|
|
390
408
|
@ui&.log("Failed to save chunk MD: #{e.message}", level: :warn)
|
|
391
409
|
nil
|
|
@@ -36,6 +36,15 @@ module Clacky
|
|
|
36
36
|
# Restore previous_total_tokens for accurate delta calculation across sessions
|
|
37
37
|
@previous_total_tokens = session_data.dig(:stats, :previous_total_tokens) || 0
|
|
38
38
|
|
|
39
|
+
# Recover the latest latency metric from the most recent assistant message
|
|
40
|
+
# that carries a :latency field. This is the source of truth for the status-bar
|
|
41
|
+
# signal — no separate session-level field is needed. Older sessions (pre-feature)
|
|
42
|
+
# simply start with nil; the signal stays hidden until the next LLM call populates it.
|
|
43
|
+
last_assistant_with_latency = @history.to_a.reverse.find do |m|
|
|
44
|
+
m[:role].to_s == "assistant" && m[:latency]
|
|
45
|
+
end
|
|
46
|
+
@latest_latency = last_assistant_with_latency&.dig(:latency)
|
|
47
|
+
|
|
39
48
|
# Restore Time Machine state
|
|
40
49
|
@task_parents = session_data.dig(:time_machine, :task_parents) || {}
|
|
41
50
|
@current_task_id = session_data.dig(:time_machine, :current_task_id) || 0
|
|
@@ -178,8 +187,18 @@ module Clacky
|
|
|
178
187
|
elsif current_round
|
|
179
188
|
current_round[:events] << msg
|
|
180
189
|
elsif msg[:compressed_summary] && msg[:chunk_path]
|
|
181
|
-
# Compressed summary sitting before any user rounds — expand
|
|
182
|
-
|
|
190
|
+
# Compressed summary sitting before any user rounds — expand ALL chunk
|
|
191
|
+
# MD files that belong to the same session (siblings of chunk_path),
|
|
192
|
+
# in chunk-index ascending order.
|
|
193
|
+
#
|
|
194
|
+
# Under the current "single summary + previous_chunks index" scheme,
|
|
195
|
+
# session.json only keeps the newest compressed_summary message (which
|
|
196
|
+
# points at the newest chunk). Older chunks (chunk-1..chunk-N-1) are
|
|
197
|
+
# referenced only as basenames inside the summary text. Expanding just
|
|
198
|
+
# msg[:chunk_path] would therefore lose all prior chunks on replay.
|
|
199
|
+
chunk_rounds = sibling_chunks_of(msg[:chunk_path]).flat_map { |p|
|
|
200
|
+
parse_chunk_md_to_rounds(p)
|
|
201
|
+
}
|
|
183
202
|
rounds.concat(chunk_rounds)
|
|
184
203
|
# After expanding, treat the last chunk round as the current round so that
|
|
185
204
|
# any orphaned assistant/tool messages that follow in session.json (belonging
|
|
@@ -243,6 +262,32 @@ module Clacky
|
|
|
243
262
|
{ has_more: has_more }
|
|
244
263
|
end
|
|
245
264
|
|
|
265
|
+
# Return all chunk MD file paths that belong to the same session as
|
|
266
|
+
# +chunk_path+, sorted by chunk index ascending (chunk-1, chunk-2, …).
|
|
267
|
+
# Uses the filename convention "<base>-chunk-<N>.md".
|
|
268
|
+
#
|
|
269
|
+
# Handles path resolution the same way parse_chunk_md_to_rounds does:
|
|
270
|
+
# if the stored path doesn't exist, fall back to SESSIONS_DIR + basename
|
|
271
|
+
# (cross-machine / cross-user session bundles).
|
|
272
|
+
private def sibling_chunks_of(chunk_path)
|
|
273
|
+
return [] unless chunk_path
|
|
274
|
+
|
|
275
|
+
resolved = chunk_path.to_s
|
|
276
|
+
unless File.exist?(resolved)
|
|
277
|
+
resolved = File.join(Clacky::SessionManager::SESSIONS_DIR, File.basename(resolved))
|
|
278
|
+
end
|
|
279
|
+
return [] unless File.exist?(resolved)
|
|
280
|
+
|
|
281
|
+
dir = File.dirname(resolved)
|
|
282
|
+
base = File.basename(resolved).sub(/-chunk-\d+\.md\z/, "")
|
|
283
|
+
return [resolved] if base == File.basename(resolved) # unconventional name — just use as-is
|
|
284
|
+
|
|
285
|
+
Dir.glob(File.join(dir, "#{base}-chunk-*.md")).sort_by do |p|
|
|
286
|
+
m = File.basename(p).match(/-chunk-(\d+)\.md\z/)
|
|
287
|
+
m ? m[1].to_i : Float::INFINITY
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
246
291
|
# Parse a chunk MD file into an array of rounds compatible with replay_history.
|
|
247
292
|
# Each round is { user_msg: Hash, events: Array<Hash> }.
|
|
248
293
|
# Timestamps are synthesised from the chunk's archived_at, spread backwards.
|
|
@@ -10,16 +10,31 @@ module Clacky
|
|
|
10
10
|
# Triggered at the end of Agent#run (post-run hooks), only for main agents.
|
|
11
11
|
module SkillEvolution
|
|
12
12
|
# Main entry point - runs all skill evolution checks
|
|
13
|
-
# Called from Agent#run after the main loop completes
|
|
13
|
+
# Called from Agent#run after the main loop completes.
|
|
14
|
+
#
|
|
15
|
+
# The two scenarios are mutually exclusive by design:
|
|
16
|
+
#
|
|
17
|
+
# * If a skill just ran (@skill_execution_context is set), the user's
|
|
18
|
+
# need was already served by an existing skill. Run Scenario 2
|
|
19
|
+
# (reflect + possibly improve that skill) and skip Scenario 1 —
|
|
20
|
+
# otherwise we would auto-extract a near-duplicate "auto-*" skill
|
|
21
|
+
# from the same task, polluting the skills directory.
|
|
22
|
+
#
|
|
23
|
+
# * If no skill ran, the task was solved with raw tools. That is the
|
|
24
|
+
# signal for Scenario 1: if the pattern is complex/repeatable enough,
|
|
25
|
+
# consider extracting it into a new skill.
|
|
14
26
|
def run_skill_evolution_hooks
|
|
15
27
|
return unless skill_evolution_enabled?
|
|
16
28
|
return if @is_subagent
|
|
17
29
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
30
|
+
if @skill_execution_context
|
|
31
|
+
# Scenario 2: Reflect on executed skill (may invoke skill-creator
|
|
32
|
+
# to UPDATE the existing skill, but will not create a new one).
|
|
33
|
+
maybe_reflect_on_skill
|
|
34
|
+
else
|
|
35
|
+
# Scenario 1: Auto-create new skill from complex task.
|
|
36
|
+
maybe_create_skill_from_task
|
|
37
|
+
end
|
|
23
38
|
end
|
|
24
39
|
|
|
25
40
|
# Check if skill evolution is enabled in config
|
|
@@ -33,12 +33,46 @@ module Clacky
|
|
|
33
33
|
def parse_skill_command(input)
|
|
34
34
|
return { matched: false } unless input.start_with?("/")
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
# Split off the first whitespace-delimited token after the leading "/".
|
|
37
|
+
# Shape of a slash command:
|
|
38
|
+
# /<command>
|
|
39
|
+
# /<command> <arguments...>
|
|
40
|
+
#
|
|
41
|
+
# The key distinction we need to make is "slash command" vs. "filesystem
|
|
42
|
+
# path starting with /". Paths look like "/xxx/yyy", "/Users/alice/foo",
|
|
43
|
+
# "/tmp/bar" — what they all share is a *second* "/" inside the first
|
|
44
|
+
# token. Slash commands, on the other hand, may legitimately contain
|
|
45
|
+
# non-slug characters like ':' or '.' (e.g. "/guizang-ppt-skill:create"),
|
|
46
|
+
# so we deliberately DO NOT require the command to be a clean slug here —
|
|
47
|
+
# find_by_command handles the lookup, and a pilot-error like "/foo.bar"
|
|
48
|
+
# should still surface a friendly "skill not found" notice.
|
|
49
|
+
#
|
|
50
|
+
# Rejected as slash commands (treated as plain user messages):
|
|
51
|
+
#   - "/", "//"            — token is empty or is itself a "/" (note: "/*.rb" has no second "/", so it is NOT rejected here; it falls through to :not_found)
|
|
52
|
+
# - "/ leading space" — whitespace immediately after /
|
|
53
|
+
# - "/Users/alice/foo" — second "/" inside the first token ⇒ a path
|
|
54
|
+
# - "/xxxx/zzzz/" — same
|
|
55
|
+
#
|
|
56
|
+
# Accepted (routed to find_by_command, may yield :not_found notice):
|
|
57
|
+
# - "/commit"
|
|
58
|
+
# - "/skill-add https://…" — "/" appears only in arguments, fine
|
|
59
|
+
# - "/guizang-ppt-skill:create", "/foo.bar" — non-slug but no path shape
|
|
60
|
+
match = input.match(%r{^/(\S+?)(?:\s+(.*))?$})
|
|
37
61
|
return { matched: false } unless match
|
|
38
62
|
|
|
39
63
|
skill_name = match[1]
|
|
40
64
|
arguments = match[2] || ""
|
|
41
65
|
|
|
66
|
+
# Reject path-like first tokens: anything containing a "/" after the
|
|
67
|
+
# leading one belongs to the filesystem, not the command namespace.
|
|
68
|
+
# This check does NOT cover "" (the regex above already fails to match
|
|
69
|
+
# "/" alone) nor "*…" / ".…" style tokens: those contain no second "/",
|
|
70
|
+
# so they fall through to :not_found, which is acceptable. The main goal is
|
|
71
|
+
# to stop pasted paths like "/Users/foo/bar" from producing a bogus
|
|
72
|
+
# "skill /Users/foo/bar not found" reply.
|
|
73
|
+
return { matched: false } if skill_name.include?("/")
|
|
74
|
+
return { matched: false } if skill_name.empty?
|
|
75
|
+
|
|
42
76
|
skill = @skill_loader.find_by_command("/#{skill_name}")
|
|
43
77
|
return { matched: true, found: false, skill_name: skill_name, reason: :not_found } unless skill
|
|
44
78
|
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -42,7 +42,8 @@ module Clacky
|
|
|
42
42
|
|
|
43
43
|
attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
|
|
44
44
|
:cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
|
|
45
|
-
:status, :error, :updated_at, :source
|
|
45
|
+
:status, :error, :updated_at, :source,
|
|
46
|
+
:latest_latency # Hash of latency metrics from the most recent LLM call (see Client#send_messages_with_tools)
|
|
46
47
|
attr_accessor :pinned
|
|
47
48
|
|
|
48
49
|
def permission_mode
|
|
@@ -78,6 +79,7 @@ module Clacky
|
|
|
78
79
|
@task_cost_source = :estimated # Track cost source for current task
|
|
79
80
|
@previous_total_tokens = 0 # Track tokens from previous iteration for delta calculation
|
|
80
81
|
@interrupted = false # Flag for user interrupt
|
|
82
|
+
@latest_latency = nil # Most recent LLM call's latency metrics (see Client#send_messages_with_tools)
|
|
81
83
|
@ui = ui # UIController for direct UI interaction
|
|
82
84
|
@debug_logs = [] # Debug logs for troubleshooting
|
|
83
85
|
@pending_injections = [] # Pending inline skill injections to flush after observe()
|
|
@@ -208,6 +210,7 @@ module Clacky
|
|
|
208
210
|
|
|
209
211
|
@start_time = Time.now
|
|
210
212
|
@task_truncation_count = 0 # Reset truncation counter for each task
|
|
213
|
+
@task_timeout_hint_injected = false # Reset read-timeout hint injection (see LlmCaller)
|
|
211
214
|
@task_cost_source = :estimated # Reset for new task
|
|
212
215
|
# Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
|
|
213
216
|
# across tasks to correctly calculate delta tokens in each iteration
|
|
@@ -681,6 +684,17 @@ module Clacky
|
|
|
681
684
|
end
|
|
682
685
|
# Store token_usage in the message so replay_history can re-emit it
|
|
683
686
|
msg[:token_usage] = response[:token_usage] if response[:token_usage]
|
|
687
|
+
# Store per-message latency — this is the source of truth (session.json)
|
|
688
|
+
# for all time-to-first-token / duration / throughput info. The status
|
|
689
|
+
# bar signal reads the last assistant message's latency; no separate
|
|
690
|
+
# config file or top-level session field is introduced.
|
|
691
|
+
if response[:latency]
|
|
692
|
+
msg[:latency] = response[:latency]
|
|
693
|
+
@latest_latency = response[:latency]
|
|
694
|
+
# Push to UI so the status-bar signal updates immediately after the
|
|
695
|
+
# model finishes (before any tool execution delays the next event).
|
|
696
|
+
@ui&.update_sessionbar(latency: response[:latency])
|
|
697
|
+
end
|
|
684
698
|
# Preserve reasoning_content from the real LLM response.
|
|
685
699
|
# This is the authoritative signal used by MessageHistory#to_api to
|
|
686
700
|
# detect thinking-mode providers (DeepSeek V4, Kimi K2 thinking, etc.)
|
data/lib/clacky/client.rb
CHANGED
|
@@ -89,18 +89,54 @@ module Clacky
|
|
|
89
89
|
# ── Agent main path ───────────────────────────────────────────────────────
|
|
90
90
|
|
|
91
91
|
# Send messages with tool-calling support.
|
|
92
|
-
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
|
|
92
|
+
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }
|
|
93
|
+
#
|
|
94
|
+
# Latency measurement:
|
|
95
|
+
# Because the current HTTP path is *non-streaming* (plain POST, response
|
|
96
|
+
# body read in one shot), TTFB (time to response headers) is not exposed
|
|
97
|
+
# by Faraday's default adapter without extra plumbing. What we CAN measure
|
|
98
|
+
# cheaply — and what users actually feel — is total request duration,
|
|
99
|
+
# which for a non-streaming call equals the time from "hit Enter" to
|
|
100
|
+
# "first token visible" (since we receive everything at once).
|
|
101
|
+
#
|
|
102
|
+
# So we record `duration_ms` as the authoritative number and alias it to
|
|
103
|
+
# `ttft_ms` for downstream consumers (status bar uses ttft_ms as its
|
|
104
|
+
# signal metric — see docs). When we migrate to streaming later, this
|
|
105
|
+
# same `ttft_ms` field will start carrying the *actual* first-token
|
|
106
|
+
# latency without any schema change.
|
|
93
107
|
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
94
108
|
caching_enabled = enable_caching && supports_prompt_caching?(model)
|
|
95
109
|
cloned = deep_clone(messages)
|
|
96
110
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
111
|
+
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
112
|
+
response =
|
|
113
|
+
if bedrock?
|
|
114
|
+
send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
115
|
+
elsif anthropic_format?
|
|
116
|
+
send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
117
|
+
else
|
|
118
|
+
send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
119
|
+
end
|
|
120
|
+
t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
121
|
+
|
|
122
|
+
duration_ms = ((t1 - t0) * 1000).round
|
|
123
|
+
# Throughput is only meaningful with a reasonable output size; below ~10
|
|
124
|
+
# tokens the sample is too small to be informative and the result is
|
|
125
|
+
# wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
|
|
126
|
+
# Canonical usage hashes from message_format/* all use :completion_tokens.
|
|
127
|
+
output_tokens = response[:usage]&.dig(:completion_tokens).to_i
|
|
128
|
+
tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
|
|
129
|
+
|
|
130
|
+
response[:latency] = {
|
|
131
|
+
ttft_ms: duration_ms, # non-streaming: TTFT == full duration
|
|
132
|
+
duration_ms: duration_ms,
|
|
133
|
+
output_tokens: output_tokens,
|
|
134
|
+
tps: tps,
|
|
135
|
+
model: model,
|
|
136
|
+
measured_at: Time.now.to_f,
|
|
137
|
+
streaming: false # future flag — true when we migrate
|
|
138
|
+
}
|
|
139
|
+
response
|
|
104
140
|
end
|
|
105
141
|
|
|
106
142
|
# Format tool results into canonical messages ready to append to @messages.
|
|
@@ -134,12 +134,13 @@ module Clacky
|
|
|
134
134
|
|
|
135
135
|
# === State updates ===
|
|
136
136
|
|
|
137
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil)
|
|
137
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil)
|
|
138
138
|
data = {}
|
|
139
139
|
data[:tasks] = tasks if tasks
|
|
140
140
|
data[:cost] = cost if cost
|
|
141
141
|
data[:cost_source] = cost_source if cost_source
|
|
142
142
|
data[:status] = status if status
|
|
143
|
+
data[:latency] = latency if latency
|
|
143
144
|
emit("session_update", **data) unless data.empty?
|
|
144
145
|
end
|
|
145
146
|
|
|
@@ -136,7 +136,7 @@ module Clacky
|
|
|
136
136
|
|
|
137
137
|
# === State updates (no-ops) ===
|
|
138
138
|
|
|
139
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
|
|
139
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
|
|
140
140
|
def update_todos(todos); end
|
|
141
141
|
def set_working_status; end
|
|
142
142
|
def set_idle_status; end
|
data/lib/clacky/providers.rb
CHANGED
|
@@ -22,7 +22,7 @@ module Clacky
|
|
|
22
22
|
"name" => "OpenClacky",
|
|
23
23
|
"base_url" => "https://api.openclacky.com",
|
|
24
24
|
"api" => "bedrock",
|
|
25
|
-
"default_model" => "abs-claude-sonnet-4-
|
|
25
|
+
"default_model" => "abs-claude-sonnet-4-5",
|
|
26
26
|
"models" => [
|
|
27
27
|
"abs-claude-opus-4-7",
|
|
28
28
|
"abs-claude-opus-4-6",
|
|
@@ -131,7 +131,7 @@ module Clacky
|
|
|
131
131
|
}.freeze,
|
|
132
132
|
|
|
133
133
|
"clackyai-sea" => {
|
|
134
|
-
"name" => "ClackyAI(
|
|
134
|
+
"name" => "ClackyAI(Sea)",
|
|
135
135
|
"base_url" => "https://api.clacky.ai",
|
|
136
136
|
"api" => "bedrock",
|
|
137
137
|
"default_model" => "abs-claude-sonnet-4-5",
|
|
@@ -152,7 +152,7 @@ module Clacky
|
|
|
152
152
|
|
|
153
153
|
# === State updates (no-ops for IM) ===
|
|
154
154
|
|
|
155
|
-
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil); end
|
|
155
|
+
def update_sessionbar(tasks: nil, cost: nil, cost_source: nil, status: nil, latency: nil); end
|
|
156
156
|
def update_todos(todos); end
|
|
157
157
|
def set_working_status; end
|
|
158
158
|
def set_idle_status; end
|