openclacky 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.clacky/skills/gem-release/SKILL.md +27 -31
  3. data/CHANGELOG.md +35 -0
  4. data/Dockerfile +28 -0
  5. data/README.md +28 -7
  6. data/docs/engineering-article.md +343 -0
  7. data/lib/clacky/agent/llm_caller.rb +19 -1
  8. data/lib/clacky/agent/session_serializer.rb +6 -1
  9. data/lib/clacky/agent.rb +14 -5
  10. data/lib/clacky/anthropic_stream_aggregator.rb +135 -0
  11. data/lib/clacky/bedrock_stream_aggregator.rb +137 -0
  12. data/lib/clacky/cli.rb +10 -3
  13. data/lib/clacky/client.rb +146 -17
  14. data/lib/clacky/default_skills/onboard/SKILL.md +6 -2
  15. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +50 -6
  16. data/lib/clacky/message_format/anthropic.rb +17 -1
  17. data/lib/clacky/openai_stream_aggregator.rb +130 -0
  18. data/lib/clacky/providers.rb +34 -0
  19. data/lib/clacky/server/channel/adapters/dingtalk/adapter.rb +142 -5
  20. data/lib/clacky/server/channel/adapters/dingtalk/api_client.rb +309 -0
  21. data/lib/clacky/server/http_server.rb +2 -3
  22. data/lib/clacky/server/web_ui_controller.rb +8 -4
  23. data/lib/clacky/ui2/progress_handle.rb +77 -15
  24. data/lib/clacky/ui2/ui_controller.rb +18 -2
  25. data/lib/clacky/ui_interface.rb +14 -0
  26. data/lib/clacky/utils/model_pricing.rb +96 -25
  27. data/lib/clacky/version.rb +1 -1
  28. data/lib/clacky/web/app.css +14 -4
  29. data/lib/clacky/web/i18n.js +6 -0
  30. data/lib/clacky/web/index.html +4 -2
  31. data/lib/clacky/web/onboard.js +6 -0
  32. data/lib/clacky/web/sessions.js +152 -48
  33. data/lib/clacky/web/settings.js +17 -5
  34. data/lib/clacky/web/vendor/katex/auto-render.min.js +1 -0
  35. data/lib/clacky/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
  36. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
  37. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
  38. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
  39. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
  40. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
  41. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
  42. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
  43. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
  44. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
  45. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
  46. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
  47. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
  48. data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
  49. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
  50. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
  51. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
  52. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
  53. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
  54. data/lib/clacky/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
  55. data/lib/clacky/web/vendor/katex/katex.min.css +1 -0
  56. data/lib/clacky/web/vendor/katex/katex.min.js +1 -0
  57. data/lib/clacky/web/ws-dispatcher.js +19 -4
  58. data/lib/clacky.rb +3 -0
  59. data/scripts/build/lib/apt.sh +30 -10
  60. data/scripts/build/lib/network.sh +3 -2
  61. data/scripts/install.ps1 +14 -3
  62. data/scripts/install.sh +30 -9
  63. metadata +30 -17
  64. data/docs/HOW-TO-USE-CN.md +0 -96
  65. data/docs/HOW-TO-USE.md +0 -94
  66. data/docs/browser-cdp-native-design.md +0 -195
  67. data/docs/c-end-user-positioning.md +0 -64
  68. data/docs/config.example.yml +0 -27
  69. data/docs/deploy-architecture.md +0 -619
  70. data/docs/deploy_subagent_design.md +0 -540
  71. data/docs/install-script-simplification.md +0 -89
  72. data/docs/memory-architecture.md +0 -343
  73. data/docs/openclacky_cloud_api_reference.md +0 -584
  74. data/docs/security-design.md +0 -109
  75. data/docs/session-management-redesign.md +0 -202
  76. data/docs/system-skill-authoring-guide.md +0 -47
  77. data/docs/why-developer.md +0 -371
  78. data/docs/why-openclacky.md +0 -266
data/lib/clacky/agent.rb CHANGED
@@ -427,7 +427,7 @@ module Clacky
427
427
  tool_calls_count: (response[:tool_calls] || []).size
428
428
  )
429
429
  if response[:content] && !response[:content].empty?
430
- emit_assistant_message(response[:content])
430
+ emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
431
431
  end
432
432
 
433
433
  # Show token usage after the assistant message so WebUI renders it below the bubble
@@ -448,7 +448,7 @@ module Clacky
448
448
 
449
449
  # Show assistant message if there's content before tool calls
450
450
  if response[:content] && !response[:content].empty?
451
- emit_assistant_message(response[:content])
451
+ emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
452
452
  end
453
453
 
454
454
  # Show token usage after assistant message (or immediately if no message).
@@ -1532,11 +1532,20 @@ module Clacky
1532
1532
  # and cannot load file:// directly) and must stay scoped to the Web UI
1533
1533
  # controller. IM channel subscribers need the original file:// markdown so
1534
1534
  # parse_file_links can extract paths and deliver images as native attachments.
1535
- private def emit_assistant_message(content)
1536
- return if content.nil? || content.empty?
1535
+ private def emit_assistant_message(content, reasoning_content: nil)
1536
+ # Prepend reasoning/thinking content (from thinking-mode providers like
1537
+ # DeepSeek V4, Kimi K2) wrapped in <think> tags so the Web UI renders it
1538
+ # as a collapsible thinking block (see sessions.js _renderMarkdown).
1539
+ if reasoning_content && !reasoning_content.to_s.strip.empty?
1540
+ full_content = "<think>\n#{reasoning_content}\n</think>\n#{content}"
1541
+ else
1542
+ full_content = content
1543
+ end
1544
+
1545
+ return if full_content.nil? || full_content.to_s.strip.empty?
1537
1546
 
1538
1547
  parsed = parse_file_links(content)
1539
- @ui&.show_assistant_message(parsed[:text], files: parsed[:files])
1548
+ @ui&.show_assistant_message(full_content, files: parsed[:files])
1540
1549
  end
1541
1550
 
1542
1551
  # Track modified files for Time Machine snapshots
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Clacky
  # Reassembles an Anthropic Messages SSE stream (event: message_start /
  # content_block_start / content_block_delta / content_block_stop /
  # message_delta / message_stop / ping / error) into the hash shape that
  # MessageFormat::Anthropic.parse_response expects from a non-streaming
  # response, while invoking on_chunk(input_tokens:, output_tokens:) as
  # usage accumulates.
  #
  # Wire reference: https://docs.anthropic.com/en/api/messages-streaming
  class AnthropicStreamAggregator
    # @param on_chunk [#call, nil] optional progress callback; invoked with
    #   the keyword arguments input_tokens: and output_tokens:.
    def initialize(on_chunk: nil)
      @on_chunk = on_chunk
      @blocks = {}            # content-block index => accumulated block state
      @stop_reason = nil
      @usage = {}             # merged "usage" hashes reported by the stream
      @last_input_tokens = 0  # last values handed to on_chunk (de-duplication)
      @last_output_tokens = 0
    end

    # Feed one SSE frame into the aggregator.
    #
    # Payloads that are not a JSON object (malformed JSON, nil, arrays,
    # scalars) are ignored so that a single bad frame cannot abort the stream.
    #
    # @param event [String] SSE event name
    # @param data_str [String, nil] raw JSON payload of the frame
    # @return [void]
    def handle(event, data_str)
      data = parse_or_nil(data_str)
      return unless data

      case event
      when "message_start"
        message = data["message"]
        record_usage(message["usage"]) if message.is_a?(Hash)
      when "content_block_start"
        start_block(data)
      when "content_block_delta"
        apply_delta(data)
        emit_estimate_progress
      when "message_delta"
        delta = data["delta"]
        @stop_reason = delta["stop_reason"] if delta.is_a?(Hash) && delta["stop_reason"]
        record_usage(data["usage"])
      when "error"
        # The stream can report an error in-band; surface it in the log
        # instead of dropping it silently. Aggregated state is left untouched.
        Clacky::Logger.warn("[AnthropicStreamAggregator] stream error event: #{data["error"].inspect}")
      when "content_block_stop", "message_stop", "ping"
        # Nothing to do: blocks are finalised in to_h.
      end
    end

    # Canonical non-streaming Anthropic response shape consumed by
    # MessageFormat::Anthropic.parse_response.
    #
    # Tool-use blocks carry their input parsed from the accumulated JSON
    # (the raw string when it does not parse, {} when nothing was streamed).
    # Every other block kind is rendered as a text block from its :text
    # buffer; a block that only received thinking deltas therefore renders
    # with an empty "text".
    #
    # @return [Hash] with "content", "stop_reason" and "usage" keys
    def to_h
      content_blocks = @blocks.keys.sort.map { |idx| render_block(@blocks[idx]) }

      { "content" => content_blocks, "stop_reason" => @stop_reason, "usage" => @usage }
    end

    # Parse a frame payload; returns the decoded Hash, or nil for anything
    # that is not a JSON object.
    private def parse_or_nil(s)
      parsed = JSON.parse(s.to_s)
      parsed.is_a?(Hash) ? parsed : nil
    rescue JSON::ParserError, EncodingError
      nil
    end

    # Register a new content block at the index announced by the stream
    # (falling back to the next free slot when no index is given).
    private def start_block(data)
      idx = data["index"] || @blocks.size
      cb = data["content_block"]
      cb = {} unless cb.is_a?(Hash)

      @blocks[idx] =
        if cb["type"] == "tool_use"
          { kind: :tool_use, id: cb["id"], name: cb["name"], input_str: +"" }
        else
          { kind: :text, text: +"" }
        end
    end

    # Append a content_block_delta payload to the block it targets, creating
    # a text block on the fly when the start event was never seen.
    private def apply_delta(data)
      idx = data["index"] || 0
      delta = data["delta"]
      delta = {} unless delta.is_a?(Hash)
      block = (@blocks[idx] ||= { kind: :text, text: +"" })

      case delta["type"]
      when "text_delta"
        block[:kind] ||= :text
        block[:text] ||= +""
        block[:text] << delta["text"].to_s
      when "input_json_delta"
        block[:kind] = :tool_use
        block[:input_str] ||= +""
        block[:input_str] << delta["partial_json"].to_s
      when "thinking_delta"
        block[:kind] = :thinking
        block[:thinking] ||= +""
        block[:thinking] << delta["thinking"].to_s
      end
    end

    # Convert one accumulated block into its response-hash representation.
    private def render_block(block)
      if block[:kind] == :tool_use
        { "type" => "tool_use", "id" => block[:id], "name" => block[:name],
          "input" => parse_tool_input(block[:input_str]) }
      else
        { "type" => "text", "text" => block[:text].to_s }
      end
    end

    # Decode the concatenated partial_json fragments of a tool call.
    # Returns {} when nothing was streamed and the raw string when the
    # fragments do not form valid JSON.
    private def parse_tool_input(raw)
      return {} if raw.to_s.empty?

      JSON.parse(raw)
    rescue JSON::ParserError, EncodingError
      raw
    end

    # Merge a usage hash from the stream and report the new totals.
    private def record_usage(usage)
      return unless usage.is_a?(Hash)

      @usage.merge!(usage)
      emit_usage_progress
    end

    # Report token counts taken from the provider-supplied usage numbers.
    # Input tokens include cache reads. Skipped when nothing changed since
    # the previous report; callback errors are logged, never raised.
    private def emit_usage_progress
      return unless @on_chunk

      input = @usage["input_tokens"].to_i + @usage["cache_read_input_tokens"].to_i
      output = @usage["output_tokens"].to_i
      return if input == @last_input_tokens && output == @last_output_tokens

      @last_input_tokens = input
      @last_output_tokens = output
      @on_chunk.call(input_tokens: input, output_tokens: output)
    rescue => e
      Clacky::Logger.warn("[AnthropicStreamAggregator] on_chunk: #{e.class}: #{e.message}")
    end

    # Report an estimated output-token count between usage events, derived
    # from the amount of content streamed so far.
    private def emit_estimate_progress
      return unless @on_chunk

      output = approximate_output_tokens
      return if output == @last_output_tokens

      @last_output_tokens = output
      @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
    rescue => e
      Clacky::Logger.warn("[AnthropicStreamAggregator] on_chunk: #{e.class}: #{e.message}")
    end

    # Rough estimate: one token per four bytes of streamed text, tool input
    # and thinking content, rounded up.
    private def approximate_output_tokens
      total_bytes = @blocks.values.sum do |b|
        b[:text].to_s.bytesize + b[:input_str].to_s.bytesize + b[:thinking].to_s.bytesize
      end
      (total_bytes / 4.0).ceil
    end
  end
end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Clacky
  # Reassembles a Bedrock Converse event stream into the same hash shape that
  # MessageFormat::Bedrock.parse_response expects from a non-streaming response,
  # while invoking on_chunk(input_tokens:, output_tokens:) as usage information
  # accumulates.
  #
  # Bedrock event-stream events handled (passed through as raw event JSON):
  #
  #   messageStart      → { role: "assistant" }
  #   contentBlockStart → { start: {toolUse: {toolUseId, name}} | {}, contentBlockIndex: N }
  #   contentBlockDelta → { delta: {text: "..."} | {toolUse: {input: "..."}} | {reasoningContent: {text: "..."}}, contentBlockIndex: N }
  #   contentBlockStop  → { contentBlockIndex: N }
  #   messageStop       → { stopReason: "end_turn" | "tool_use" | "max_tokens" | ... }
  #   metadata          → { usage: {inputTokens, outputTokens, cacheReadInputTokens, cacheWriteInputTokens}, metrics: {...} }
  #
  # Tool-use input is streamed as a sequence of partial JSON strings. The
  # fragments are concatenated and parsed in to_h; when the concatenation is
  # not valid JSON the raw string is passed through instead.
  class BedrockStreamAggregator
    # @param on_chunk [#call, nil] optional progress callback; invoked with
    #   the keyword arguments input_tokens: and output_tokens:.
    def initialize(on_chunk: nil)
      @on_chunk = on_chunk
      @role = "assistant"
      @blocks = {}            # contentBlockIndex => accumulated block state
      @stop_reason = nil
      @usage = {}             # merged "usage" hashes from metadata events
      @last_input_tokens = 0  # last values handed to on_chunk (de-duplication)
      @last_output_tokens = 0
    end

    # Feed one stream frame into the aggregator.
    #
    # Payloads that are not a JSON object (malformed JSON, nil, arrays,
    # scalars) are ignored so that a single bad frame cannot abort the stream.
    #
    # @param event [String] Bedrock event type
    # @param data_str [String, nil] raw JSON payload of the frame
    # @return [void]
    def handle(event, data_str)
      data = parse_or_nil(data_str)
      return unless data

      case event
      when "messageStart"
        @role = data["role"] || @role
      when "contentBlockStart"
        start_block(data)
      when "contentBlockDelta"
        apply_delta(data)
        emit_estimate_progress
      when "contentBlockStop"
        # Nothing to assemble: blocks are finalised in to_h.
      when "messageStop"
        @stop_reason = data["stopReason"] || @stop_reason
      when "metadata"
        usage = data["usage"]
        if usage.is_a?(Hash)
          @usage.merge!(usage)
          emit_usage_progress
        end
      end
    end

    # Render the canonical non-streaming Bedrock response hash so the existing
    # MessageFormat::Bedrock.parse_response can consume it unchanged.
    #
    # Tool-use blocks carry their input parsed from the accumulated JSON
    # (the raw string when it does not parse, {} when nothing was streamed).
    # Every other block kind is rendered from its :text buffer; a block that
    # only received reasoning deltas therefore renders with an empty "text".
    #
    # @return [Hash] with "output", "stopReason" and "usage" keys
    def to_h
      content_blocks = @blocks.keys.sort.map { |idx| render_block(@blocks[idx]) }

      {
        "output" => { "message" => { "role" => @role, "content" => content_blocks } },
        "stopReason" => @stop_reason,
        "usage" => @usage
      }
    end

    # Parse a frame payload; returns the decoded Hash, or nil for anything
    # that is not a JSON object.
    private def parse_or_nil(s)
      parsed = JSON.parse(s.to_s)
      parsed.is_a?(Hash) ? parsed : nil
    rescue JSON::ParserError, EncodingError
      nil
    end

    # Register a new content block at the announced index (falling back to
    # the next free slot when no index is given).
    private def start_block(data)
      idx = data["contentBlockIndex"] || @blocks.size
      start = data["start"]
      tool_use = start.is_a?(Hash) ? start["toolUse"] : nil

      @blocks[idx] =
        if tool_use
          { kind: :tool_use, id: tool_use["toolUseId"], name: tool_use["name"], input_str: +"" }
        else
          { kind: :text, text: +"" }
        end
    end

    # Append a contentBlockDelta payload to the block it targets, creating a
    # text block on the fly when the start event was never seen.
    private def apply_delta(data)
      idx = data["contentBlockIndex"] || 0
      delta = data["delta"]
      delta = {} unless delta.is_a?(Hash)
      block = (@blocks[idx] ||= { kind: :text, text: +"" })

      if delta["text"]
        block[:kind] ||= :text
        block[:text] ||= +""
        block[:text] << delta["text"].to_s
      elsif (tu = delta["toolUse"])
        block[:kind] = :tool_use
        block[:input_str] ||= +""
        block[:input_str] << tu["input"].to_s
        # Only fill in id/name when the start event did not provide them.
        block[:id] ||= tu["toolUseId"]
        block[:name] ||= tu["name"]
      elsif (rc = delta["reasoningContent"])
        block[:kind] = :reasoning
        block[:reasoning] ||= +""
        block[:reasoning] << rc["text"].to_s
      end
    end

    # Convert one accumulated block into its response-hash representation.
    private def render_block(block)
      if block[:kind] == :tool_use
        { "toolUse" => { "toolUseId" => block[:id], "name" => block[:name],
                         "input" => parse_tool_input(block[:input_str]) } }
      else
        { "text" => block[:text].to_s }
      end
    end

    # Decode the concatenated partial-JSON fragments of a tool call.
    # Returns {} when nothing was streamed and the raw string when the
    # fragments do not form valid JSON.
    private def parse_tool_input(raw)
      return {} if raw.to_s.empty?

      JSON.parse(raw)
    rescue JSON::ParserError, EncodingError
      raw
    end

    # Report token counts taken from the merged usage numbers. Input tokens
    # include cache reads. Skipped when nothing changed since the previous
    # report; callback errors are logged, never raised.
    private def emit_usage_progress
      return unless @on_chunk

      input = @usage["inputTokens"].to_i + @usage["cacheReadInputTokens"].to_i
      output = @usage["outputTokens"].to_i
      return if input == @last_input_tokens && output == @last_output_tokens

      @last_input_tokens = input
      @last_output_tokens = output
      @on_chunk.call(input_tokens: input, output_tokens: output)
    rescue => e
      Clacky::Logger.warn("[BedrockStreamAggregator] on_chunk: #{e.class}: #{e.message}")
    end

    # Report an estimated output-token count between metadata events, derived
    # from the amount of content streamed so far.
    private def emit_estimate_progress
      return unless @on_chunk

      output = approximate_output_tokens
      return if output == @last_output_tokens

      @last_output_tokens = output
      @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
    rescue => e
      Clacky::Logger.warn("[BedrockStreamAggregator] on_chunk: #{e.class}: #{e.message}")
    end

    # Rough estimate: one token per four bytes of streamed text, tool input
    # and reasoning content, rounded up.
    private def approximate_output_tokens
      total_bytes = @blocks.values.sum do |b|
        b[:text].to_s.bytesize + b[:input_str].to_s.bytesize + b[:reasoning].to_s.bytesize
      end
      (total_bytes / 4.0).ceil
    end
  end
end
data/lib/clacky/cli.rb CHANGED
@@ -163,6 +163,7 @@ module Clacky
163
163
  end
164
164
  ensure
165
165
  Dir.chdir(original_dir)
166
+ Clacky::BrowserManager.instance.stop rescue nil
166
167
  end
167
168
  end
168
169
 
@@ -942,8 +943,8 @@ module Clacky
942
943
  $ clacky server
943
944
  $ clacky server --port 8080
944
945
  LONGDESC
945
- option :host, type: :string, default: "127.0.0.1", desc: "Bind host (default: 127.0.0.1)"
946
- option :port, type: :numeric, default: 7070, desc: "Listen port (default: 7070)"
946
+ option :host, type: :string, aliases: ["-b", "--bind"], default: "127.0.0.1", desc: "Bind host (default: 127.0.0.1)"
947
+ option :port, type: :numeric, aliases: "-p", default: 7070, desc: "Listen port (default: 7070)"
947
948
  option :brand_test, type: :boolean, default: false,
948
949
  desc: "Enable brand test mode: mock license activation without calling remote API"
949
950
  option :no_compression, type: :boolean, default: false,
@@ -954,11 +955,17 @@ module Clacky
954
955
  desc: "Disable prompt caching"
955
956
  option :no_skill_evolution, type: :boolean, default: false,
956
957
  desc: "Disable automatic skill evolution"
958
+ option :help, type: :boolean, aliases: "-h", desc: "Show this help message"
957
959
  def server
960
+ if options[:help]
961
+ invoke :help, ["server"]
962
+ return
963
+ end
964
+
958
965
  # ── Security gate ──────────────────────────────────────────────────────
959
966
  # Binding to 0.0.0.0 exposes the server to the public network.
960
967
  # Refuse to start unless CLACKY_ACCESS_KEY env var is set.
961
- if options[:host] == "0.0.0.0" && ENV.fetch("CLACKY_ACCESS_KEY", "").strip.empty?
968
+ if options[:host] == "0.0.0.0" && !ENV.key?("CLACKY_ACCESS_KEY")
962
969
  puts <<~MSG
963
970
  ╔══════════════════════════════════════════════════════════════╗
964
971
  ║ ⚠️ Security Warning: Refusing to start ║
data/lib/clacky/client.rb CHANGED
@@ -119,37 +119,59 @@ module Clacky
119
119
  # signal metric — see docs). When we migrate to streaming later, this
120
120
  # same `ttft_ms` field will start carrying the *actual* first-token
121
121
  # latency without any schema change.
122
- def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
122
+ # @param on_chunk [Proc, nil] optional streaming progress callback.
123
+ # Receives keyword args { input_tokens:, output_tokens: } with cumulative
124
+ # token counts. When nil, behaves exactly as the historical non-streaming
125
+ # path. When given but streaming is not yet wired for the active provider,
126
+ # a single synthetic invocation is fired after the response is received,
127
+ # so UI plumbing can be exercised end-to-end without the proxy work.
128
+ def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil)
123
129
  caching_enabled = enable_caching && supports_prompt_caching?(model)
124
130
  cloned = deep_clone(messages)
125
131
 
132
+ streaming_used = false
133
+ first_chunk_at = nil
134
+ wrapped_on_chunk = on_chunk && lambda do |**kwargs|
135
+ first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
136
+ on_chunk.call(**kwargs)
137
+ end
138
+
126
139
  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
127
140
  response =
128
141
  if bedrock?
129
- send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled)
142
+ streaming_used = !on_chunk.nil?
143
+ send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
130
144
  elsif anthropic_format?
131
- send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
145
+ streaming_used = !on_chunk.nil?
146
+ send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
132
147
  else
133
- send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
148
+ streaming_used = !on_chunk.nil?
149
+ send_openai_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
134
150
  end
135
151
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
136
152
 
153
+ if on_chunk && !streaming_used
154
+ usage = response[:usage] || {}
155
+ safe_invoke_on_chunk(
156
+ on_chunk,
157
+ input_tokens: usage[:prompt_tokens].to_i,
158
+ output_tokens: usage[:completion_tokens].to_i
159
+ )
160
+ end
161
+
137
162
  duration_ms = ((t1 - t0) * 1000).round
138
- # Throughput is only meaningful with a reasonable output size; below ~10
139
- # tokens the sample is too small to be informative and the result is
140
- # wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
141
- # Canonical usage hashes from message_format/* all use :completion_tokens.
163
+ ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
142
164
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
143
165
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
144
166
 
145
167
  response[:latency] = {
146
- ttft_ms: duration_ms, # non-streaming: TTFT == full duration
168
+ ttft_ms: ttft_ms,
147
169
  duration_ms: duration_ms,
148
170
  output_tokens: output_tokens,
149
171
  tps: tps,
150
172
  model: model,
151
173
  measured_at: Time.now.to_f,
152
- streaming: false # future flag — true when we migrate
174
+ streaming: streaming_used
153
175
  }
154
176
  response
155
177
  end
@@ -195,8 +217,10 @@ module Clacky
195
217
 
196
218
  # ── Bedrock Converse request / response ───────────────────────────────────
197
219
 
198
- def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
199
- body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
220
+ def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
221
+ body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
222
+ return send_bedrock_stream_request(body, model, on_chunk) if on_chunk
223
+
200
224
  response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }
201
225
 
202
226
  raise_error(response) unless response.status == 200
@@ -205,6 +229,29 @@ module Clacky
205
229
  MessageFormat::Bedrock.parse_response(parsed_body)
206
230
  end
207
231
 
232
# Streaming counterpart of the Bedrock Converse request.
#
# Sends the request body with `stream: true` to the converse-stream endpoint
# and consumes the reply incrementally: every received chunk is appended to
# a buffer, complete SSE frames are drained from it, and each frame
# (event name + raw event JSON) is handed to a BedrockStreamAggregator.
# Once the request finishes, the aggregated hash is run through the regular
# Bedrock response parser so downstream code sees the non-streaming shape.
#
# NOTE(review): the body is consumed by on_data, so on a non-200 reply the
# error payload has presumably gone through the aggregator rather than
# response.body — confirm raise_error still reports enough detail.
private def send_bedrock_stream_request(body, model, on_chunk)
  pending = +""
  collector = BedrockStreamAggregator.new(on_chunk: on_chunk)
  payload = body.merge(stream: true)

  response = bedrock_connection.post(bedrock_stream_endpoint(model)) do |request|
    request.body = payload.to_json
    # Kept as a proc: procs tolerate whatever number of arguments the HTTP
    # client passes to the streaming hook.
    request.options.on_data = proc do |piece, _bytes_received, _env|
      pending << piece
      drain_sse_frames(pending) do |event_name, event_json|
        collector.handle(event_name, event_json)
      end
    end
  end

  raise_error(response) unless response.status == 200
  MessageFormat::Bedrock.parse_response(collector.to_h)
end
254
+
208
255
  def parse_simple_bedrock_response(response)
209
256
  raise_error(response) unless response.status == 200
210
257
  data = safe_json_parse(response.body, context: "LLM response")
@@ -216,11 +263,13 @@ module Clacky
216
263
 
217
264
  # ── Anthropic request / response ──────────────────────────────────────────
218
265
 
219
- def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
266
+ def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
220
267
  # Apply cache_control to the message that marks the cache breakpoint
221
268
  messages = apply_message_caching(messages) if caching_enabled
222
269
 
223
- body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
270
+ body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
271
+ return send_anthropic_stream_request(body, on_chunk) if on_chunk
272
+
224
273
  response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
225
274
 
226
275
  raise_error(response) unless response.status == 200
@@ -229,6 +278,24 @@ module Clacky
229
278
  MessageFormat::Anthropic.parse_response(parsed_body)
230
279
  end
231
280
 
281
# Streaming counterpart of the Anthropic Messages request.
#
# Posts the request body with `stream: true` and an SSE Accept header, then
# consumes the reply incrementally: every received chunk is appended to a
# buffer, complete SSE frames are drained from it, and each frame is handed
# to an AnthropicStreamAggregator. The aggregated hash is finally run
# through the regular Anthropic response parser.
#
# NOTE(review): the body is consumed by on_data, so on a non-200 reply the
# error payload has presumably gone through the aggregator rather than
# response.body — confirm raise_error still reports enough detail.
private def send_anthropic_stream_request(body, on_chunk)
  pending = +""
  collector = AnthropicStreamAggregator.new(on_chunk: on_chunk)
  payload = body.merge(stream: true)

  response = anthropic_connection.post(anthropic_messages_path) do |request|
    request.headers["Accept"] = "text/event-stream"
    request.body = payload.to_json
    # Kept as a proc: procs tolerate whatever number of arguments the HTTP
    # client passes to the streaming hook.
    request.options.on_data = proc do |piece, _bytes_received, _env|
      pending << piece
      drain_sse_frames(pending) do |event_name, event_json|
        collector.handle(event_name, event_json)
      end
    end
  end

  raise_error(response) unless response.status == 200
  MessageFormat::Anthropic.parse_response(collector.to_h)
end
298
+
232
299
  def parse_simple_anthropic_response(response)
233
300
  raise_error(response) unless response.status == 200
234
301
  data = safe_json_parse(response.body, context: "LLM response")
@@ -237,24 +304,47 @@ module Clacky
237
304
 
238
305
  # ── OpenAI request / response ─────────────────────────────────────────────
239
306
 
240
- def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
307
+ def send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
241
308
  # Apply cache_control markers to messages when caching is enabled.
242
309
  # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
243
310
  messages = apply_message_caching(messages) if caching_enabled
244
311
 
245
- body = MessageFormat::OpenAI.build_request_body(
312
+ body = MessageFormat::OpenAI.build_request_body(
246
313
  messages, model, tools, max_tokens, caching_enabled,
247
314
  vision_supported: @vision_supported
248
315
  )
316
+ return send_openai_stream_request(body, on_chunk) if on_chunk
317
+
249
318
  response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
250
319
 
251
320
  raise_error(response) unless response.status == 200
252
321
  check_html_response(response)
253
-
322
+
254
323
  parsed_body = safe_json_parse(response.body, context: "LLM response")
255
324
  MessageFormat::OpenAI.parse_response(parsed_body)
256
325
  end
257
326
 
327
# Streaming counterpart of the OpenAI-compatible chat-completions request
# (DeepSeek/OpenRouter via platform/llm_proxy).
#
# Posts the request body with `stream: true` and
# `stream_options: { include_usage: true }`, then consumes the reply through
# the connection's on_data hook: every chunk is appended to a buffer,
# complete SSE frames are drained from it, and each frame's data payload is
# handed to an OpenAIStreamAggregator (the SSE event name is not used here).
# The aggregated hash is finally run through the regular OpenAI response
# parser so MessageFormat::OpenAI.parse_response works unchanged.
#
# NOTE(review): the body is consumed by on_data, so on a non-200 reply the
# error payload has presumably gone through the aggregator rather than
# response.body — confirm raise_error still reports enough detail.
private def send_openai_stream_request(body, on_chunk)
  pending = +""
  collector = OpenAIStreamAggregator.new(on_chunk: on_chunk)
  payload = body.merge(stream: true, stream_options: { include_usage: true })

  response = openai_connection.post("chat/completions") do |request|
    request.body = payload.to_json
    # Kept as a proc: procs tolerate whatever number of arguments the HTTP
    # client passes to the streaming hook.
    request.options.on_data = proc do |piece, _bytes_received, _env|
      pending << piece
      drain_sse_frames(pending) do |_event_name, event_json|
        collector.handle(event_json)
      end
    end
  end

  raise_error(response) unless response.status == 200
  MessageFormat::OpenAI.parse_response(collector.to_h)
end
347
+
258
348
  def parse_simple_openai_response(response)
259
349
  raise_error(response) unless response.status == 200
260
350
  parsed_body = safe_json_parse(response.body, context: "LLM response")
@@ -320,6 +410,33 @@ module Clacky
320
410
  "/model/#{model}/converse"
321
411
  end
322
412
 
413
# Request path of the Bedrock Converse streaming endpoint for +model+.
#
# @param model [#to_s] model identifier placed in the path
# @return [String] "/model/<model>/converse-stream"
private def bedrock_stream_endpoint(model)
  format("/model/%s/converse-stream", model)
end
417
+
418
# Pull complete SSE frames out of a buffer and yield them as (event, data).
#
# A frame ends at a blank line. Both LF ("\n\n") and CRLF ("\r\n\r\n")
# line endings are recognised, because the SSE wire format permits either
# and a CRLF-delimited stream would otherwise never produce a frame.
# Incomplete trailing data stays in the buffer for the next chunk. Frames
# without an explicit `event:` line use the default "message" type per the
# SSE spec; frames without any `data:` line (comments, keep-alives) are
# consumed but not yielded. Multiple `data:` lines are joined with "\n".
#
# @param buf [String] mutable receive buffer; consumed frames are removed
# @yieldparam event [String] event name
# @yieldparam data [String] frame payload
# @return [void]
private def drain_sse_frames(buf)
  loop do
    # Plain substring searches are used on the raw buffer (no regexps): it
    # may end in the middle of a multi-byte character between chunks.
    lf_at = buf.index("\n\n")
    crlf_at = buf.index("\r\n\r\n")
    break if lf_at.nil? && crlf_at.nil?

    # Cut at whichever terminator comes first in the buffer.
    frame_length =
      if crlf_at && (lf_at.nil? || crlf_at < lf_at)
        crlf_at + 4
      else
        lf_at + 2
      end
    frame = buf.slice!(0, frame_length)

    event = "message"
    data_lines = []
    frame.each_line do |line|
      line = line.chomp # strips "\n" as well as "\r\n"
      if line.start_with?("event:")
        event = line.sub(/^event:\s*/, "")
      elsif line.start_with?("data:")
        data_lines << line.sub(/^data:\s*/, "")
      end
    end
    next if data_lines.empty?

    yield event, data_lines.join("\n")
  end
end
439
+
323
440
  def bedrock_connection
324
441
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
325
442
  conn.headers["Content-Type"] = "application/json"
@@ -477,6 +594,18 @@ module Clacky
477
594
  "The request will be retried automatically."
478
595
  end
479
596
 
597
+ # ── Streaming helpers ─────────────────────────────────────────────────────
598
+
599
# Call the caller-supplied on_chunk callback without letting it break the
# LLM request: progress updates are best-effort, so any StandardError the
# callback raises is logged as a warning and swallowed. A nil callback is a
# no-op.
#
# @param on_chunk [#call, nil] progress callback
# @param kwargs [Hash] keyword arguments forwarded to the callback
# @return [Object, nil] the callback's result; nil when there is no callback
private def safe_invoke_on_chunk(on_chunk, **kwargs)
  begin
    on_chunk&.call(**kwargs)
  rescue StandardError => error
    Clacky::Logger.warn("[on_chunk] callback raised #{error.class}: #{error.message}")
  end
end
608
+
480
609
  # ── Utilities ─────────────────────────────────────────────────────────────
481
610
 
482
611
  def deep_clone(obj)
@@ -221,8 +221,12 @@ then parse the last stdout line as JSON and read `installed` as N.
221
221
 
222
222
  ### A.10. Import external skills (optional)
223
223
 
224
- Run `test -d ~/.openclaw && echo yes || echo no`. If `no`, skip silently.
225
- If `yes`:
224
+ Check if OpenClaw is installed:
225
+ - Run `test -d ~/.openclaw && echo yes || echo no`
226
+ - If `no` and on WSL (i.e. `/proc/version` contains `microsoft`), also run:
227
+ `powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null | tr -d '\r'` to get the Windows home, then check `test -d "$(wslpath '<win_home>')/.openclaw" && echo yes || echo no`
228
+ - If all checks return `no`, skip silently.
229
+ If any check returns `yes`:
226
230
  1. `ruby "SKILL_DIR/scripts/import_external_skills.rb" --source openclaw --dry-run`
227
231
  2. Parse the skill count N.
228
232
  3. Ask via `request_user_feedback`: