openclacky 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clacky/skills/gem-release/SKILL.md +27 -31
- data/CHANGELOG.md +35 -0
- data/Dockerfile +28 -0
- data/README.md +28 -7
- data/docs/engineering-article.md +343 -0
- data/lib/clacky/agent/llm_caller.rb +19 -1
- data/lib/clacky/agent/session_serializer.rb +6 -1
- data/lib/clacky/agent.rb +14 -5
- data/lib/clacky/anthropic_stream_aggregator.rb +135 -0
- data/lib/clacky/bedrock_stream_aggregator.rb +137 -0
- data/lib/clacky/cli.rb +10 -3
- data/lib/clacky/client.rb +146 -17
- data/lib/clacky/default_skills/onboard/SKILL.md +6 -2
- data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +50 -6
- data/lib/clacky/message_format/anthropic.rb +17 -1
- data/lib/clacky/openai_stream_aggregator.rb +130 -0
- data/lib/clacky/providers.rb +34 -0
- data/lib/clacky/server/channel/adapters/dingtalk/adapter.rb +142 -5
- data/lib/clacky/server/channel/adapters/dingtalk/api_client.rb +309 -0
- data/lib/clacky/server/http_server.rb +2 -3
- data/lib/clacky/server/web_ui_controller.rb +8 -4
- data/lib/clacky/ui2/progress_handle.rb +77 -15
- data/lib/clacky/ui2/ui_controller.rb +18 -2
- data/lib/clacky/ui_interface.rb +14 -0
- data/lib/clacky/utils/model_pricing.rb +96 -25
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +14 -4
- data/lib/clacky/web/i18n.js +6 -0
- data/lib/clacky/web/index.html +4 -2
- data/lib/clacky/web/onboard.js +6 -0
- data/lib/clacky/web/sessions.js +152 -48
- data/lib/clacky/web/settings.js +17 -5
- data/lib/clacky/web/vendor/katex/auto-render.min.js +1 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- data/lib/clacky/web/vendor/katex/katex.min.css +1 -0
- data/lib/clacky/web/vendor/katex/katex.min.js +1 -0
- data/lib/clacky/web/ws-dispatcher.js +19 -4
- data/lib/clacky.rb +3 -0
- data/scripts/build/lib/apt.sh +30 -10
- data/scripts/build/lib/network.sh +3 -2
- data/scripts/install.ps1 +14 -3
- data/scripts/install.sh +30 -9
- metadata +30 -17
- data/docs/HOW-TO-USE-CN.md +0 -96
- data/docs/HOW-TO-USE.md +0 -94
- data/docs/browser-cdp-native-design.md +0 -195
- data/docs/c-end-user-positioning.md +0 -64
- data/docs/config.example.yml +0 -27
- data/docs/deploy-architecture.md +0 -619
- data/docs/deploy_subagent_design.md +0 -540
- data/docs/install-script-simplification.md +0 -89
- data/docs/memory-architecture.md +0 -343
- data/docs/openclacky_cloud_api_reference.md +0 -584
- data/docs/security-design.md +0 -109
- data/docs/session-management-redesign.md +0 -202
- data/docs/system-skill-authoring-guide.md +0 -47
- data/docs/why-developer.md +0 -371
- data/docs/why-openclacky.md +0 -266
data/lib/clacky/agent.rb
CHANGED
|
@@ -427,7 +427,7 @@ module Clacky
|
|
|
427
427
|
tool_calls_count: (response[:tool_calls] || []).size
|
|
428
428
|
)
|
|
429
429
|
if response[:content] && !response[:content].empty?
|
|
430
|
-
emit_assistant_message(response[:content])
|
|
430
|
+
emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
|
|
431
431
|
end
|
|
432
432
|
|
|
433
433
|
# Show token usage after the assistant message so WebUI renders it below the bubble
|
|
@@ -448,7 +448,7 @@ module Clacky
|
|
|
448
448
|
|
|
449
449
|
# Show assistant message if there's content before tool calls
|
|
450
450
|
if response[:content] && !response[:content].empty?
|
|
451
|
-
emit_assistant_message(response[:content])
|
|
451
|
+
emit_assistant_message(response[:content], reasoning_content: response[:reasoning_content])
|
|
452
452
|
end
|
|
453
453
|
|
|
454
454
|
# Show token usage after assistant message (or immediately if no message).
|
|
@@ -1532,11 +1532,20 @@ module Clacky
|
|
|
1532
1532
|
# and cannot load file:// directly) and must stay scoped to the Web UI
|
|
1533
1533
|
# controller. IM channel subscribers need the original file:// markdown so
|
|
1534
1534
|
# parse_file_links can extract paths and deliver images as native attachments.
|
|
1535
|
-
private def emit_assistant_message(content)
|
|
1536
|
-
|
|
1535
|
+
private def emit_assistant_message(content, reasoning_content: nil)
|
|
1536
|
+
# Prepend reasoning/thinking content (from thinking-mode providers like
|
|
1537
|
+
# DeepSeek V4, Kimi K2) wrapped in <think> tags so the Web UI renders it
|
|
1538
|
+
# as a collapsible thinking block (see sessions.js _renderMarkdown).
|
|
1539
|
+
if reasoning_content && !reasoning_content.to_s.strip.empty?
|
|
1540
|
+
full_content = "<think>\n#{reasoning_content}\n</think>\n#{content}"
|
|
1541
|
+
else
|
|
1542
|
+
full_content = content
|
|
1543
|
+
end
|
|
1544
|
+
|
|
1545
|
+
return if full_content.nil? || full_content.to_s.strip.empty?
|
|
1537
1546
|
|
|
1538
1547
|
parsed = parse_file_links(content)
|
|
1539
|
-
@ui&.show_assistant_message(
|
|
1548
|
+
@ui&.show_assistant_message(full_content, files: parsed[:files])
|
|
1540
1549
|
end
|
|
1541
1550
|
|
|
1542
1551
|
# Track modified files for Time Machine snapshots
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Clacky
|
|
6
|
+
# Reassembles an Anthropic Messages SSE stream (event: message_start /
# content_block_start / content_block_delta / content_block_stop /
# message_delta / message_stop / ping) into the same hash shape that
# MessageFormat::Anthropic.parse_response expects from a non-streaming
# response, while invoking on_chunk(input_tokens:, output_tokens:) as
# usage accumulates.
#
# Block kinds tracked per content index:
#   :text      <- text_delta
#   :tool_use  <- input_json_delta (partial JSON strings, concatenated)
#   :thinking  <- thinking_delta / signature_delta
#
# Wire reference: https://docs.anthropic.com/en/api/messages-streaming
class AnthropicStreamAggregator
  # @param on_chunk [#call, nil] optional progress callback; called with
  #   keyword args input_tokens: and output_tokens: (cumulative counts).
  def initialize(on_chunk: nil)
    @on_chunk = on_chunk
    @blocks = {}
    @stop_reason = nil
    @usage = {}
    @last_input_tokens = 0
    @last_output_tokens = 0
  end

  # Feed one SSE frame into the aggregator.
  #
  # @param event [String] SSE event name (e.g. "content_block_delta")
  # @param data_str [String, nil] raw JSON payload of the frame
  # @return [void] frames whose payload is not a JSON object are ignored
  def handle(event, data_str)
    data = parse_or_nil(data_str)
    return unless data

    case event
    when "message_start"
      msg = data["message"] || {}
      if (u = msg["usage"])
        @usage.merge!(u)
        emit_usage_progress
      end
    when "content_block_start"
      idx = data["index"] || @blocks.size
      cb = data["content_block"] || {}
      @blocks[idx] =
        case cb["type"]
        when "tool_use"
          { kind: :tool_use, id: cb["id"], name: cb["name"], input_str: +"" }
        when "thinking"
          { kind: :thinking, thinking: +"", signature: +"" }
        else
          { kind: :text, text: +"" }
        end
    when "content_block_delta"
      idx = data["index"] || 0
      delta = data["delta"] || {}
      # A delta may arrive without a preceding start frame; default to text.
      block = (@blocks[idx] ||= { kind: :text, text: +"" })
      case delta["type"]
      when "text_delta"
        block[:kind] ||= :text
        block[:text] ||= +""
        block[:text] << delta["text"].to_s
      when "input_json_delta"
        block[:kind] = :tool_use
        block[:input_str] ||= +""
        block[:input_str] << delta["partial_json"].to_s
      when "thinking_delta"
        block[:kind] = :thinking
        block[:thinking] ||= +""
        block[:thinking] << delta["thinking"].to_s
      when "signature_delta"
        # The signature belongs to the thinking block at the same index.
        block[:signature] ||= +""
        block[:signature] << delta["signature"].to_s
      end
      emit_estimate_progress
    when "content_block_stop"
      # Nothing to do: blocks are finalised in to_h.
    when "message_delta"
      if (d = data["delta"])
        @stop_reason = d["stop_reason"] if d["stop_reason"]
      end
      if (u = data["usage"])
        @usage.merge!(u)
        emit_usage_progress
      end
    when "message_stop", "ping", "error"
      # no-op
    end
  end

  # Canonical non-streaming Anthropic response shape consumed by
  # MessageFormat::Anthropic.parse_response.
  #
  # @return [Hash] { "content" => [...], "stop_reason" => ..., "usage" => {...} }
  #   with content blocks ordered by their stream index.
  def to_h
    content_blocks = @blocks.keys.sort.map { |idx| render_block(@blocks[idx]) }

    { "content" => content_blocks, "stop_reason" => @stop_reason, "usage" => @usage }
  end

  # Convert one accumulated block into its non-streaming wire shape.
  private def render_block(b)
    case b[:kind]
    when :tool_use
      { "type" => "tool_use", "id" => b[:id], "name" => b[:name], "input" => parse_tool_input(b[:input_str]) }
    when :thinking
      # Keep the reasoning instead of degrading it to an empty text block.
      rendered = { "type" => "thinking", "thinking" => b[:thinking].to_s }
      rendered["signature"] = b[:signature] unless b[:signature].to_s.empty?
      rendered
    else
      { "type" => "text", "text" => b[:text].to_s }
    end
  end

  # Tool input arrives as concatenated partial JSON. Returns {} when nothing
  # was streamed, the parsed value when valid, and the raw string otherwise.
  private def parse_tool_input(raw)
    return {} if raw.to_s.empty?

    JSON.parse(raw)
  rescue JSON::ParserError
    raw
  end

  # Parse a frame payload; returns nil unless it is a JSON object, so that
  # handle never indexes into an Array/Integer/nil.
  private def parse_or_nil(s)
    parsed = JSON.parse(s)
    parsed.is_a?(Hash) ? parsed : nil
  rescue JSON::ParserError, TypeError
    nil
  end

  # Report provider-supplied usage numbers (skipped when unchanged).
  # Callback errors are logged, never raised into the stream loop.
  private def emit_usage_progress
    return unless @on_chunk
    input = @usage["input_tokens"].to_i + @usage["cache_read_input_tokens"].to_i
    output = @usage["output_tokens"].to_i
    return if input == @last_input_tokens && output == @last_output_tokens
    @last_input_tokens = input
    @last_output_tokens = output
    @on_chunk.call(input_tokens: input, output_tokens: output)
  rescue => e
    Clacky::Logger.warn("[AnthropicStreamAggregator] on_chunk: #{e.class}: #{e.message}")
  end

  # Report an estimated output-token count between usage frames.
  private def emit_estimate_progress
    return unless @on_chunk
    output = approximate_output_tokens
    return if output == @last_output_tokens
    @last_output_tokens = output
    @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
  rescue => e
    Clacky::Logger.warn("[AnthropicStreamAggregator] on_chunk: #{e.class}: #{e.message}")
  end

  # Rough estimate: total streamed bytes (text + tool input + thinking) / 4.
  private def approximate_output_tokens
    total_chars = @blocks.values.sum do |b|
      b[:text].to_s.bytesize + b[:input_str].to_s.bytesize + b[:thinking].to_s.bytesize
    end
    (total_chars / 4.0).ceil
  end
end
|
|
135
|
+
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Clacky
|
|
6
|
+
# Reassembles a Bedrock Converse event stream into the same hash shape that
# MessageFormat::Bedrock.parse_response expects from a non-streaming response,
# while invoking on_chunk(input_tokens:, output_tokens:) as usage information
# accumulates.
#
# Bedrock event-stream events handled (passed through as raw event JSON):
#
#   messageStart      → { role: "assistant" }
#   contentBlockStart → { start: {toolUse: {toolUseId, name}} | {}, contentBlockIndex: N }
#   contentBlockDelta → { delta: {text: "..."} | {toolUse: {input: "..."}} |
#                         {reasoningContent: {text: "..."} | {signature: "..."}}, contentBlockIndex: N }
#   contentBlockStop  → { contentBlockIndex: N }
#   messageStop       → { stopReason: "end_turn" | "tool_use" | "max_tokens" | ... }
#   metadata          → { usage: {inputTokens, outputTokens, cacheReadInputTokens, cacheWriteInputTokens}, metrics: {...} }
#
# Tool-use input is streamed as a sequence of partial JSON strings; they are
# concatenated and, in to_h, parsed into a Hash when the result is valid JSON
# (the raw string is kept otherwise).
class BedrockStreamAggregator
  # @param on_chunk [#call, nil] optional progress callback; called with
  #   keyword args input_tokens: and output_tokens: (cumulative counts).
  def initialize(on_chunk: nil)
    @on_chunk = on_chunk
    @role = "assistant"
    @blocks = {}
    @stop_reason = nil
    @usage = {}
    @last_input_tokens = 0
    @last_output_tokens = 0
  end

  # Feed one event frame into the aggregator.
  #
  # @param event [String] Bedrock event type (e.g. "contentBlockDelta")
  # @param data_str [String, nil] raw JSON payload of the event
  # @return [void] frames whose payload is not a JSON object are ignored
  def handle(event, data_str)
    data = parse_or_nil(data_str)
    return unless data

    case event
    when "messageStart"
      @role = data["role"] || @role
    when "contentBlockStart"
      idx = data["contentBlockIndex"] || @blocks.size
      start = data["start"] || {}
      if (tu = start["toolUse"])
        @blocks[idx] = { kind: :tool_use, id: tu["toolUseId"], name: tu["name"], input_str: +"" }
      else
        @blocks[idx] = { kind: :text, text: +"" }
      end
    when "contentBlockDelta"
      idx = data["contentBlockIndex"] || 0
      delta = data["delta"] || {}
      # A delta may arrive without a preceding start frame; default to text.
      block = (@blocks[idx] ||= { kind: :text, text: +"" })
      if delta["text"]
        block[:kind] ||= :text
        block[:text] ||= +""
        block[:text] << delta["text"].to_s
      elsif (tu = delta["toolUse"])
        block[:kind] = :tool_use
        block[:input_str] ||= +""
        block[:input_str] << tu["input"].to_s
        block[:id] ||= tu["toolUseId"]
        block[:name] ||= tu["name"]
      elsif (rc = delta["reasoningContent"])
        block[:kind] = :reasoning
        block[:reasoning] ||= +""
        block[:reasoning] << rc["text"].to_s
        if rc["signature"]
          block[:signature] ||= +""
          block[:signature] << rc["signature"].to_s
        end
      end
      emit_estimate_progress
    when "contentBlockStop"
      # Nothing to assemble: blocks are kept as-is until to_h.
    when "messageStop"
      @stop_reason = data["stopReason"] || @stop_reason
    when "metadata"
      if (u = data["usage"])
        @usage.merge!(u)
        emit_usage_progress(u)
      end
    end
  end

  # Render the canonical non-streaming Bedrock response hash so the existing
  # MessageFormat::Bedrock.parse_response can consume it unchanged.
  #
  # @return [Hash] { "output" => { "message" => {...} }, "stopReason" => ..., "usage" => {...} }
  #   with content blocks ordered by contentBlockIndex.
  def to_h
    content_blocks = @blocks.keys.sort.map { |idx| render_block(@blocks[idx]) }

    {
      "output" => { "message" => { "role" => @role, "content" => content_blocks } },
      "stopReason" => @stop_reason,
      "usage" => @usage
    }
  end

  # Convert one accumulated block into its non-streaming wire shape.
  private def render_block(b)
    case b[:kind]
    when :tool_use
      { "toolUse" => { "toolUseId" => b[:id], "name" => b[:name], "input" => parse_tool_input(b[:input_str]) } }
    when :reasoning
      # Keep the reasoning instead of degrading it to an empty text block.
      reasoning_text = { "text" => b[:reasoning].to_s }
      reasoning_text["signature"] = b[:signature] unless b[:signature].to_s.empty?
      { "reasoningContent" => { "reasoningText" => reasoning_text } }
    else
      { "text" => b[:text].to_s }
    end
  end

  # Returns {} when nothing was streamed, the parsed value when the
  # concatenated input is valid JSON, and the raw string otherwise.
  private def parse_tool_input(raw)
    return {} if raw.to_s.empty?

    JSON.parse(raw)
  rescue JSON::ParserError
    raw
  end

  # Parse a frame payload; returns nil unless it is a JSON object, so that
  # handle never indexes into an Array/Integer/nil.
  private def parse_or_nil(s)
    parsed = JSON.parse(s)
    parsed.is_a?(Hash) ? parsed : nil
  rescue JSON::ParserError, TypeError
    nil
  end

  # Report provider-supplied usage numbers from a metadata event (skipped
  # when unchanged). Callback errors are logged, never raised.
  private def emit_usage_progress(u)
    return unless @on_chunk
    input = u["inputTokens"].to_i + u["cacheReadInputTokens"].to_i
    output = u["outputTokens"].to_i
    return if input == @last_input_tokens && output == @last_output_tokens
    @last_input_tokens = input
    @last_output_tokens = output
    @on_chunk.call(input_tokens: input, output_tokens: output)
  rescue => e
    Clacky::Logger.warn("[BedrockStreamAggregator] on_chunk: #{e.class}: #{e.message}")
  end

  # Report an estimated output-token count between usage frames.
  private def emit_estimate_progress
    return unless @on_chunk
    output = approximate_output_tokens
    return if output == @last_output_tokens
    @last_output_tokens = output
    @on_chunk.call(input_tokens: @last_input_tokens, output_tokens: output)
  rescue => e
    Clacky::Logger.warn("[BedrockStreamAggregator] on_chunk: #{e.class}: #{e.message}")
  end

  # Rough estimate: total streamed bytes (text + tool input + reasoning) / 4.
  private def approximate_output_tokens
    total_chars = @blocks.values.sum do |b|
      b[:text].to_s.bytesize + b[:input_str].to_s.bytesize + b[:reasoning].to_s.bytesize
    end
    (total_chars / 4.0).ceil
  end
end
|
|
137
|
+
end
|
data/lib/clacky/cli.rb
CHANGED
|
@@ -163,6 +163,7 @@ module Clacky
|
|
|
163
163
|
end
|
|
164
164
|
ensure
|
|
165
165
|
Dir.chdir(original_dir)
|
|
166
|
+
Clacky::BrowserManager.instance.stop rescue nil
|
|
166
167
|
end
|
|
167
168
|
end
|
|
168
169
|
|
|
@@ -942,8 +943,8 @@ module Clacky
|
|
|
942
943
|
$ clacky server
|
|
943
944
|
$ clacky server --port 8080
|
|
944
945
|
LONGDESC
|
|
945
|
-
option :host, type: :string, default: "127.0.0.1", desc: "Bind host (default: 127.0.0.1)"
|
|
946
|
-
option :port, type: :numeric, default: 7070, desc: "Listen port (default: 7070)"
|
|
946
|
+
option :host, type: :string, aliases: ["-b", "--bind"], default: "127.0.0.1", desc: "Bind host (default: 127.0.0.1)"
|
|
947
|
+
option :port, type: :numeric, aliases: "-p", default: 7070, desc: "Listen port (default: 7070)"
|
|
947
948
|
option :brand_test, type: :boolean, default: false,
|
|
948
949
|
desc: "Enable brand test mode: mock license activation without calling remote API"
|
|
949
950
|
option :no_compression, type: :boolean, default: false,
|
|
@@ -954,11 +955,17 @@ module Clacky
|
|
|
954
955
|
desc: "Disable prompt caching"
|
|
955
956
|
option :no_skill_evolution, type: :boolean, default: false,
|
|
956
957
|
desc: "Disable automatic skill evolution"
|
|
958
|
+
option :help, type: :boolean, aliases: "-h", desc: "Show this help message"
|
|
957
959
|
def server
|
|
960
|
+
if options[:help]
|
|
961
|
+
invoke :help, ["server"]
|
|
962
|
+
return
|
|
963
|
+
end
|
|
964
|
+
|
|
958
965
|
# ── Security gate ──────────────────────────────────────────────────────
|
|
959
966
|
# Binding to 0.0.0.0 exposes the server to the public network.
|
|
960
967
|
# Refuse to start unless CLACKY_ACCESS_KEY env var is set.
|
|
961
|
-
if options[:host] == "0.0.0.0" && ENV.
|
|
968
|
+
if options[:host] == "0.0.0.0" && !ENV.key?("CLACKY_ACCESS_KEY")
|
|
962
969
|
puts <<~MSG
|
|
963
970
|
╔══════════════════════════════════════════════════════════════╗
|
|
964
971
|
║ ⚠️ Security Warning: Refusing to start ║
|
data/lib/clacky/client.rb
CHANGED
|
@@ -119,37 +119,59 @@ module Clacky
|
|
|
119
119
|
# signal metric — see docs). When we migrate to streaming later, this
|
|
120
120
|
# same `ttft_ms` field will start carrying the *actual* first-token
|
|
121
121
|
# latency without any schema change.
|
|
122
|
-
|
|
122
|
+
# @param on_chunk [Proc, nil] optional streaming progress callback.
|
|
123
|
+
# Receives keyword args { input_tokens:, output_tokens: } with cumulative
|
|
124
|
+
# token counts. When nil, behaves exactly as the historical non-streaming
|
|
125
|
+
# path. When given but streaming is not yet wired for the active provider,
|
|
126
|
+
# a single synthetic invocation is fired after the response is received,
|
|
127
|
+
# so UI plumbing can be exercised end-to-end without the proxy work.
|
|
128
|
+
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil)
|
|
123
129
|
caching_enabled = enable_caching && supports_prompt_caching?(model)
|
|
124
130
|
cloned = deep_clone(messages)
|
|
125
131
|
|
|
132
|
+
streaming_used = false
|
|
133
|
+
first_chunk_at = nil
|
|
134
|
+
wrapped_on_chunk = on_chunk && lambda do |**kwargs|
|
|
135
|
+
first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
136
|
+
on_chunk.call(**kwargs)
|
|
137
|
+
end
|
|
138
|
+
|
|
126
139
|
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
127
140
|
response =
|
|
128
141
|
if bedrock?
|
|
129
|
-
|
|
142
|
+
streaming_used = !on_chunk.nil?
|
|
143
|
+
send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
|
|
130
144
|
elsif anthropic_format?
|
|
131
|
-
|
|
145
|
+
streaming_used = !on_chunk.nil?
|
|
146
|
+
send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
|
|
132
147
|
else
|
|
133
|
-
|
|
148
|
+
streaming_used = !on_chunk.nil?
|
|
149
|
+
send_openai_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: wrapped_on_chunk)
|
|
134
150
|
end
|
|
135
151
|
t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
136
152
|
|
|
153
|
+
if on_chunk && !streaming_used
|
|
154
|
+
usage = response[:usage] || {}
|
|
155
|
+
safe_invoke_on_chunk(
|
|
156
|
+
on_chunk,
|
|
157
|
+
input_tokens: usage[:prompt_tokens].to_i,
|
|
158
|
+
output_tokens: usage[:completion_tokens].to_i
|
|
159
|
+
)
|
|
160
|
+
end
|
|
161
|
+
|
|
137
162
|
duration_ms = ((t1 - t0) * 1000).round
|
|
138
|
-
|
|
139
|
-
# tokens the sample is too small to be informative and the result is
|
|
140
|
-
# wildly high (e.g. 1 token / 50ms → 20 tok/s is meaningless).
|
|
141
|
-
# Canonical usage hashes from message_format/* all use :completion_tokens.
|
|
163
|
+
ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
|
|
142
164
|
output_tokens = response[:usage]&.dig(:completion_tokens).to_i
|
|
143
165
|
tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil
|
|
144
166
|
|
|
145
167
|
response[:latency] = {
|
|
146
|
-
ttft_ms:
|
|
168
|
+
ttft_ms: ttft_ms,
|
|
147
169
|
duration_ms: duration_ms,
|
|
148
170
|
output_tokens: output_tokens,
|
|
149
171
|
tps: tps,
|
|
150
172
|
model: model,
|
|
151
173
|
measured_at: Time.now.to_f,
|
|
152
|
-
streaming:
|
|
174
|
+
streaming: streaming_used
|
|
153
175
|
}
|
|
154
176
|
response
|
|
155
177
|
end
|
|
@@ -195,8 +217,10 @@ module Clacky
|
|
|
195
217
|
|
|
196
218
|
# ── Bedrock Converse request / response ───────────────────────────────────
|
|
197
219
|
|
|
198
|
-
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
|
|
199
|
-
body
|
|
220
|
+
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
|
|
221
|
+
body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
222
|
+
return send_bedrock_stream_request(body, model, on_chunk) if on_chunk
|
|
223
|
+
|
|
200
224
|
response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }
|
|
201
225
|
|
|
202
226
|
raise_error(response) unless response.status == 200
|
|
@@ -205,6 +229,29 @@ module Clacky
|
|
|
205
229
|
MessageFormat::Bedrock.parse_response(parsed_body)
|
|
206
230
|
end
|
|
207
231
|
|
|
232
|
+
# Streaming variant for Bedrock Converse.
|
|
233
|
+
# Posts to /model/{m}/converse-stream with stream:true; the proxy returns
|
|
234
|
+
# SSE frames whose `event` is the Bedrock event-type and whose `data` is
|
|
235
|
+
# the raw Bedrock event JSON. We accumulate frames into a synthetic
|
|
236
|
+
# non-streaming response and feed it back through the existing parser so
|
|
237
|
+
# downstream code is identical.
|
|
238
|
+
private def send_bedrock_stream_request(body, model, on_chunk)
|
|
239
|
+
stream_body = body.merge(stream: true)
|
|
240
|
+
aggregator = BedrockStreamAggregator.new(on_chunk: on_chunk)
|
|
241
|
+
sse_buf = +""
|
|
242
|
+
|
|
243
|
+
response = bedrock_connection.post(bedrock_stream_endpoint(model)) do |req|
|
|
244
|
+
req.body = stream_body.to_json
|
|
245
|
+
req.options.on_data = proc do |chunk, _bytes_received, _env|
|
|
246
|
+
sse_buf << chunk
|
|
247
|
+
drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
raise_error(response) unless response.status == 200
|
|
252
|
+
MessageFormat::Bedrock.parse_response(aggregator.to_h)
|
|
253
|
+
end
|
|
254
|
+
|
|
208
255
|
def parse_simple_bedrock_response(response)
|
|
209
256
|
raise_error(response) unless response.status == 200
|
|
210
257
|
data = safe_json_parse(response.body, context: "LLM response")
|
|
@@ -216,11 +263,13 @@ module Clacky
|
|
|
216
263
|
|
|
217
264
|
# ── Anthropic request / response ──────────────────────────────────────────
|
|
218
265
|
|
|
219
|
-
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
|
|
266
|
+
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
|
|
220
267
|
# Apply cache_control to the message that marks the cache breakpoint
|
|
221
268
|
messages = apply_message_caching(messages) if caching_enabled
|
|
222
269
|
|
|
223
|
-
body
|
|
270
|
+
body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
271
|
+
return send_anthropic_stream_request(body, on_chunk) if on_chunk
|
|
272
|
+
|
|
224
273
|
response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
|
|
225
274
|
|
|
226
275
|
raise_error(response) unless response.status == 200
|
|
@@ -229,6 +278,24 @@ module Clacky
|
|
|
229
278
|
MessageFormat::Anthropic.parse_response(parsed_body)
|
|
230
279
|
end
|
|
231
280
|
|
|
281
|
+
private def send_anthropic_stream_request(body, on_chunk)
|
|
282
|
+
stream_body = body.merge(stream: true)
|
|
283
|
+
aggregator = AnthropicStreamAggregator.new(on_chunk: on_chunk)
|
|
284
|
+
sse_buf = +""
|
|
285
|
+
|
|
286
|
+
response = anthropic_connection.post(anthropic_messages_path) do |req|
|
|
287
|
+
req.headers["Accept"] = "text/event-stream"
|
|
288
|
+
req.body = stream_body.to_json
|
|
289
|
+
req.options.on_data = proc do |chunk, _bytes_received, _env|
|
|
290
|
+
sse_buf << chunk
|
|
291
|
+
drain_sse_frames(sse_buf) { |event, data| aggregator.handle(event, data) }
|
|
292
|
+
end
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
raise_error(response) unless response.status == 200
|
|
296
|
+
MessageFormat::Anthropic.parse_response(aggregator.to_h)
|
|
297
|
+
end
|
|
298
|
+
|
|
232
299
|
def parse_simple_anthropic_response(response)
|
|
233
300
|
raise_error(response) unless response.status == 200
|
|
234
301
|
data = safe_json_parse(response.body, context: "LLM response")
|
|
@@ -237,24 +304,47 @@ module Clacky
|
|
|
237
304
|
|
|
238
305
|
# ── OpenAI request / response ─────────────────────────────────────────────
|
|
239
306
|
|
|
240
|
-
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
|
|
307
|
+
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
|
|
241
308
|
# Apply cache_control markers to messages when caching is enabled.
|
|
242
309
|
# OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
|
|
243
310
|
messages = apply_message_caching(messages) if caching_enabled
|
|
244
311
|
|
|
245
|
-
body
|
|
312
|
+
body = MessageFormat::OpenAI.build_request_body(
|
|
246
313
|
messages, model, tools, max_tokens, caching_enabled,
|
|
247
314
|
vision_supported: @vision_supported
|
|
248
315
|
)
|
|
316
|
+
return send_openai_stream_request(body, on_chunk) if on_chunk
|
|
317
|
+
|
|
249
318
|
response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
|
|
250
319
|
|
|
251
320
|
raise_error(response) unless response.status == 200
|
|
252
321
|
check_html_response(response)
|
|
253
|
-
|
|
322
|
+
|
|
254
323
|
parsed_body = safe_json_parse(response.body, context: "LLM response")
|
|
255
324
|
MessageFormat::OpenAI.parse_response(parsed_body)
|
|
256
325
|
end
|
|
257
326
|
|
|
327
|
+
# Streaming variant for OpenAI-compatible chat completions (DeepSeek/OpenRouter
|
|
328
|
+
# via platform/llm_proxy). Uses Faraday's on_data hook to consume SSE frames,
|
|
329
|
+
# accumulates them, and reconstructs the non-streaming JSON response shape so
|
|
330
|
+
# MessageFormat::OpenAI.parse_response works unchanged.
|
|
331
|
+
private def send_openai_stream_request(body, on_chunk)
|
|
332
|
+
stream_body = body.merge(stream: true, stream_options: { include_usage: true })
|
|
333
|
+
aggregator = OpenAIStreamAggregator.new(on_chunk: on_chunk)
|
|
334
|
+
sse_buf = +""
|
|
335
|
+
|
|
336
|
+
response = openai_connection.post("chat/completions") do |req|
|
|
337
|
+
req.body = stream_body.to_json
|
|
338
|
+
req.options.on_data = proc do |chunk, _bytes_received, _env|
|
|
339
|
+
sse_buf << chunk
|
|
340
|
+
drain_sse_frames(sse_buf) { |_event, data| aggregator.handle(data) }
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
raise_error(response) unless response.status == 200
|
|
345
|
+
MessageFormat::OpenAI.parse_response(aggregator.to_h)
|
|
346
|
+
end
|
|
347
|
+
|
|
258
348
|
def parse_simple_openai_response(response)
|
|
259
349
|
raise_error(response) unless response.status == 200
|
|
260
350
|
parsed_body = safe_json_parse(response.body, context: "LLM response")
|
|
@@ -320,6 +410,33 @@ module Clacky
|
|
|
320
410
|
"/model/#{model}/converse"
|
|
321
411
|
end
|
|
322
412
|
|
|
413
|
+
# Bedrock Converse streaming endpoint path.
|
|
414
|
+
private def bedrock_stream_endpoint(model)
|
|
415
|
+
"/model/#{model}/converse-stream"
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
# Pull complete SSE frames out of a buffer and yield them as (event, data).
#
# A frame ends at a blank line. The SSE spec allows LF, CRLF or CR line
# endings, so the frame boundary may be "\n\n", "\r\n\r\n" or "\r\r";
# whichever occurs first in the buffer is used. Incomplete trailing data
# stays in the buffer for the next chunk. Frames without an explicit
# `event:` line use the default "message" type per the SSE spec; frames
# with no `data:` line (comments / keep-alives) are dropped.
#
# @param buf [String] mutable receive buffer; consumed frames are removed
# @yieldparam event [String] event name
# @yieldparam data [String] data lines of the frame joined with "\n"
# @return [nil]
private def drain_sse_frames(buf)
  loop do
    # Plain substring search (not a regexp) so a buffer ending in a partial
    # multi-byte character cannot make the boundary scan raise.
    boundary = ["\r\n\r\n", "\n\n", "\r\r"]
               .map { |sep| (at = buf.index(sep)) && [at, sep.length] }
               .compact
               .min_by(&:first)
    break unless boundary

    frame = buf.slice!(0, boundary.sum)
    event = "message"
    data_lines = []
    frame.split(/\r\n|\n|\r/).each do |line|
      if line.start_with?("event:")
        event = line.sub(/\Aevent:\s*/, "")
      elsif line.start_with?("data:")
        data_lines << line.sub(/\Adata:\s*/, "")
      end
    end
    next if data_lines.empty?

    yield event, data_lines.join("\n")
  end
end
|
|
439
|
+
|
|
323
440
|
def bedrock_connection
|
|
324
441
|
@bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
|
|
325
442
|
conn.headers["Content-Type"] = "application/json"
|
|
@@ -477,6 +594,18 @@ module Clacky
|
|
|
477
594
|
"The request will be retried automatically."
|
|
478
595
|
end
|
|
479
596
|
|
|
597
|
+
# ── Streaming helpers ─────────────────────────────────────────────────────
|
|
598
|
+
|
|
599
|
+
# Invoke the user's on_chunk callback in a way that never lets a callback
|
|
600
|
+
# error tear down the LLM request. Streaming chunks are best-effort UI
|
|
601
|
+
# updates; a buggy progress renderer must not abort an in-flight call.
|
|
602
|
+
private def safe_invoke_on_chunk(on_chunk, **kwargs)
|
|
603
|
+
return unless on_chunk
|
|
604
|
+
on_chunk.call(**kwargs)
|
|
605
|
+
rescue => e
|
|
606
|
+
Clacky::Logger.warn("[on_chunk] callback raised #{e.class}: #{e.message}")
|
|
607
|
+
end
|
|
608
|
+
|
|
480
609
|
# ── Utilities ─────────────────────────────────────────────────────────────
|
|
481
610
|
|
|
482
611
|
def deep_clone(obj)
|
|
@@ -221,8 +221,12 @@ then parse the last stdout line as JSON and read `installed` as N.
|
|
|
221
221
|
|
|
222
222
|
### A.10. Import external skills (optional)
|
|
223
223
|
|
|
224
|
-
|
|
225
|
-
|
|
224
|
+
Check if OpenClaw is installed:
|
|
225
|
+
- Run `test -d ~/.openclaw && echo yes || echo no`
|
|
226
|
+
- If `no` and on WSL (i.e. `/proc/version` contains `microsoft`), also run:
|
|
227
|
+
`powershell.exe -NoProfile -Command '$env:USERPROFILE' 2>/dev/null | tr -d '\r'` to get the Windows home, then check `test -d "$(wslpath '<win_home>')/.openclaw" && echo yes || echo no`
|
|
228
|
+
- If all checks return `no`, skip silently.
|
|
229
|
+
If any check returns `yes`:
|
|
226
230
|
1. `ruby "SKILL_DIR/scripts/import_external_skills.rb" --source openclaw --dry-run`
|
|
227
231
|
2. Parse the skill count N.
|
|
228
232
|
3. Ask via `request_user_feedback`:
|