openclacky 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/docs/security-design.md +109 -0
- data/lib/clacky/agent/message_compressor_helper.rb +82 -69
- data/lib/clacky/agent/session_serializer.rb +9 -1
- data/lib/clacky/agent/skill_manager.rb +7 -0
- data/lib/clacky/agent.rb +11 -3
- data/lib/clacky/banner.rb +65 -0
- data/lib/clacky/block_font.rb +331 -0
- data/lib/clacky/brand_config.rb +73 -5
- data/lib/clacky/client.rb +159 -631
- data/lib/clacky/default_skills/activate-license/SKILL.md +118 -0
- data/lib/clacky/default_skills/channel-setup/SKILL.md +10 -20
- data/lib/clacky/message_format/anthropic.rb +241 -0
- data/lib/clacky/message_format/open_ai.rb +135 -0
- data/lib/clacky/server/channel/adapters/wecom/adapter.rb +2 -0
- data/lib/clacky/server/channel/adapters/wecom/ws_client.rb +13 -0
- data/lib/clacky/server/http_server.rb +12 -2
- data/lib/clacky/session_manager.rb +7 -2
- data/lib/clacky/tools/browser.rb +109 -280
- data/lib/clacky/ui2/block_font.rb +10 -0
- data/lib/clacky/ui2/components/welcome_banner.rb +23 -22
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +588 -6
- data/lib/clacky/web/app.js +30 -15
- data/lib/clacky/web/brand.js +141 -9
- data/lib/clacky/web/i18n.js +28 -2
- data/lib/clacky/web/index.html +142 -127
- data/lib/clacky/web/onboard.js +192 -225
- data/lib/clacky/web/sessions.js +12 -8
- data/lib/clacky/web/settings.js +57 -4
- data/lib/clacky.rb +2 -0
- data/scripts/install.sh +60 -15
- metadata +8 -1
data/lib/clacky/client.rb
CHANGED
|
@@ -15,748 +15,276 @@ module Clacky
|
|
|
15
15
|
@use_anthropic_format = anthropic_format
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
# (based on config source: ANTHROPIC_* env vars = true, config file = false)
|
|
18
|
+
# Returns true when the client is talking directly to the Anthropic API
|
|
19
|
+
# (determined at construction time via the anthropic_format flag).
|
|
21
20
|
def anthropic_format?(model = nil)
|
|
22
21
|
@use_anthropic_format
|
|
23
22
|
end
|
|
24
23
|
|
|
25
|
-
#
|
|
26
|
-
|
|
24
|
+
# ── Connection test ───────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
# Test API connection by sending a minimal request.
|
|
27
|
+
# Returns { success: true } or { success: false, error: "..." }.
|
|
27
28
|
def test_connection(model:)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
]
|
|
39
|
-
}.to_json
|
|
40
|
-
end
|
|
41
|
-
handle_test_response(response)
|
|
42
|
-
else
|
|
43
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
44
|
-
req.body = {
|
|
45
|
-
model: model,
|
|
46
|
-
max_tokens: 16,
|
|
47
|
-
messages: [
|
|
48
|
-
{
|
|
49
|
-
role: "user",
|
|
50
|
-
content: "hi"
|
|
51
|
-
}
|
|
52
|
-
]
|
|
53
|
-
}.to_json
|
|
54
|
-
end
|
|
55
|
-
handle_test_response(response)
|
|
56
|
-
end
|
|
29
|
+
minimal_body = { model: model, max_tokens: 16,
|
|
30
|
+
messages: [{ role: "user", content: "hi" }] }.to_json
|
|
31
|
+
|
|
32
|
+
response = if anthropic_format?
|
|
33
|
+
anthropic_connection.post("v1/messages") { |r| r.body = minimal_body }
|
|
34
|
+
else
|
|
35
|
+
openai_connection.post("chat/completions") { |r| r.body = minimal_body }
|
|
36
|
+
end
|
|
37
|
+
handle_test_response(response)
|
|
57
38
|
rescue Faraday::Error => e
|
|
58
|
-
# Network or connection errors
|
|
59
39
|
{ success: false, error: "Connection error: #{e.message}" }
|
|
60
40
|
rescue => e
|
|
61
|
-
# Other errors
|
|
62
41
|
{ success: false, error: e.message }
|
|
63
42
|
end
|
|
64
43
|
|
|
44
|
+
# ── Simple (non-agent) helpers ────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
# Send a single string message and return the reply text.
|
|
65
47
|
def send_message(content, model:, max_tokens:)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
req.body = {
|
|
69
|
-
model: model,
|
|
70
|
-
max_tokens: max_tokens,
|
|
71
|
-
messages: [
|
|
72
|
-
{
|
|
73
|
-
role: "user",
|
|
74
|
-
content: content
|
|
75
|
-
}
|
|
76
|
-
]
|
|
77
|
-
}.to_json
|
|
78
|
-
end
|
|
79
|
-
handle_anthropic_simple_response(response)
|
|
80
|
-
else
|
|
81
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
82
|
-
req.body = {
|
|
83
|
-
model: model,
|
|
84
|
-
max_tokens: max_tokens,
|
|
85
|
-
messages: [
|
|
86
|
-
{
|
|
87
|
-
role: "user",
|
|
88
|
-
content: content
|
|
89
|
-
}
|
|
90
|
-
]
|
|
91
|
-
}.to_json
|
|
92
|
-
end
|
|
93
|
-
handle_response(response)
|
|
94
|
-
end
|
|
48
|
+
messages = [{ role: "user", content: content }]
|
|
49
|
+
send_messages(messages, model: model, max_tokens: max_tokens)
|
|
95
50
|
end
|
|
96
51
|
|
|
52
|
+
# Send a messages array and return the reply text.
|
|
97
53
|
def send_messages(messages, model:, max_tokens:)
|
|
98
|
-
if anthropic_format?
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
response
|
|
102
|
-
req.body = body.to_json
|
|
103
|
-
end
|
|
104
|
-
handle_anthropic_simple_response(response)
|
|
54
|
+
if anthropic_format?
|
|
55
|
+
body = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
|
|
56
|
+
response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
|
|
57
|
+
parse_simple_anthropic_response(response)
|
|
105
58
|
else
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
max_tokens: max_tokens,
|
|
110
|
-
messages: messages
|
|
111
|
-
}.to_json
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
handle_response(response)
|
|
59
|
+
body = { model: model, max_tokens: max_tokens, messages: messages }
|
|
60
|
+
response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
|
|
61
|
+
parse_simple_openai_response(response)
|
|
115
62
|
end
|
|
116
63
|
end
|
|
117
64
|
|
|
118
|
-
#
|
|
119
|
-
# Options:
|
|
120
|
-
# - enable_caching: Enable prompt caching for system prompt and tools (default: false)
|
|
121
|
-
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
122
|
-
# Auto-detect API format based on model name and base_url
|
|
123
|
-
is_anthropic = anthropic_format?(model)
|
|
124
|
-
|
|
125
|
-
# Deep clone messages to avoid modifying the original array
|
|
126
|
-
processed_messages = messages.map { |msg| deep_clone(msg) }
|
|
65
|
+
# ── Agent main path ───────────────────────────────────────────────────────
|
|
127
66
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
67
|
+
# Send messages with tool-calling support.
|
|
68
|
+
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
|
|
69
|
+
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
70
|
+
caching_enabled = enable_caching && supports_prompt_caching?(model)
|
|
71
|
+
cloned = deep_clone(messages)
|
|
131
72
|
|
|
132
|
-
if
|
|
133
|
-
send_anthropic_request(
|
|
73
|
+
if anthropic_format?
|
|
74
|
+
send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
134
75
|
else
|
|
135
|
-
send_openai_request(
|
|
76
|
+
send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
136
77
|
end
|
|
137
78
|
end
|
|
138
79
|
|
|
139
|
-
# Format tool results
|
|
140
|
-
#
|
|
141
|
-
#
|
|
80
|
+
# Format tool results into canonical messages ready to append to @messages.
|
|
81
|
+
# Always returns canonical format (role: "tool") regardless of API type —
|
|
82
|
+
# conversion to API-native happens inside each send_*_request.
|
|
142
83
|
def format_tool_results(response, tool_results, model:)
|
|
143
84
|
return [] if tool_results.empty?
|
|
144
85
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
# Create a map of tool_call_id -> result for quick lookup
|
|
148
|
-
results_map = tool_results.each_with_object({}) do |result, hash|
|
|
149
|
-
hash[result[:id]] = result
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
if is_anthropic
|
|
153
|
-
# Anthropic format: tool results in user message content array
|
|
154
|
-
tool_result_blocks = response[:tool_calls].map do |tool_call|
|
|
155
|
-
result = results_map[tool_call[:id]]
|
|
156
|
-
if result
|
|
157
|
-
{
|
|
158
|
-
type: "tool_result",
|
|
159
|
-
tool_use_id: tool_call[:id],
|
|
160
|
-
content: result[:content]
|
|
161
|
-
}
|
|
162
|
-
else
|
|
163
|
-
{
|
|
164
|
-
type: "tool_result",
|
|
165
|
-
tool_use_id: tool_call[:id],
|
|
166
|
-
content: JSON.generate({ error: "Tool result missing" })
|
|
167
|
-
}
|
|
168
|
-
end
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Return as a user message
|
|
172
|
-
[
|
|
173
|
-
{
|
|
174
|
-
role: "user",
|
|
175
|
-
content: tool_result_blocks
|
|
176
|
-
}
|
|
177
|
-
]
|
|
86
|
+
if anthropic_format?
|
|
87
|
+
MessageFormat::Anthropic.format_tool_results(response, tool_results)
|
|
178
88
|
else
|
|
179
|
-
|
|
180
|
-
response[:tool_calls].map do |tool_call|
|
|
181
|
-
result = results_map[tool_call[:id]]
|
|
182
|
-
if result
|
|
183
|
-
{
|
|
184
|
-
role: "tool",
|
|
185
|
-
tool_call_id: result[:id],
|
|
186
|
-
content: result[:content]
|
|
187
|
-
}
|
|
188
|
-
else
|
|
189
|
-
{
|
|
190
|
-
role: "tool",
|
|
191
|
-
tool_call_id: tool_call[:id],
|
|
192
|
-
content: JSON.generate({ error: "Tool result missing" })
|
|
193
|
-
}
|
|
194
|
-
end
|
|
195
|
-
end
|
|
89
|
+
MessageFormat::OpenAI.format_tool_results(response, tool_results)
|
|
196
90
|
end
|
|
197
91
|
end
|
|
198
92
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
# Send request using OpenAI API format
|
|
202
|
-
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
|
|
203
|
-
# Apply caching to messages if enabled
|
|
204
|
-
processed_messages = caching_enabled ? apply_message_caching(messages) : messages
|
|
205
|
-
|
|
206
|
-
body = {
|
|
207
|
-
model: model,
|
|
208
|
-
max_tokens: max_tokens,
|
|
209
|
-
messages: processed_messages
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
# Add tools if provided
|
|
213
|
-
if tools&.any?
|
|
214
|
-
if caching_enabled
|
|
215
|
-
cached_tools = tools.map { |tool| deep_clone(tool) }
|
|
216
|
-
cached_tools.last[:cache_control] = { type: "ephemeral" }
|
|
217
|
-
body[:tools] = cached_tools
|
|
218
|
-
else
|
|
219
|
-
body[:tools] = tools
|
|
220
|
-
end
|
|
221
|
-
end
|
|
222
|
-
|
|
223
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
224
|
-
req.body = body.to_json
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
handle_tool_response(response)
|
|
228
|
-
end
|
|
93
|
+
# ── Prompt-caching support ────────────────────────────────────────────────
|
|
229
94
|
|
|
230
|
-
#
|
|
231
|
-
def
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
response = anthropic_connection.post("v1/messages") do |req|
|
|
236
|
-
req.body = body.to_json
|
|
237
|
-
end
|
|
238
|
-
|
|
239
|
-
handle_anthropic_response(response)
|
|
240
|
-
end
|
|
241
|
-
|
|
242
|
-
# Build request body in Anthropic format
|
|
243
|
-
def build_anthropic_body(messages, model, tools, max_tokens, caching_enabled)
|
|
244
|
-
# Separate system messages from regular messages
|
|
245
|
-
system_messages = messages.select { |m| m[:role] == "system" }
|
|
246
|
-
regular_messages = messages.reject { |m| m[:role] == "system" }
|
|
247
|
-
|
|
248
|
-
# Build system for Anthropic - use string format which is most compatible
|
|
249
|
-
system = if system_messages.any?
|
|
250
|
-
system_messages.map do |msg|
|
|
251
|
-
content = msg[:content]
|
|
252
|
-
if content.is_a?(String)
|
|
253
|
-
content
|
|
254
|
-
elsif content.is_a?(Array)
|
|
255
|
-
content.map { |block| block.is_a?(Hash) ? (block[:text] || block.dig(:text) || "") : block.to_s }.compact.join("\n")
|
|
256
|
-
else
|
|
257
|
-
content.to_s
|
|
258
|
-
end
|
|
259
|
-
end.join("\n\n")
|
|
260
|
-
else
|
|
261
|
-
""
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
# Convert regular messages to Anthropic format
|
|
265
|
-
anthropic_messages = regular_messages.map { |msg| convert_to_anthropic_message(msg, caching_enabled) }
|
|
266
|
-
|
|
267
|
-
# Convert tools to Anthropic format
|
|
268
|
-
anthropic_tools = tools&.map { |tool| convert_to_anthropic_tool(tool, caching_enabled) }
|
|
269
|
-
|
|
270
|
-
# Add cache_control to last tool if caching is enabled
|
|
271
|
-
if caching_enabled && anthropic_tools&.any?
|
|
272
|
-
anthropic_tools.last[:cache_control] = { type: "ephemeral" }
|
|
273
|
-
end
|
|
274
|
-
|
|
275
|
-
body = {
|
|
276
|
-
model: model,
|
|
277
|
-
max_tokens: max_tokens,
|
|
278
|
-
messages: anthropic_messages
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
# Only include system if it's not empty
|
|
282
|
-
body[:system] = system if system && !system.empty?
|
|
283
|
-
|
|
284
|
-
body[:tools] = anthropic_tools if anthropic_tools&.any?
|
|
95
|
+
# Returns true for Claude 3.5+ models that support prompt caching.
|
|
96
|
+
def supports_prompt_caching?(model)
|
|
97
|
+
model_str = model.to_s.downcase
|
|
98
|
+
return false unless model_str.include?("claude")
|
|
285
99
|
|
|
286
|
-
|
|
100
|
+
model_str.match?(/claude(?:-3[-.]?[5-9]|-[4-9]|-sonnet-[34])/)
|
|
287
101
|
end
|
|
288
102
|
|
|
289
|
-
|
|
290
|
-
def convert_to_anthropic_message(message, caching_enabled)
|
|
291
|
-
role = message[:role]
|
|
292
|
-
content = message[:content]
|
|
293
|
-
tool_calls = message[:tool_calls]
|
|
294
|
-
|
|
295
|
-
# For assistant messages with tool_calls, convert tool_calls to content blocks
|
|
296
|
-
if role == "assistant" && tool_calls && tool_calls.any?
|
|
297
|
-
# Build content blocks from both content and tool_calls
|
|
298
|
-
blocks = []
|
|
299
|
-
|
|
300
|
-
# Add text content first
|
|
301
|
-
if content.is_a?(String) && !content.empty?
|
|
302
|
-
blocks << { type: "text", text: content }
|
|
303
|
-
elsif content.is_a?(Array)
|
|
304
|
-
blocks.concat(content.map do |block|
|
|
305
|
-
case block[:type]
|
|
306
|
-
when "text"
|
|
307
|
-
{ type: "text", text: block[:text] }
|
|
308
|
-
when "image_url"
|
|
309
|
-
url = block.dig(:image_url, :url) || block[:url]
|
|
310
|
-
if url&.start_with?("data:")
|
|
311
|
-
match = url.match(/^data:([^;]+);base64,(.*)$/)
|
|
312
|
-
if match
|
|
313
|
-
{ type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
|
|
314
|
-
else
|
|
315
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
316
|
-
end
|
|
317
|
-
else
|
|
318
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
319
|
-
end
|
|
320
|
-
else
|
|
321
|
-
block
|
|
322
|
-
end
|
|
323
|
-
end)
|
|
324
|
-
end
|
|
103
|
+
private
|
|
325
104
|
|
|
326
|
-
|
|
327
|
-
tool_calls.each do |call|
|
|
328
|
-
# Handle both OpenAI format (with function key) and direct format
|
|
329
|
-
if call[:function]
|
|
330
|
-
# OpenAI format
|
|
331
|
-
tool_use_block = {
|
|
332
|
-
type: "tool_use",
|
|
333
|
-
id: call[:id],
|
|
334
|
-
name: call[:function][:name],
|
|
335
|
-
input: call[:function][:arguments].is_a?(String) ? JSON.parse(call[:function][:arguments]) : call[:function][:arguments]
|
|
336
|
-
}
|
|
337
|
-
else
|
|
338
|
-
# Direct format
|
|
339
|
-
tool_use_block = {
|
|
340
|
-
type: "tool_use",
|
|
341
|
-
id: call[:id],
|
|
342
|
-
name: call[:name],
|
|
343
|
-
input: call[:arguments].is_a?(String) ? JSON.parse(call[:arguments]) : call[:arguments]
|
|
344
|
-
}
|
|
345
|
-
end
|
|
346
|
-
blocks << tool_use_block
|
|
347
|
-
end
|
|
105
|
+
# ── Anthropic request / response ──────────────────────────────────────────
|
|
348
106
|
|
|
349
|
-
|
|
350
|
-
|
|
107
|
+
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
|
|
108
|
+
# Apply cache_control to the message that marks the cache breakpoint
|
|
109
|
+
messages = apply_message_caching(messages) if caching_enabled
|
|
351
110
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
return { role: role, content: [{ type: "text", text: content }] }
|
|
355
|
-
end
|
|
111
|
+
body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
112
|
+
response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
|
|
356
113
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
blocks = content.map do |block|
|
|
360
|
-
case block[:type]
|
|
361
|
-
when "text"
|
|
362
|
-
{ type: "text", text: block[:text] }
|
|
363
|
-
when "image_url"
|
|
364
|
-
url = block.dig(:image_url, :url) || block[:url]
|
|
365
|
-
if url&.start_with?("data:")
|
|
366
|
-
match = url.match(/^data:([^;]+);base64,(.*)$/)
|
|
367
|
-
if match
|
|
368
|
-
{ type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
|
|
369
|
-
else
|
|
370
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
371
|
-
end
|
|
372
|
-
else
|
|
373
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
374
|
-
end
|
|
375
|
-
else
|
|
376
|
-
block
|
|
377
|
-
end
|
|
378
|
-
end
|
|
379
|
-
return { role: role, content: blocks }
|
|
380
|
-
end
|
|
381
|
-
|
|
382
|
-
{ role: role, content: message[:content] }
|
|
114
|
+
raise_error(response) unless response.status == 200
|
|
115
|
+
MessageFormat::Anthropic.parse_response(JSON.parse(response.body))
|
|
383
116
|
end
|
|
384
117
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
func = tool[:function] || tool
|
|
390
|
-
{
|
|
391
|
-
name: func[:name],
|
|
392
|
-
description: func[:description],
|
|
393
|
-
input_schema: func[:parameters]
|
|
394
|
-
}
|
|
118
|
+
def parse_simple_anthropic_response(response)
|
|
119
|
+
raise_error(response) unless response.status == 200
|
|
120
|
+
data = JSON.parse(response.body)
|
|
121
|
+
(data["content"] || []).select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
|
|
395
122
|
end
|
|
396
123
|
|
|
397
|
-
#
|
|
398
|
-
def handle_anthropic_response(response)
|
|
399
|
-
case response.status
|
|
400
|
-
when 200
|
|
401
|
-
data = JSON.parse(response.body)
|
|
402
|
-
content_blocks = data["content"] || []
|
|
403
|
-
usage = data["usage"] || {}
|
|
404
|
-
|
|
405
|
-
# Extract content
|
|
406
|
-
content = content_blocks.select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
|
|
407
|
-
|
|
408
|
-
# Extract tool calls
|
|
409
|
-
tool_calls = content_blocks.select { |b| b["type"] == "tool_use" }.map do |tc|
|
|
410
|
-
{
|
|
411
|
-
id: tc["id"],
|
|
412
|
-
type: "function",
|
|
413
|
-
name: tc["name"],
|
|
414
|
-
arguments: tc["input"].is_a?(String) ? tc["input"] : tc["input"].to_json
|
|
415
|
-
}
|
|
416
|
-
end
|
|
124
|
+
# ── OpenAI request / response ─────────────────────────────────────────────
|
|
417
125
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
when "max_tokens" then "length"
|
|
423
|
-
else data["stop_reason"]
|
|
424
|
-
end
|
|
126
|
+
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
|
|
127
|
+
# Apply cache_control markers to messages when caching is enabled.
|
|
128
|
+
# OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
|
|
129
|
+
messages = apply_message_caching(messages) if caching_enabled
|
|
425
130
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
prompt_tokens: usage["input_tokens"],
|
|
429
|
-
completion_tokens: usage["output_tokens"],
|
|
430
|
-
total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i
|
|
431
|
-
}
|
|
131
|
+
body = MessageFormat::OpenAI.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
132
|
+
response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
|
|
432
133
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
|
|
436
|
-
end
|
|
437
|
-
if usage["cache_creation_input_tokens"]
|
|
438
|
-
usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
|
|
439
|
-
end
|
|
440
|
-
|
|
441
|
-
{
|
|
442
|
-
content: content,
|
|
443
|
-
tool_calls: tool_calls,
|
|
444
|
-
finish_reason: finish_reason,
|
|
445
|
-
usage: usage_data,
|
|
446
|
-
raw_api_usage: usage
|
|
447
|
-
}
|
|
448
|
-
else
|
|
449
|
-
raise_error(response)
|
|
450
|
-
end
|
|
134
|
+
raise_error(response) unless response.status == 200
|
|
135
|
+
MessageFormat::OpenAI.parse_response(JSON.parse(response.body))
|
|
451
136
|
end
|
|
452
137
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
when 200
|
|
457
|
-
data = JSON.parse(response.body)
|
|
458
|
-
content_blocks = data["content"] || []
|
|
459
|
-
|
|
460
|
-
# Extract and return text content only (simple format, consistent with OpenAI)
|
|
461
|
-
content_blocks.select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
|
|
462
|
-
else
|
|
463
|
-
raise_error(response)
|
|
464
|
-
end
|
|
138
|
+
def parse_simple_openai_response(response)
|
|
139
|
+
raise_error(response) unless response.status == 200
|
|
140
|
+
JSON.parse(response.body)["choices"].first["message"]["content"]
|
|
465
141
|
end
|
|
466
142
|
|
|
467
|
-
#
|
|
468
|
-
# Currently only Claude 3.5+ models support this feature
|
|
469
|
-
def supports_prompt_caching?(model)
|
|
470
|
-
model_str = model.to_s.downcase
|
|
471
|
-
|
|
472
|
-
# Only Claude models support prompt caching
|
|
473
|
-
return false unless model_str.include?("claude")
|
|
143
|
+
# ── Prompt caching helpers ────────────────────────────────────────────────
|
|
474
144
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
(?:-3[-.]?[5-9])| # 3.5, 3.6, 3.7, 3.8, 3.9 or 3-5, 3-6, etc
|
|
484
|
-
(?:-[4-9])| # 4, 5, 6, 7, 8, 9 (future versions)
|
|
485
|
-
(?:-sonnet-[34]) # OpenRouter: claude-sonnet-3, claude-sonnet-4
|
|
486
|
-
)
|
|
487
|
-
/x
|
|
488
|
-
|
|
489
|
-
model_str.match?(cache_pattern)
|
|
490
|
-
end
|
|
491
|
-
|
|
492
|
-
# Apply cache_control to messages for prompt caching
|
|
493
|
-
# Strategy: Add cache_control on the LAST message before tools
|
|
494
|
-
# This ensures everything from start to the breakpoint gets cached
|
|
145
|
+
# Add cache_control marker to the appropriate message in the array.
|
|
146
|
+
#
|
|
147
|
+
# Strategy: mark the MOST RECENT non-injected assistant message.
|
|
148
|
+
#
|
|
149
|
+
# Rationale: Anthropic prompt caching is prefix-based. If we mark the last
|
|
150
|
+
# message (typically the latest user turn or tool_result), the cached prefix
|
|
151
|
+
# changes every request because tool results and user inputs vary each turn.
|
|
152
|
+
# This causes alternating miss/hit patterns observed in production.
|
|
495
153
|
#
|
|
496
|
-
#
|
|
497
|
-
#
|
|
498
|
-
#
|
|
499
|
-
#
|
|
154
|
+
# By placing the breakpoint on the most recent ASSISTANT message that precedes
|
|
155
|
+
# the current user turn, we cache everything up to (and including) the last LLM
|
|
156
|
+
# reply — a stable prefix. The new user turn + any tool results live AFTER the
|
|
157
|
+
# breakpoint and are not cached (they change every request anyway).
|
|
158
|
+
#
|
|
159
|
+
# Special cases:
|
|
160
|
+
# - Compression instruction as last message: skip it, find the assistant before it.
|
|
161
|
+
# - No non-injected assistant message found: fall back to marking the last message.
|
|
500
162
|
def apply_message_caching(messages)
|
|
501
163
|
return messages if messages.empty?
|
|
502
164
|
|
|
503
|
-
#
|
|
504
|
-
#
|
|
505
|
-
cache_index =
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
165
|
+
# Walk backwards to find the last assistant message that is not system_injected.
|
|
166
|
+
# That is the stable "end of history" to anchor the cache breakpoint on.
|
|
167
|
+
cache_index = nil
|
|
168
|
+
messages.each_with_index.reverse_each do |msg, idx|
|
|
169
|
+
next if msg[:system_injected]
|
|
170
|
+
if msg[:role] == "assistant"
|
|
171
|
+
cache_index = idx
|
|
172
|
+
break
|
|
173
|
+
end
|
|
509
174
|
end
|
|
510
175
|
|
|
511
|
-
#
|
|
512
|
-
cache_index
|
|
176
|
+
# Fallback: if no assistant message found, mark the last message
|
|
177
|
+
cache_index ||= messages.length - 1
|
|
513
178
|
|
|
514
|
-
# Add cache_control to the target message
|
|
515
179
|
messages.map.with_index do |msg, idx|
|
|
516
|
-
|
|
517
|
-
add_cache_control_to_message(msg)
|
|
518
|
-
else
|
|
519
|
-
msg
|
|
520
|
-
end
|
|
180
|
+
idx == cache_index ? add_cache_control_to_message(msg) : msg
|
|
521
181
|
end
|
|
522
182
|
end
|
|
523
183
|
|
|
524
|
-
#
|
|
525
|
-
# Claude API format: content: [{type: "text", text: "...", cache_control: {...}}]
|
|
184
|
+
# Wrap or extend the message's content with a cache_control marker.
|
|
526
185
|
def add_cache_control_to_message(msg)
|
|
527
186
|
content = msg[:content]
|
|
528
187
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
end
|
|
540
|
-
end
|
|
541
|
-
else
|
|
542
|
-
# Unknown format, return as-is
|
|
543
|
-
return msg
|
|
544
|
-
end
|
|
188
|
+
content_array = case content
|
|
189
|
+
when String
|
|
190
|
+
[{ type: "text", text: content, cache_control: { type: "ephemeral" } }]
|
|
191
|
+
when Array
|
|
192
|
+
content.map.with_index do |block, idx|
|
|
193
|
+
idx == content.length - 1 ? block.merge(cache_control: { type: "ephemeral" }) : block
|
|
194
|
+
end
|
|
195
|
+
else
|
|
196
|
+
return msg
|
|
197
|
+
end
|
|
545
198
|
|
|
546
199
|
msg.merge(content: content_array)
|
|
547
200
|
end
|
|
548
201
|
|
|
549
|
-
#
|
|
550
|
-
#
|
|
551
|
-
|
|
552
|
-
|
|
202
|
+
# Only true for the compression-instruction user message inserted by MessageCompressor.
|
|
203
|
+
# Skill shim messages (also system_injected: true) must NOT be treated as compression
|
|
204
|
+
# instructions — doing so shifts the cache breakpoint into the volatile skill content
|
|
205
|
+
# block, causing a full cache miss on every slash command turn.
|
|
206
|
+
def is_compression_instruction?(message)
|
|
207
|
+
return false unless message.is_a?(Hash)
|
|
208
|
+
return false unless message[:system_injected] == true
|
|
209
|
+
return false unless message[:role] == "user"
|
|
210
|
+
|
|
211
|
+
content = message[:content].to_s
|
|
212
|
+
content.include?("CRITICAL: TASK CHANGE - MEMORY COMPRESSION MODE") ||
|
|
213
|
+
content.include?("MEMORY COMPRESSION MODE")
|
|
553
214
|
end
|
|
554
215
|
|
|
555
|
-
#
|
|
556
|
-
def deep_clone(obj)
|
|
557
|
-
case obj
|
|
558
|
-
when Hash
|
|
559
|
-
obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
|
|
560
|
-
when Array
|
|
561
|
-
obj.map { |item| deep_clone(item) }
|
|
562
|
-
when String, Symbol, Integer, Float, TrueClass, FalseClass, NilClass
|
|
563
|
-
obj
|
|
564
|
-
else
|
|
565
|
-
obj.dup rescue obj
|
|
566
|
-
end
|
|
567
|
-
end
|
|
216
|
+
# ── HTTP connections ──────────────────────────────────────────────────────
|
|
568
217
|
|
|
569
|
-
# Connection for OpenAI API format (uses Bearer token)
|
|
570
218
|
def openai_connection
|
|
571
219
|
@openai_connection ||= Faraday.new(url: @base_url) do |conn|
|
|
572
|
-
conn.headers["Content-Type"]
|
|
220
|
+
conn.headers["Content-Type"] = "application/json"
|
|
573
221
|
conn.headers["Authorization"] = "Bearer #{@api_key}"
|
|
574
|
-
conn.options.timeout
|
|
222
|
+
conn.options.timeout = 120
|
|
575
223
|
conn.options.open_timeout = 10
|
|
576
|
-
conn.ssl.verify
|
|
224
|
+
conn.ssl.verify = false
|
|
577
225
|
conn.adapter Faraday.default_adapter
|
|
578
226
|
end
|
|
579
227
|
end
|
|
580
228
|
|
|
581
|
-
# Connection for Anthropic API format (uses x-api-key header)
|
|
582
229
|
def anthropic_connection
|
|
583
230
|
@anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
|
|
584
|
-
conn.headers["Content-Type"]
|
|
585
|
-
conn.headers["x-api-key"]
|
|
231
|
+
conn.headers["Content-Type"] = "application/json"
|
|
232
|
+
conn.headers["x-api-key"] = @api_key
|
|
586
233
|
conn.headers["anthropic-version"] = "2023-06-01"
|
|
587
234
|
conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
|
|
588
|
-
conn.options.timeout
|
|
235
|
+
conn.options.timeout = 120
|
|
589
236
|
conn.options.open_timeout = 10
|
|
590
|
-
conn.ssl.verify
|
|
237
|
+
conn.ssl.verify = false
|
|
591
238
|
conn.adapter Faraday.default_adapter
|
|
592
239
|
end
|
|
593
240
|
end
|
|
594
241
|
|
|
595
|
-
|
|
596
|
-
case response.status
|
|
597
|
-
when 200
|
|
598
|
-
{ success: true }
|
|
599
|
-
else
|
|
600
|
-
# Extract error details for better user feedback
|
|
601
|
-
error_body = begin
|
|
602
|
-
JSON.parse(response.body)
|
|
603
|
-
rescue JSON::ParserError
|
|
604
|
-
nil
|
|
605
|
-
end
|
|
606
|
-
error_message = extract_error_message(error_body, response.body)
|
|
607
|
-
{ success: false, error: error_message }
|
|
608
|
-
end
|
|
609
|
-
end
|
|
610
|
-
|
|
611
|
-
def handle_response(response)
|
|
612
|
-
case response.status
|
|
613
|
-
when 200
|
|
614
|
-
data = JSON.parse(response.body)
|
|
615
|
-
data["choices"].first["message"]["content"]
|
|
616
|
-
else
|
|
617
|
-
raise_error(response)
|
|
618
|
-
end
|
|
619
|
-
end
|
|
620
|
-
|
|
621
|
-
def handle_tool_response(response)
|
|
622
|
-
case response.status
|
|
623
|
-
when 200
|
|
624
|
-
data = JSON.parse(response.body)
|
|
625
|
-
message = data["choices"].first["message"]
|
|
626
|
-
usage = data["usage"]
|
|
627
|
-
|
|
628
|
-
# Store raw API usage for debugging
|
|
629
|
-
raw_api_usage = usage.dup
|
|
630
|
-
|
|
631
|
-
# Parse usage with cache information
|
|
632
|
-
usage_data = {
|
|
633
|
-
prompt_tokens: usage["prompt_tokens"],
|
|
634
|
-
completion_tokens: usage["completion_tokens"],
|
|
635
|
-
total_tokens: usage["total_tokens"]
|
|
636
|
-
}
|
|
637
|
-
|
|
638
|
-
# Add OpenRouter cost information if present
|
|
639
|
-
if usage["cost"]
|
|
640
|
-
usage_data[:api_cost] = usage["cost"]
|
|
641
|
-
end
|
|
642
|
-
|
|
643
|
-
# Add cache metrics if present (Claude API with prompt caching)
|
|
644
|
-
if usage["cache_creation_input_tokens"]
|
|
645
|
-
usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
|
|
646
|
-
end
|
|
647
|
-
if usage["cache_read_input_tokens"]
|
|
648
|
-
usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
|
|
649
|
-
end
|
|
650
|
-
|
|
651
|
-
# Add OpenRouter cache information from prompt_tokens_details
|
|
652
|
-
if usage["prompt_tokens_details"]
|
|
653
|
-
details = usage["prompt_tokens_details"]
|
|
654
|
-
if details["cached_tokens"] && details["cached_tokens"] > 0
|
|
655
|
-
usage_data[:cache_read_input_tokens] = details["cached_tokens"]
|
|
656
|
-
end
|
|
657
|
-
if details["cache_write_tokens"] && details["cache_write_tokens"] > 0
|
|
658
|
-
usage_data[:cache_creation_input_tokens] = details["cache_write_tokens"]
|
|
659
|
-
end
|
|
660
|
-
end
|
|
242
|
+
# ── Error handling ────────────────────────────────────────────────────────
|
|
661
243
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
tool_calls: parse_tool_calls(message["tool_calls"]),
|
|
665
|
-
finish_reason: data["choices"].first["finish_reason"],
|
|
666
|
-
usage: usage_data,
|
|
667
|
-
raw_api_usage: raw_api_usage
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
# Preserve reasoning_content if present (e.g. Kimi/Moonshot extended thinking).
|
|
671
|
-
# The API requires this field to be echoed back in the message history on
|
|
672
|
-
# subsequent requests, otherwise it returns HTTP 400.
|
|
673
|
-
result[:reasoning_content] = message["reasoning_content"] if message["reasoning_content"]
|
|
244
|
+
def handle_test_response(response)
|
|
245
|
+
return { success: true } if response.status == 200
|
|
674
246
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
raise_error(response)
|
|
678
|
-
end
|
|
247
|
+
error_body = JSON.parse(response.body) rescue nil
|
|
248
|
+
{ success: false, error: extract_error_message(error_body, response.body) }
|
|
679
249
|
end
|
|
680
250
|
|
|
681
|
-
private
|
|
682
|
-
|
|
683
251
|
def raise_error(response)
|
|
684
|
-
|
|
685
|
-
error_body = begin
|
|
686
|
-
JSON.parse(response.body)
|
|
687
|
-
rescue JSON::ParserError
|
|
688
|
-
nil
|
|
689
|
-
end
|
|
690
|
-
|
|
691
|
-
# Extract meaningful error message from response
|
|
252
|
+
error_body = JSON.parse(response.body) rescue nil
|
|
692
253
|
error_message = extract_error_message(error_body, response.body)
|
|
693
254
|
|
|
694
255
|
case response.status
|
|
695
256
|
when 400
|
|
696
|
-
|
|
697
|
-
hint = if error_message.downcase.include?("unavailable") || error_message.downcase.include?("quota")
|
|
698
|
-
" (possibly out of credits)"
|
|
699
|
-
else
|
|
700
|
-
""
|
|
701
|
-
end
|
|
257
|
+
hint = error_message.downcase.match?(/unavailable|quota/) ? " (possibly out of credits)" : ""
|
|
702
258
|
raise AgentError, "API request failed (400): #{error_message}#{hint}"
|
|
703
|
-
when 401
|
|
704
|
-
|
|
705
|
-
when
|
|
706
|
-
|
|
707
|
-
when
|
|
708
|
-
|
|
709
|
-
when 429
|
|
710
|
-
raise AgentError, "Rate limit exceeded"
|
|
711
|
-
when 500..599
|
|
712
|
-
raise AgentError, "Server error (#{response.status}): #{error_message}"
|
|
713
|
-
else
|
|
714
|
-
raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
|
|
259
|
+
when 401 then raise AgentError, "Invalid API key"
|
|
260
|
+
when 403 then raise AgentError, "Access denied: #{error_message}"
|
|
261
|
+
when 404 then raise AgentError, "API endpoint not found: #{error_message}"
|
|
262
|
+
when 429 then raise AgentError, "Rate limit exceeded"
|
|
263
|
+
when 500..599 then raise AgentError, "Server error (#{response.status}): #{error_message}"
|
|
264
|
+
else raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
|
|
715
265
|
end
|
|
716
266
|
end
|
|
717
267
|
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
# Check if response is HTML (indicates wrong endpoint or server error)
|
|
721
|
-
if raw_body.is_a?(String) && raw_body.strip.start_with?('<!DOCTYPE', '<html')
|
|
268
|
+
def extract_error_message(error_body, raw_body)
|
|
269
|
+
if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<html")
|
|
722
270
|
return "Invalid API endpoint or server error (received HTML instead of JSON)"
|
|
723
271
|
end
|
|
724
272
|
|
|
725
273
|
return raw_body unless error_body.is_a?(Hash)
|
|
726
274
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
# 4. error (string)
|
|
732
|
-
# 5. raw body (truncated if too long)
|
|
733
|
-
if error_body["upstreamMessage"] && !error_body["upstreamMessage"].empty?
|
|
734
|
-
error_body["upstreamMessage"]
|
|
735
|
-
elsif error_body.dig("error", "message")
|
|
736
|
-
error_body.dig("error", "message")
|
|
737
|
-
elsif error_body["message"]
|
|
738
|
-
error_body["message"]
|
|
739
|
-
elsif error_body["error"].is_a?(String)
|
|
740
|
-
error_body["error"]
|
|
741
|
-
else
|
|
742
|
-
# Truncate raw body if too long
|
|
743
|
-
raw_body.is_a?(String) && raw_body.length > 200 ? "#{raw_body[0..200]}..." : raw_body
|
|
744
|
-
end
|
|
275
|
+
error_body["upstreamMessage"]&.then { |m| return m unless m.empty? }
|
|
276
|
+
error_body.dig("error", "message")&.then { |m| return m }
|
|
277
|
+
error_body["message"]&.then { |m| return m }
|
|
278
|
+
error_body["error"].is_a?(String) ? error_body["error"] : (raw_body.to_s[0..200] + (raw_body.to_s.length > 200 ? "..." : ""))
|
|
745
279
|
end
|
|
746
280
|
|
|
747
|
-
|
|
748
|
-
return nil if tool_calls.nil? || tool_calls.empty?
|
|
281
|
+
# ── Utilities ─────────────────────────────────────────────────────────────
|
|
749
282
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
id: call["id"],
|
|
756
|
-
type: call["type"],
|
|
757
|
-
name: function_data["name"],
|
|
758
|
-
arguments: function_data["arguments"]
|
|
759
|
-
}
|
|
283
|
+
def deep_clone(obj)
|
|
284
|
+
case obj
|
|
285
|
+
when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
|
|
286
|
+
when Array then obj.map { |item| deep_clone(item) }
|
|
287
|
+
else obj
|
|
760
288
|
end
|
|
761
289
|
end
|
|
762
290
|
end
|