openclacky 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -0
- data/docs/security-design.md +109 -0
- data/lib/clacky/agent/message_compressor_helper.rb +82 -69
- data/lib/clacky/agent/session_serializer.rb +9 -1
- data/lib/clacky/agent/skill_manager.rb +7 -0
- data/lib/clacky/agent.rb +11 -3
- data/lib/clacky/banner.rb +65 -0
- data/lib/clacky/block_font.rb +331 -0
- data/lib/clacky/brand_config.rb +73 -5
- data/lib/clacky/client.rb +129 -633
- data/lib/clacky/default_skills/activate-license/SKILL.md +118 -0
- data/lib/clacky/default_skills/channel-setup/SKILL.md +10 -20
- data/lib/clacky/message_format/anthropic.rb +241 -0
- data/lib/clacky/message_format/open_ai.rb +135 -0
- data/lib/clacky/server/channel/adapters/wecom/adapter.rb +2 -0
- data/lib/clacky/server/channel/adapters/wecom/ws_client.rb +13 -0
- data/lib/clacky/server/http_server.rb +12 -2
- data/lib/clacky/session_manager.rb +7 -2
- data/lib/clacky/tools/browser.rb +109 -280
- data/lib/clacky/ui2/block_font.rb +10 -0
- data/lib/clacky/ui2/components/welcome_banner.rb +23 -22
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +588 -6
- data/lib/clacky/web/app.js +30 -15
- data/lib/clacky/web/brand.js +141 -9
- data/lib/clacky/web/i18n.js +28 -2
- data/lib/clacky/web/index.html +142 -127
- data/lib/clacky/web/onboard.js +192 -225
- data/lib/clacky/web/sessions.js +12 -8
- data/lib/clacky/web/settings.js +57 -4
- data/lib/clacky.rb +2 -0
- data/scripts/install.sh +60 -15
- metadata +8 -1
data/lib/clacky/client.rb
CHANGED
|
@@ -15,748 +15,244 @@ module Clacky
|
|
|
15
15
|
@use_anthropic_format = anthropic_format
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
#
|
|
19
|
-
#
|
|
20
|
-
# (based on config source: ANTHROPIC_* env vars = true, config file = false)
|
|
18
|
+
# Returns true when the client is talking directly to the Anthropic API
|
|
19
|
+
# (determined at construction time via the anthropic_format flag).
|
|
21
20
|
def anthropic_format?(model = nil)
|
|
22
21
|
@use_anthropic_format
|
|
23
22
|
end
|
|
24
23
|
|
|
25
|
-
#
|
|
26
|
-
|
|
24
|
+
# ── Connection test ───────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
# Test API connection by sending a minimal request.
|
|
27
|
+
# Returns { success: true } or { success: false, error: "..." }.
|
|
27
28
|
def test_connection(model:)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
]
|
|
39
|
-
}.to_json
|
|
40
|
-
end
|
|
41
|
-
handle_test_response(response)
|
|
42
|
-
else
|
|
43
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
44
|
-
req.body = {
|
|
45
|
-
model: model,
|
|
46
|
-
max_tokens: 16,
|
|
47
|
-
messages: [
|
|
48
|
-
{
|
|
49
|
-
role: "user",
|
|
50
|
-
content: "hi"
|
|
51
|
-
}
|
|
52
|
-
]
|
|
53
|
-
}.to_json
|
|
54
|
-
end
|
|
55
|
-
handle_test_response(response)
|
|
56
|
-
end
|
|
29
|
+
minimal_body = { model: model, max_tokens: 16,
|
|
30
|
+
messages: [{ role: "user", content: "hi" }] }.to_json
|
|
31
|
+
|
|
32
|
+
response = if anthropic_format?
|
|
33
|
+
anthropic_connection.post("v1/messages") { |r| r.body = minimal_body }
|
|
34
|
+
else
|
|
35
|
+
openai_connection.post("chat/completions") { |r| r.body = minimal_body }
|
|
36
|
+
end
|
|
37
|
+
handle_test_response(response)
|
|
57
38
|
rescue Faraday::Error => e
|
|
58
|
-
# Network or connection errors
|
|
59
39
|
{ success: false, error: "Connection error: #{e.message}" }
|
|
60
40
|
rescue => e
|
|
61
|
-
# Other errors
|
|
62
41
|
{ success: false, error: e.message }
|
|
63
42
|
end
|
|
64
43
|
|
|
44
|
+
# ── Simple (non-agent) helpers ────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
# Send a single string message and return the reply text.
|
|
65
47
|
def send_message(content, model:, max_tokens:)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
req.body = {
|
|
69
|
-
model: model,
|
|
70
|
-
max_tokens: max_tokens,
|
|
71
|
-
messages: [
|
|
72
|
-
{
|
|
73
|
-
role: "user",
|
|
74
|
-
content: content
|
|
75
|
-
}
|
|
76
|
-
]
|
|
77
|
-
}.to_json
|
|
78
|
-
end
|
|
79
|
-
handle_anthropic_simple_response(response)
|
|
80
|
-
else
|
|
81
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
82
|
-
req.body = {
|
|
83
|
-
model: model,
|
|
84
|
-
max_tokens: max_tokens,
|
|
85
|
-
messages: [
|
|
86
|
-
{
|
|
87
|
-
role: "user",
|
|
88
|
-
content: content
|
|
89
|
-
}
|
|
90
|
-
]
|
|
91
|
-
}.to_json
|
|
92
|
-
end
|
|
93
|
-
handle_response(response)
|
|
94
|
-
end
|
|
48
|
+
messages = [{ role: "user", content: content }]
|
|
49
|
+
send_messages(messages, model: model, max_tokens: max_tokens)
|
|
95
50
|
end
|
|
96
51
|
|
|
52
|
+
# Send a messages array and return the reply text.
|
|
97
53
|
def send_messages(messages, model:, max_tokens:)
|
|
98
|
-
if anthropic_format?
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
response
|
|
102
|
-
req.body = body.to_json
|
|
103
|
-
end
|
|
104
|
-
handle_anthropic_simple_response(response)
|
|
54
|
+
if anthropic_format?
|
|
55
|
+
body = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
|
|
56
|
+
response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
|
|
57
|
+
parse_simple_anthropic_response(response)
|
|
105
58
|
else
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
max_tokens: max_tokens,
|
|
110
|
-
messages: messages
|
|
111
|
-
}.to_json
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
handle_response(response)
|
|
59
|
+
body = { model: model, max_tokens: max_tokens, messages: messages }
|
|
60
|
+
response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
|
|
61
|
+
parse_simple_openai_response(response)
|
|
115
62
|
end
|
|
116
63
|
end
|
|
117
64
|
|
|
118
|
-
#
|
|
119
|
-
# Options:
|
|
120
|
-
# - enable_caching: Enable prompt caching for system prompt and tools (default: false)
|
|
121
|
-
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
122
|
-
# Auto-detect API format based on model name and base_url
|
|
123
|
-
is_anthropic = anthropic_format?(model)
|
|
124
|
-
|
|
125
|
-
# Deep clone messages to avoid modifying the original array
|
|
126
|
-
processed_messages = messages.map { |msg| deep_clone(msg) }
|
|
65
|
+
# ── Agent main path ───────────────────────────────────────────────────────
|
|
127
66
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
67
|
+
# Send messages with tool-calling support.
|
|
68
|
+
# Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
|
|
69
|
+
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
|
|
70
|
+
caching_enabled = enable_caching && supports_prompt_caching?(model)
|
|
71
|
+
cloned = deep_clone(messages)
|
|
131
72
|
|
|
132
|
-
if
|
|
133
|
-
send_anthropic_request(
|
|
73
|
+
if anthropic_format?
|
|
74
|
+
send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
134
75
|
else
|
|
135
|
-
send_openai_request(
|
|
76
|
+
send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
|
|
136
77
|
end
|
|
137
78
|
end
|
|
138
79
|
|
|
139
|
-
# Format tool results
|
|
140
|
-
#
|
|
141
|
-
#
|
|
80
|
+
# Format tool results into canonical messages ready to append to @messages.
|
|
81
|
+
# Always returns canonical format (role: "tool") regardless of API type —
|
|
82
|
+
# conversion to API-native happens inside each send_*_request.
|
|
142
83
|
def format_tool_results(response, tool_results, model:)
|
|
143
84
|
return [] if tool_results.empty?
|
|
144
85
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
# Create a map of tool_call_id -> result for quick lookup
|
|
148
|
-
results_map = tool_results.each_with_object({}) do |result, hash|
|
|
149
|
-
hash[result[:id]] = result
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
if is_anthropic
|
|
153
|
-
# Anthropic format: tool results in user message content array
|
|
154
|
-
tool_result_blocks = response[:tool_calls].map do |tool_call|
|
|
155
|
-
result = results_map[tool_call[:id]]
|
|
156
|
-
if result
|
|
157
|
-
{
|
|
158
|
-
type: "tool_result",
|
|
159
|
-
tool_use_id: tool_call[:id],
|
|
160
|
-
content: result[:content]
|
|
161
|
-
}
|
|
162
|
-
else
|
|
163
|
-
{
|
|
164
|
-
type: "tool_result",
|
|
165
|
-
tool_use_id: tool_call[:id],
|
|
166
|
-
content: JSON.generate({ error: "Tool result missing" })
|
|
167
|
-
}
|
|
168
|
-
end
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
# Return as a user message
|
|
172
|
-
[
|
|
173
|
-
{
|
|
174
|
-
role: "user",
|
|
175
|
-
content: tool_result_blocks
|
|
176
|
-
}
|
|
177
|
-
]
|
|
86
|
+
if anthropic_format?
|
|
87
|
+
MessageFormat::Anthropic.format_tool_results(response, tool_results)
|
|
178
88
|
else
|
|
179
|
-
|
|
180
|
-
response[:tool_calls].map do |tool_call|
|
|
181
|
-
result = results_map[tool_call[:id]]
|
|
182
|
-
if result
|
|
183
|
-
{
|
|
184
|
-
role: "tool",
|
|
185
|
-
tool_call_id: result[:id],
|
|
186
|
-
content: result[:content]
|
|
187
|
-
}
|
|
188
|
-
else
|
|
189
|
-
{
|
|
190
|
-
role: "tool",
|
|
191
|
-
tool_call_id: tool_call[:id],
|
|
192
|
-
content: JSON.generate({ error: "Tool result missing" })
|
|
193
|
-
}
|
|
194
|
-
end
|
|
195
|
-
end
|
|
89
|
+
MessageFormat::OpenAI.format_tool_results(response, tool_results)
|
|
196
90
|
end
|
|
197
91
|
end
|
|
198
92
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
# Send request using OpenAI API format
|
|
202
|
-
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
|
|
203
|
-
# Apply caching to messages if enabled
|
|
204
|
-
processed_messages = caching_enabled ? apply_message_caching(messages) : messages
|
|
205
|
-
|
|
206
|
-
body = {
|
|
207
|
-
model: model,
|
|
208
|
-
max_tokens: max_tokens,
|
|
209
|
-
messages: processed_messages
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
# Add tools if provided
|
|
213
|
-
if tools&.any?
|
|
214
|
-
if caching_enabled
|
|
215
|
-
cached_tools = tools.map { |tool| deep_clone(tool) }
|
|
216
|
-
cached_tools.last[:cache_control] = { type: "ephemeral" }
|
|
217
|
-
body[:tools] = cached_tools
|
|
218
|
-
else
|
|
219
|
-
body[:tools] = tools
|
|
220
|
-
end
|
|
221
|
-
end
|
|
222
|
-
|
|
223
|
-
response = openai_connection.post("chat/completions") do |req|
|
|
224
|
-
req.body = body.to_json
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
handle_tool_response(response)
|
|
228
|
-
end
|
|
229
|
-
|
|
230
|
-
# Send request using Anthropic API format
|
|
231
|
-
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
|
|
232
|
-
# Convert OpenAI message format to Anthropic format
|
|
233
|
-
body = build_anthropic_body(messages, model, tools, max_tokens, caching_enabled)
|
|
93
|
+
# ── Prompt-caching support ────────────────────────────────────────────────
|
|
234
94
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
95
|
+
# Returns true for Claude 3.5+ models that support prompt caching.
|
|
96
|
+
def supports_prompt_caching?(model)
|
|
97
|
+
model_str = model.to_s.downcase
|
|
98
|
+
return false unless model_str.include?("claude")
|
|
238
99
|
|
|
239
|
-
|
|
100
|
+
model_str.match?(/claude(?:-3[-.]?[5-9]|-[4-9]|-sonnet-[34])/)
|
|
240
101
|
end
|
|
241
102
|
|
|
242
|
-
|
|
243
|
-
def build_anthropic_body(messages, model, tools, max_tokens, caching_enabled)
|
|
244
|
-
# Separate system messages from regular messages
|
|
245
|
-
system_messages = messages.select { |m| m[:role] == "system" }
|
|
246
|
-
regular_messages = messages.reject { |m| m[:role] == "system" }
|
|
247
|
-
|
|
248
|
-
# Build system for Anthropic - use string format which is most compatible
|
|
249
|
-
system = if system_messages.any?
|
|
250
|
-
system_messages.map do |msg|
|
|
251
|
-
content = msg[:content]
|
|
252
|
-
if content.is_a?(String)
|
|
253
|
-
content
|
|
254
|
-
elsif content.is_a?(Array)
|
|
255
|
-
content.map { |block| block.is_a?(Hash) ? (block[:text] || block.dig(:text) || "") : block.to_s }.compact.join("\n")
|
|
256
|
-
else
|
|
257
|
-
content.to_s
|
|
258
|
-
end
|
|
259
|
-
end.join("\n\n")
|
|
260
|
-
else
|
|
261
|
-
""
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
# Convert regular messages to Anthropic format
|
|
265
|
-
anthropic_messages = regular_messages.map { |msg| convert_to_anthropic_message(msg, caching_enabled) }
|
|
266
|
-
|
|
267
|
-
# Convert tools to Anthropic format
|
|
268
|
-
anthropic_tools = tools&.map { |tool| convert_to_anthropic_tool(tool, caching_enabled) }
|
|
269
|
-
|
|
270
|
-
# Add cache_control to last tool if caching is enabled
|
|
271
|
-
if caching_enabled && anthropic_tools&.any?
|
|
272
|
-
anthropic_tools.last[:cache_control] = { type: "ephemeral" }
|
|
273
|
-
end
|
|
103
|
+
private
|
|
274
104
|
|
|
275
|
-
|
|
276
|
-
model: model,
|
|
277
|
-
max_tokens: max_tokens,
|
|
278
|
-
messages: anthropic_messages
|
|
279
|
-
}
|
|
105
|
+
# ── Anthropic request / response ──────────────────────────────────────────
|
|
280
106
|
|
|
281
|
-
|
|
282
|
-
|
|
107
|
+
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
|
|
108
|
+
# Apply cache_control to the message that marks the cache breakpoint
|
|
109
|
+
messages = apply_message_caching(messages) if caching_enabled
|
|
283
110
|
|
|
284
|
-
body
|
|
111
|
+
body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
112
|
+
response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
|
|
285
113
|
|
|
286
|
-
|
|
114
|
+
raise_error(response) unless response.status == 200
|
|
115
|
+
MessageFormat::Anthropic.parse_response(JSON.parse(response.body))
|
|
287
116
|
end
|
|
288
117
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
content
|
|
293
|
-
tool_calls = message[:tool_calls]
|
|
294
|
-
|
|
295
|
-
# For assistant messages with tool_calls, convert tool_calls to content blocks
|
|
296
|
-
if role == "assistant" && tool_calls && tool_calls.any?
|
|
297
|
-
# Build content blocks from both content and tool_calls
|
|
298
|
-
blocks = []
|
|
299
|
-
|
|
300
|
-
# Add text content first
|
|
301
|
-
if content.is_a?(String) && !content.empty?
|
|
302
|
-
blocks << { type: "text", text: content }
|
|
303
|
-
elsif content.is_a?(Array)
|
|
304
|
-
blocks.concat(content.map do |block|
|
|
305
|
-
case block[:type]
|
|
306
|
-
when "text"
|
|
307
|
-
{ type: "text", text: block[:text] }
|
|
308
|
-
when "image_url"
|
|
309
|
-
url = block.dig(:image_url, :url) || block[:url]
|
|
310
|
-
if url&.start_with?("data:")
|
|
311
|
-
match = url.match(/^data:([^;]+);base64,(.*)$/)
|
|
312
|
-
if match
|
|
313
|
-
{ type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
|
|
314
|
-
else
|
|
315
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
316
|
-
end
|
|
317
|
-
else
|
|
318
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
319
|
-
end
|
|
320
|
-
else
|
|
321
|
-
block
|
|
322
|
-
end
|
|
323
|
-
end)
|
|
324
|
-
end
|
|
325
|
-
|
|
326
|
-
# Add tool_use blocks
|
|
327
|
-
tool_calls.each do |call|
|
|
328
|
-
# Handle both OpenAI format (with function key) and direct format
|
|
329
|
-
if call[:function]
|
|
330
|
-
# OpenAI format
|
|
331
|
-
tool_use_block = {
|
|
332
|
-
type: "tool_use",
|
|
333
|
-
id: call[:id],
|
|
334
|
-
name: call[:function][:name],
|
|
335
|
-
input: call[:function][:arguments].is_a?(String) ? JSON.parse(call[:function][:arguments]) : call[:function][:arguments]
|
|
336
|
-
}
|
|
337
|
-
else
|
|
338
|
-
# Direct format
|
|
339
|
-
tool_use_block = {
|
|
340
|
-
type: "tool_use",
|
|
341
|
-
id: call[:id],
|
|
342
|
-
name: call[:name],
|
|
343
|
-
input: call[:arguments].is_a?(String) ? JSON.parse(call[:arguments]) : call[:arguments]
|
|
344
|
-
}
|
|
345
|
-
end
|
|
346
|
-
blocks << tool_use_block
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
return { role: role, content: blocks }
|
|
350
|
-
end
|
|
351
|
-
|
|
352
|
-
# Convert string content to array format
|
|
353
|
-
if content.is_a?(String)
|
|
354
|
-
return { role: role, content: [{ type: "text", text: content }] }
|
|
355
|
-
end
|
|
356
|
-
|
|
357
|
-
# Handle array content (already in some format)
|
|
358
|
-
if content.is_a?(Array)
|
|
359
|
-
blocks = content.map do |block|
|
|
360
|
-
case block[:type]
|
|
361
|
-
when "text"
|
|
362
|
-
{ type: "text", text: block[:text] }
|
|
363
|
-
when "image_url"
|
|
364
|
-
url = block.dig(:image_url, :url) || block[:url]
|
|
365
|
-
if url&.start_with?("data:")
|
|
366
|
-
match = url.match(/^data:([^;]+);base64,(.*)$/)
|
|
367
|
-
if match
|
|
368
|
-
{ type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
|
|
369
|
-
else
|
|
370
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
371
|
-
end
|
|
372
|
-
else
|
|
373
|
-
{ type: "image", source: { type: "url", url: url } }
|
|
374
|
-
end
|
|
375
|
-
else
|
|
376
|
-
block
|
|
377
|
-
end
|
|
378
|
-
end
|
|
379
|
-
return { role: role, content: blocks }
|
|
380
|
-
end
|
|
381
|
-
|
|
382
|
-
{ role: role, content: message[:content] }
|
|
118
|
+
def parse_simple_anthropic_response(response)
|
|
119
|
+
raise_error(response) unless response.status == 200
|
|
120
|
+
data = JSON.parse(response.body)
|
|
121
|
+
(data["content"] || []).select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
|
|
383
122
|
end
|
|
384
123
|
|
|
385
|
-
#
|
|
386
|
-
# Handles both OpenAI format (with nested function key) and direct format
|
|
387
|
-
def convert_to_anthropic_tool(tool, caching_enabled)
|
|
388
|
-
# Handle OpenAI format from to_function_definition
|
|
389
|
-
func = tool[:function] || tool
|
|
390
|
-
{
|
|
391
|
-
name: func[:name],
|
|
392
|
-
description: func[:description],
|
|
393
|
-
input_schema: func[:parameters]
|
|
394
|
-
}
|
|
395
|
-
end
|
|
124
|
+
# ── OpenAI request / response ─────────────────────────────────────────────
|
|
396
125
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
data = JSON.parse(response.body)
|
|
402
|
-
content_blocks = data["content"] || []
|
|
403
|
-
usage = data["usage"] || {}
|
|
404
|
-
|
|
405
|
-
# Extract content
|
|
406
|
-
content = content_blocks.select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
|
|
407
|
-
|
|
408
|
-
# Extract tool calls
|
|
409
|
-
tool_calls = content_blocks.select { |b| b["type"] == "tool_use" }.map do |tc|
|
|
410
|
-
{
|
|
411
|
-
id: tc["id"],
|
|
412
|
-
type: "function",
|
|
413
|
-
name: tc["name"],
|
|
414
|
-
arguments: tc["input"].is_a?(String) ? tc["input"] : tc["input"].to_json
|
|
415
|
-
}
|
|
416
|
-
end
|
|
417
|
-
|
|
418
|
-
# Parse finish reason
|
|
419
|
-
finish_reason = case data["stop_reason"]
|
|
420
|
-
when "end_turn" then "stop"
|
|
421
|
-
when "tool_use" then "tool_calls"
|
|
422
|
-
when "max_tokens" then "length"
|
|
423
|
-
else data["stop_reason"]
|
|
424
|
-
end
|
|
425
|
-
|
|
426
|
-
# Build usage data
|
|
427
|
-
usage_data = {
|
|
428
|
-
prompt_tokens: usage["input_tokens"],
|
|
429
|
-
completion_tokens: usage["output_tokens"],
|
|
430
|
-
total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
# Add cache metrics if present
|
|
434
|
-
if usage["cache_read_input_tokens"]
|
|
435
|
-
usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
|
|
436
|
-
end
|
|
437
|
-
if usage["cache_creation_input_tokens"]
|
|
438
|
-
usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
|
|
439
|
-
end
|
|
440
|
-
|
|
441
|
-
{
|
|
442
|
-
content: content,
|
|
443
|
-
tool_calls: tool_calls,
|
|
444
|
-
finish_reason: finish_reason,
|
|
445
|
-
usage: usage_data,
|
|
446
|
-
raw_api_usage: usage
|
|
447
|
-
}
|
|
448
|
-
else
|
|
449
|
-
raise_error(response)
|
|
450
|
-
end
|
|
451
|
-
end
|
|
126
|
+
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
|
|
127
|
+
# Apply cache_control markers to messages when caching is enabled.
|
|
128
|
+
# OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
|
|
129
|
+
messages = apply_message_caching(messages) if caching_enabled
|
|
452
130
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
case response.status
|
|
456
|
-
when 200
|
|
457
|
-
data = JSON.parse(response.body)
|
|
458
|
-
content_blocks = data["content"] || []
|
|
131
|
+
body = MessageFormat::OpenAI.build_request_body(messages, model, tools, max_tokens, caching_enabled)
|
|
132
|
+
response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
|
|
459
133
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
else
|
|
463
|
-
raise_error(response)
|
|
464
|
-
end
|
|
134
|
+
raise_error(response) unless response.status == 200
|
|
135
|
+
MessageFormat::OpenAI.parse_response(JSON.parse(response.body))
|
|
465
136
|
end
|
|
466
137
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
model_str = model.to_s.downcase
|
|
471
|
-
|
|
472
|
-
# Only Claude models support prompt caching
|
|
473
|
-
return false unless model_str.include?("claude")
|
|
474
|
-
|
|
475
|
-
# Pattern matching for supported Claude versions:
|
|
476
|
-
# - claude-3.5-*, claude-3-5-*, claude-3.5.*
|
|
477
|
-
# - claude-3.7-*, claude-3-7-*, claude-3.7.*
|
|
478
|
-
# - claude-4*, claude-sonnet-4*
|
|
479
|
-
# - anthropic/claude-sonnet-4* (OpenRouter format)
|
|
480
|
-
cache_pattern = /
|
|
481
|
-
claude # Must contain "claude"
|
|
482
|
-
(?: # Non-capturing group for version patterns
|
|
483
|
-
(?:-3[-.]?[5-9])| # 3.5, 3.6, 3.7, 3.8, 3.9 or 3-5, 3-6, etc
|
|
484
|
-
(?:-[4-9])| # 4, 5, 6, 7, 8, 9 (future versions)
|
|
485
|
-
(?:-sonnet-[34]) # OpenRouter: claude-sonnet-3, claude-sonnet-4
|
|
486
|
-
)
|
|
487
|
-
/x
|
|
488
|
-
|
|
489
|
-
model_str.match?(cache_pattern)
|
|
138
|
+
def parse_simple_openai_response(response)
|
|
139
|
+
raise_error(response) unless response.status == 200
|
|
140
|
+
JSON.parse(response.body)["choices"].first["message"]["content"]
|
|
490
141
|
end
|
|
491
142
|
|
|
492
|
-
#
|
|
493
|
-
|
|
494
|
-
#
|
|
495
|
-
#
|
|
496
|
-
#
|
|
497
|
-
# (identified by system_injected: true), we place cache_control
|
|
498
|
-
# on the second-to-last message instead. This avoids cache write
|
|
499
|
-
# for the compression instruction, saving ~31K tokens per compression.
|
|
143
|
+
# ── Prompt caching helpers ────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
# Add cache_control marker to the appropriate message in the array.
|
|
146
|
+
# Strategy: mark the last message, unless that message is a compression
|
|
147
|
+
# instruction (system_injected: true) — in that case mark the one before it.
|
|
500
148
|
def apply_message_caching(messages)
|
|
501
149
|
return messages if messages.empty?
|
|
502
150
|
|
|
503
|
-
# Determine cache breakpoint index
|
|
504
|
-
# If last message is a compression instruction, use second-to-last
|
|
505
151
|
cache_index = if is_compression_instruction?(messages.last)
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
# Safety check: ensure cache_index is valid
|
|
512
|
-
cache_index = [0, cache_index].max
|
|
152
|
+
[messages.length - 2, 0].max
|
|
153
|
+
else
|
|
154
|
+
messages.length - 1
|
|
155
|
+
end
|
|
513
156
|
|
|
514
|
-
# Add cache_control to the target message
|
|
515
157
|
messages.map.with_index do |msg, idx|
|
|
516
|
-
|
|
517
|
-
add_cache_control_to_message(msg)
|
|
518
|
-
else
|
|
519
|
-
msg
|
|
520
|
-
end
|
|
158
|
+
idx == cache_index ? add_cache_control_to_message(msg) : msg
|
|
521
159
|
end
|
|
522
160
|
end
|
|
523
161
|
|
|
524
|
-
#
|
|
525
|
-
# Claude API format: content: [{type: "text", text: "...", cache_control: {...}}]
|
|
162
|
+
# Wrap or extend the message's content with a cache_control marker.
|
|
526
163
|
def add_cache_control_to_message(msg)
|
|
527
164
|
content = msg[:content]
|
|
528
165
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
end
|
|
540
|
-
end
|
|
541
|
-
else
|
|
542
|
-
# Unknown format, return as-is
|
|
543
|
-
return msg
|
|
544
|
-
end
|
|
166
|
+
content_array = case content
|
|
167
|
+
when String
|
|
168
|
+
[{ type: "text", text: content, cache_control: { type: "ephemeral" } }]
|
|
169
|
+
when Array
|
|
170
|
+
content.map.with_index do |block, idx|
|
|
171
|
+
idx == content.length - 1 ? block.merge(cache_control: { type: "ephemeral" }) : block
|
|
172
|
+
end
|
|
173
|
+
else
|
|
174
|
+
return msg
|
|
175
|
+
end
|
|
545
176
|
|
|
546
177
|
msg.merge(content: content_array)
|
|
547
178
|
end
|
|
548
179
|
|
|
549
|
-
|
|
550
|
-
# Compression instructions are marked with system_injected: true
|
|
551
|
-
private def is_compression_instruction?(message)
|
|
180
|
+
def is_compression_instruction?(message)
|
|
552
181
|
message.is_a?(Hash) && message[:system_injected] == true
|
|
553
182
|
end
|
|
554
183
|
|
|
555
|
-
#
|
|
556
|
-
def deep_clone(obj)
|
|
557
|
-
case obj
|
|
558
|
-
when Hash
|
|
559
|
-
obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
|
|
560
|
-
when Array
|
|
561
|
-
obj.map { |item| deep_clone(item) }
|
|
562
|
-
when String, Symbol, Integer, Float, TrueClass, FalseClass, NilClass
|
|
563
|
-
obj
|
|
564
|
-
else
|
|
565
|
-
obj.dup rescue obj
|
|
566
|
-
end
|
|
567
|
-
end
|
|
184
|
+
# ── HTTP connections ──────────────────────────────────────────────────────
|
|
568
185
|
|
|
569
|
-
# Connection for OpenAI API format (uses Bearer token)
|
|
570
186
|
def openai_connection
|
|
571
187
|
@openai_connection ||= Faraday.new(url: @base_url) do |conn|
|
|
572
|
-
conn.headers["Content-Type"]
|
|
188
|
+
conn.headers["Content-Type"] = "application/json"
|
|
573
189
|
conn.headers["Authorization"] = "Bearer #{@api_key}"
|
|
574
|
-
conn.options.timeout
|
|
190
|
+
conn.options.timeout = 120
|
|
575
191
|
conn.options.open_timeout = 10
|
|
576
|
-
conn.ssl.verify
|
|
192
|
+
conn.ssl.verify = false
|
|
577
193
|
conn.adapter Faraday.default_adapter
|
|
578
194
|
end
|
|
579
195
|
end
|
|
580
196
|
|
|
581
|
-
# Connection for Anthropic API format (uses x-api-key header)
|
|
582
197
|
def anthropic_connection
|
|
583
198
|
@anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
|
|
584
|
-
conn.headers["Content-Type"]
|
|
585
|
-
conn.headers["x-api-key"]
|
|
199
|
+
conn.headers["Content-Type"] = "application/json"
|
|
200
|
+
conn.headers["x-api-key"] = @api_key
|
|
586
201
|
conn.headers["anthropic-version"] = "2023-06-01"
|
|
587
202
|
conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
|
|
588
|
-
conn.options.timeout
|
|
203
|
+
conn.options.timeout = 120
|
|
589
204
|
conn.options.open_timeout = 10
|
|
590
|
-
conn.ssl.verify
|
|
205
|
+
conn.ssl.verify = false
|
|
591
206
|
conn.adapter Faraday.default_adapter
|
|
592
207
|
end
|
|
593
208
|
end
|
|
594
209
|
|
|
595
|
-
|
|
596
|
-
case response.status
|
|
597
|
-
when 200
|
|
598
|
-
{ success: true }
|
|
599
|
-
else
|
|
600
|
-
# Extract error details for better user feedback
|
|
601
|
-
error_body = begin
|
|
602
|
-
JSON.parse(response.body)
|
|
603
|
-
rescue JSON::ParserError
|
|
604
|
-
nil
|
|
605
|
-
end
|
|
606
|
-
error_message = extract_error_message(error_body, response.body)
|
|
607
|
-
{ success: false, error: error_message }
|
|
608
|
-
end
|
|
609
|
-
end
|
|
210
|
+
# ── Error handling ────────────────────────────────────────────────────────
|
|
610
211
|
|
|
611
|
-
def
|
|
612
|
-
|
|
613
|
-
when 200
|
|
614
|
-
data = JSON.parse(response.body)
|
|
615
|
-
data["choices"].first["message"]["content"]
|
|
616
|
-
else
|
|
617
|
-
raise_error(response)
|
|
618
|
-
end
|
|
619
|
-
end
|
|
212
|
+
def handle_test_response(response)
|
|
213
|
+
return { success: true } if response.status == 200
|
|
620
214
|
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
when 200
|
|
624
|
-
data = JSON.parse(response.body)
|
|
625
|
-
message = data["choices"].first["message"]
|
|
626
|
-
usage = data["usage"]
|
|
627
|
-
|
|
628
|
-
# Store raw API usage for debugging
|
|
629
|
-
raw_api_usage = usage.dup
|
|
630
|
-
|
|
631
|
-
# Parse usage with cache information
|
|
632
|
-
usage_data = {
|
|
633
|
-
prompt_tokens: usage["prompt_tokens"],
|
|
634
|
-
completion_tokens: usage["completion_tokens"],
|
|
635
|
-
total_tokens: usage["total_tokens"]
|
|
636
|
-
}
|
|
637
|
-
|
|
638
|
-
# Add OpenRouter cost information if present
|
|
639
|
-
if usage["cost"]
|
|
640
|
-
usage_data[:api_cost] = usage["cost"]
|
|
641
|
-
end
|
|
642
|
-
|
|
643
|
-
# Add cache metrics if present (Claude API with prompt caching)
|
|
644
|
-
if usage["cache_creation_input_tokens"]
|
|
645
|
-
usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
|
|
646
|
-
end
|
|
647
|
-
if usage["cache_read_input_tokens"]
|
|
648
|
-
usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
|
|
649
|
-
end
|
|
650
|
-
|
|
651
|
-
# Add OpenRouter cache information from prompt_tokens_details
|
|
652
|
-
if usage["prompt_tokens_details"]
|
|
653
|
-
details = usage["prompt_tokens_details"]
|
|
654
|
-
if details["cached_tokens"] && details["cached_tokens"] > 0
|
|
655
|
-
usage_data[:cache_read_input_tokens] = details["cached_tokens"]
|
|
656
|
-
end
|
|
657
|
-
if details["cache_write_tokens"] && details["cache_write_tokens"] > 0
|
|
658
|
-
usage_data[:cache_creation_input_tokens] = details["cache_write_tokens"]
|
|
659
|
-
end
|
|
660
|
-
end
|
|
661
|
-
|
|
662
|
-
result = {
|
|
663
|
-
content: message["content"],
|
|
664
|
-
tool_calls: parse_tool_calls(message["tool_calls"]),
|
|
665
|
-
finish_reason: data["choices"].first["finish_reason"],
|
|
666
|
-
usage: usage_data,
|
|
667
|
-
raw_api_usage: raw_api_usage
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
# Preserve reasoning_content if present (e.g. Kimi/Moonshot extended thinking).
|
|
671
|
-
# The API requires this field to be echoed back in the message history on
|
|
672
|
-
# subsequent requests, otherwise it returns HTTP 400.
|
|
673
|
-
result[:reasoning_content] = message["reasoning_content"] if message["reasoning_content"]
|
|
674
|
-
|
|
675
|
-
result
|
|
676
|
-
else
|
|
677
|
-
raise_error(response)
|
|
678
|
-
end
|
|
215
|
+
error_body = JSON.parse(response.body) rescue nil
|
|
216
|
+
{ success: false, error: extract_error_message(error_body, response.body) }
|
|
679
217
|
end
|
|
680
218
|
|
|
681
|
-
private
|
|
682
|
-
|
|
683
219
|
def raise_error(response)
|
|
684
|
-
|
|
685
|
-
error_body = begin
|
|
686
|
-
JSON.parse(response.body)
|
|
687
|
-
rescue JSON::ParserError
|
|
688
|
-
nil
|
|
689
|
-
end
|
|
690
|
-
|
|
691
|
-
# Extract meaningful error message from response
|
|
220
|
+
error_body = JSON.parse(response.body) rescue nil
|
|
692
221
|
error_message = extract_error_message(error_body, response.body)
|
|
693
222
|
|
|
694
223
|
case response.status
|
|
695
224
|
when 400
|
|
696
|
-
|
|
697
|
-
hint = if error_message.downcase.include?("unavailable") || error_message.downcase.include?("quota")
|
|
698
|
-
" (possibly out of credits)"
|
|
699
|
-
else
|
|
700
|
-
""
|
|
701
|
-
end
|
|
225
|
+
hint = error_message.downcase.match?(/unavailable|quota/) ? " (possibly out of credits)" : ""
|
|
702
226
|
raise AgentError, "API request failed (400): #{error_message}#{hint}"
|
|
703
|
-
when 401
|
|
704
|
-
|
|
705
|
-
when
|
|
706
|
-
|
|
707
|
-
when
|
|
708
|
-
|
|
709
|
-
when 429
|
|
710
|
-
raise AgentError, "Rate limit exceeded"
|
|
711
|
-
when 500..599
|
|
712
|
-
raise AgentError, "Server error (#{response.status}): #{error_message}"
|
|
713
|
-
else
|
|
714
|
-
raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
|
|
227
|
+
when 401 then raise AgentError, "Invalid API key"
|
|
228
|
+
when 403 then raise AgentError, "Access denied: #{error_message}"
|
|
229
|
+
when 404 then raise AgentError, "API endpoint not found: #{error_message}"
|
|
230
|
+
when 429 then raise AgentError, "Rate limit exceeded"
|
|
231
|
+
when 500..599 then raise AgentError, "Server error (#{response.status}): #{error_message}"
|
|
232
|
+
else raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
|
|
715
233
|
end
|
|
716
234
|
end
|
|
717
235
|
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
# Check if response is HTML (indicates wrong endpoint or server error)
|
|
721
|
-
if raw_body.is_a?(String) && raw_body.strip.start_with?('<!DOCTYPE', '<html')
|
|
236
|
+
def extract_error_message(error_body, raw_body)
|
|
237
|
+
if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<html")
|
|
722
238
|
return "Invalid API endpoint or server error (received HTML instead of JSON)"
|
|
723
239
|
end
|
|
724
240
|
|
|
725
241
|
return raw_body unless error_body.is_a?(Hash)
|
|
726
242
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
# 4. error (string)
|
|
732
|
-
# 5. raw body (truncated if too long)
|
|
733
|
-
if error_body["upstreamMessage"] && !error_body["upstreamMessage"].empty?
|
|
734
|
-
error_body["upstreamMessage"]
|
|
735
|
-
elsif error_body.dig("error", "message")
|
|
736
|
-
error_body.dig("error", "message")
|
|
737
|
-
elsif error_body["message"]
|
|
738
|
-
error_body["message"]
|
|
739
|
-
elsif error_body["error"].is_a?(String)
|
|
740
|
-
error_body["error"]
|
|
741
|
-
else
|
|
742
|
-
# Truncate raw body if too long
|
|
743
|
-
raw_body.is_a?(String) && raw_body.length > 200 ? "#{raw_body[0..200]}..." : raw_body
|
|
744
|
-
end
|
|
243
|
+
error_body["upstreamMessage"]&.then { |m| return m unless m.empty? }
|
|
244
|
+
error_body.dig("error", "message")&.then { |m| return m }
|
|
245
|
+
error_body["message"]&.then { |m| return m }
|
|
246
|
+
error_body["error"].is_a?(String) ? error_body["error"] : (raw_body.to_s[0..200] + (raw_body.to_s.length > 200 ? "..." : ""))
|
|
745
247
|
end
|
|
746
248
|
|
|
747
|
-
|
|
748
|
-
return nil if tool_calls.nil? || tool_calls.empty?
|
|
749
|
-
|
|
750
|
-
tool_calls.map do |call|
|
|
751
|
-
# Handle cases where function might be nil or missing
|
|
752
|
-
function_data = call["function"] || {}
|
|
249
|
+
# ── Utilities ─────────────────────────────────────────────────────────────
|
|
753
250
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
}
|
|
251
|
+
def deep_clone(obj)
|
|
252
|
+
case obj
|
|
253
|
+
when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
|
|
254
|
+
when Array then obj.map { |item| deep_clone(item) }
|
|
255
|
+
else obj
|
|
760
256
|
end
|
|
761
257
|
end
|
|
762
258
|
end
|