openclacky 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/clacky/client.rb CHANGED
@@ -15,748 +15,276 @@ module Clacky
15
15
  @use_anthropic_format = anthropic_format
16
16
  end
17
17
 
18
- # Check if using Anthropic API format
19
- # Determined by the anthropic_format flag passed in constructor
20
- # (based on config source: ANTHROPIC_* env vars = true, config file = false)
18
+ # Returns true when the client is talking directly to the Anthropic API
19
+ # (determined at construction time via the anthropic_format flag).
21
20
  def anthropic_format?(model = nil)
22
21
  @use_anthropic_format
23
22
  end
24
23
 
25
- # Test API connection by sending a minimal request
26
- # Returns { success: true } on success, { success: false, error: "message" } on failure
24
+ # ── Connection test ───────────────────────────────────────────────────────
25
+
26
+ # Test API connection by sending a minimal request.
27
+ # Returns { success: true } or { success: false, error: "..." }.
27
28
  def test_connection(model:)
28
- if anthropic_format?(model)
29
- response = anthropic_connection.post("v1/messages") do |req|
30
- req.body = {
31
- model: model,
32
- max_tokens: 16,
33
- messages: [
34
- {
35
- role: "user",
36
- content: "hi"
37
- }
38
- ]
39
- }.to_json
40
- end
41
- handle_test_response(response)
42
- else
43
- response = openai_connection.post("chat/completions") do |req|
44
- req.body = {
45
- model: model,
46
- max_tokens: 16,
47
- messages: [
48
- {
49
- role: "user",
50
- content: "hi"
51
- }
52
- ]
53
- }.to_json
54
- end
55
- handle_test_response(response)
56
- end
29
+ minimal_body = { model: model, max_tokens: 16,
30
+ messages: [{ role: "user", content: "hi" }] }.to_json
31
+
32
+ response = if anthropic_format?
33
+ anthropic_connection.post("v1/messages") { |r| r.body = minimal_body }
34
+ else
35
+ openai_connection.post("chat/completions") { |r| r.body = minimal_body }
36
+ end
37
+ handle_test_response(response)
57
38
  rescue Faraday::Error => e
58
- # Network or connection errors
59
39
  { success: false, error: "Connection error: #{e.message}" }
60
40
  rescue => e
61
- # Other errors
62
41
  { success: false, error: e.message }
63
42
  end
64
43
 
44
+ # ── Simple (non-agent) helpers ────────────────────────────────────────────
45
+
46
+ # Send a single string message and return the reply text.
65
47
  def send_message(content, model:, max_tokens:)
66
- if anthropic_format?(model)
67
- response = anthropic_connection.post("v1/messages") do |req|
68
- req.body = {
69
- model: model,
70
- max_tokens: max_tokens,
71
- messages: [
72
- {
73
- role: "user",
74
- content: content
75
- }
76
- ]
77
- }.to_json
78
- end
79
- handle_anthropic_simple_response(response)
80
- else
81
- response = openai_connection.post("chat/completions") do |req|
82
- req.body = {
83
- model: model,
84
- max_tokens: max_tokens,
85
- messages: [
86
- {
87
- role: "user",
88
- content: content
89
- }
90
- ]
91
- }.to_json
92
- end
93
- handle_response(response)
94
- end
48
+ messages = [{ role: "user", content: content }]
49
+ send_messages(messages, model: model, max_tokens: max_tokens)
95
50
  end
96
51
 
52
+ # Send a messages array and return the reply text.
97
53
  def send_messages(messages, model:, max_tokens:)
98
- if anthropic_format?(model)
99
- # Convert to Anthropic format
100
- body = build_anthropic_body(messages, model, [], max_tokens, false)
101
- response = anthropic_connection.post("v1/messages") do |req|
102
- req.body = body.to_json
103
- end
104
- handle_anthropic_simple_response(response)
54
+ if anthropic_format?
55
+ body = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
56
+ response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
57
+ parse_simple_anthropic_response(response)
105
58
  else
106
- response = openai_connection.post("chat/completions") do |req|
107
- req.body = {
108
- model: model,
109
- max_tokens: max_tokens,
110
- messages: messages
111
- }.to_json
112
- end
113
-
114
- handle_response(response)
59
+ body = { model: model, max_tokens: max_tokens, messages: messages }
60
+ response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
61
+ parse_simple_openai_response(response)
115
62
  end
116
63
  end
117
64
 
118
- # Send messages with function calling (tools) support
119
- # Options:
120
- # - enable_caching: Enable prompt caching for system prompt and tools (default: false)
121
- def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
122
- # Auto-detect API format based on model name and base_url
123
- is_anthropic = anthropic_format?(model)
124
-
125
- # Deep clone messages to avoid modifying the original array
126
- processed_messages = messages.map { |msg| deep_clone(msg) }
65
+ # ── Agent main path ───────────────────────────────────────────────────────
127
66
 
128
- # Apply caching if enabled and supported
129
- caching_supported = supports_prompt_caching?(model)
130
- caching_enabled = enable_caching && caching_supported
67
+ # Send messages with tool-calling support.
68
+ # Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }
69
+ def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
70
+ caching_enabled = enable_caching && supports_prompt_caching?(model)
71
+ cloned = deep_clone(messages)
131
72
 
132
- if is_anthropic
133
- send_anthropic_request(processed_messages, model, tools, max_tokens, caching_enabled)
73
+ if anthropic_format?
74
+ send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled)
134
75
  else
135
- send_openai_request(processed_messages, model, tools, max_tokens, caching_enabled)
76
+ send_openai_request(cloned, model, tools, max_tokens, caching_enabled)
136
77
  end
137
78
  end
138
79
 
139
- # Format tool results based on API type
140
- # Anthropic API: tool results go in user message content array
141
- # OpenAI API: tool results are separate messages with role: "tool"
80
+ # Format tool results into canonical messages ready to append to @messages.
81
+ # Always returns canonical format (role: "tool") regardless of API type
82
+ # conversion to API-native happens inside each send_*_request.
142
83
  def format_tool_results(response, tool_results, model:)
143
84
  return [] if tool_results.empty?
144
85
 
145
- is_anthropic = anthropic_format?(model)
146
-
147
- # Create a map of tool_call_id -> result for quick lookup
148
- results_map = tool_results.each_with_object({}) do |result, hash|
149
- hash[result[:id]] = result
150
- end
151
-
152
- if is_anthropic
153
- # Anthropic format: tool results in user message content array
154
- tool_result_blocks = response[:tool_calls].map do |tool_call|
155
- result = results_map[tool_call[:id]]
156
- if result
157
- {
158
- type: "tool_result",
159
- tool_use_id: tool_call[:id],
160
- content: result[:content]
161
- }
162
- else
163
- {
164
- type: "tool_result",
165
- tool_use_id: tool_call[:id],
166
- content: JSON.generate({ error: "Tool result missing" })
167
- }
168
- end
169
- end
170
-
171
- # Return as a user message
172
- [
173
- {
174
- role: "user",
175
- content: tool_result_blocks
176
- }
177
- ]
86
+ if anthropic_format?
87
+ MessageFormat::Anthropic.format_tool_results(response, tool_results)
178
88
  else
179
- # OpenAI format: tool results as separate messages
180
- response[:tool_calls].map do |tool_call|
181
- result = results_map[tool_call[:id]]
182
- if result
183
- {
184
- role: "tool",
185
- tool_call_id: result[:id],
186
- content: result[:content]
187
- }
188
- else
189
- {
190
- role: "tool",
191
- tool_call_id: tool_call[:id],
192
- content: JSON.generate({ error: "Tool result missing" })
193
- }
194
- end
195
- end
89
+ MessageFormat::OpenAI.format_tool_results(response, tool_results)
196
90
  end
197
91
  end
198
92
 
199
- private
200
-
201
- # Send request using OpenAI API format
202
- def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
203
- # Apply caching to messages if enabled
204
- processed_messages = caching_enabled ? apply_message_caching(messages) : messages
205
-
206
- body = {
207
- model: model,
208
- max_tokens: max_tokens,
209
- messages: processed_messages
210
- }
211
-
212
- # Add tools if provided
213
- if tools&.any?
214
- if caching_enabled
215
- cached_tools = tools.map { |tool| deep_clone(tool) }
216
- cached_tools.last[:cache_control] = { type: "ephemeral" }
217
- body[:tools] = cached_tools
218
- else
219
- body[:tools] = tools
220
- end
221
- end
222
-
223
- response = openai_connection.post("chat/completions") do |req|
224
- req.body = body.to_json
225
- end
226
-
227
- handle_tool_response(response)
228
- end
93
+ # ── Prompt-caching support ────────────────────────────────────────────────
229
94
 
230
- # Send request using Anthropic API format
231
- def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
232
- # Convert OpenAI message format to Anthropic format
233
- body = build_anthropic_body(messages, model, tools, max_tokens, caching_enabled)
234
-
235
- response = anthropic_connection.post("v1/messages") do |req|
236
- req.body = body.to_json
237
- end
238
-
239
- handle_anthropic_response(response)
240
- end
241
-
242
- # Build request body in Anthropic format
243
- def build_anthropic_body(messages, model, tools, max_tokens, caching_enabled)
244
- # Separate system messages from regular messages
245
- system_messages = messages.select { |m| m[:role] == "system" }
246
- regular_messages = messages.reject { |m| m[:role] == "system" }
247
-
248
- # Build system for Anthropic - use string format which is most compatible
249
- system = if system_messages.any?
250
- system_messages.map do |msg|
251
- content = msg[:content]
252
- if content.is_a?(String)
253
- content
254
- elsif content.is_a?(Array)
255
- content.map { |block| block.is_a?(Hash) ? (block[:text] || block.dig(:text) || "") : block.to_s }.compact.join("\n")
256
- else
257
- content.to_s
258
- end
259
- end.join("\n\n")
260
- else
261
- ""
262
- end
263
-
264
- # Convert regular messages to Anthropic format
265
- anthropic_messages = regular_messages.map { |msg| convert_to_anthropic_message(msg, caching_enabled) }
266
-
267
- # Convert tools to Anthropic format
268
- anthropic_tools = tools&.map { |tool| convert_to_anthropic_tool(tool, caching_enabled) }
269
-
270
- # Add cache_control to last tool if caching is enabled
271
- if caching_enabled && anthropic_tools&.any?
272
- anthropic_tools.last[:cache_control] = { type: "ephemeral" }
273
- end
274
-
275
- body = {
276
- model: model,
277
- max_tokens: max_tokens,
278
- messages: anthropic_messages
279
- }
280
-
281
- # Only include system if it's not empty
282
- body[:system] = system if system && !system.empty?
283
-
284
- body[:tools] = anthropic_tools if anthropic_tools&.any?
95
+ # Returns true for Claude 3.5+ models that support prompt caching.
96
+ def supports_prompt_caching?(model)
97
+ model_str = model.to_s.downcase
98
+ return false unless model_str.include?("claude")
285
99
 
286
- body
100
+ model_str.match?(/claude(?:-3[-.]?[5-9]|-[4-9]|-sonnet-[34])/)
287
101
  end
288
102
 
289
- # Convert a message to Anthropic format
290
- def convert_to_anthropic_message(message, caching_enabled)
291
- role = message[:role]
292
- content = message[:content]
293
- tool_calls = message[:tool_calls]
294
-
295
- # For assistant messages with tool_calls, convert tool_calls to content blocks
296
- if role == "assistant" && tool_calls && tool_calls.any?
297
- # Build content blocks from both content and tool_calls
298
- blocks = []
299
-
300
- # Add text content first
301
- if content.is_a?(String) && !content.empty?
302
- blocks << { type: "text", text: content }
303
- elsif content.is_a?(Array)
304
- blocks.concat(content.map do |block|
305
- case block[:type]
306
- when "text"
307
- { type: "text", text: block[:text] }
308
- when "image_url"
309
- url = block.dig(:image_url, :url) || block[:url]
310
- if url&.start_with?("data:")
311
- match = url.match(/^data:([^;]+);base64,(.*)$/)
312
- if match
313
- { type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
314
- else
315
- { type: "image", source: { type: "url", url: url } }
316
- end
317
- else
318
- { type: "image", source: { type: "url", url: url } }
319
- end
320
- else
321
- block
322
- end
323
- end)
324
- end
103
+ private
325
104
 
326
- # Add tool_use blocks
327
- tool_calls.each do |call|
328
- # Handle both OpenAI format (with function key) and direct format
329
- if call[:function]
330
- # OpenAI format
331
- tool_use_block = {
332
- type: "tool_use",
333
- id: call[:id],
334
- name: call[:function][:name],
335
- input: call[:function][:arguments].is_a?(String) ? JSON.parse(call[:function][:arguments]) : call[:function][:arguments]
336
- }
337
- else
338
- # Direct format
339
- tool_use_block = {
340
- type: "tool_use",
341
- id: call[:id],
342
- name: call[:name],
343
- input: call[:arguments].is_a?(String) ? JSON.parse(call[:arguments]) : call[:arguments]
344
- }
345
- end
346
- blocks << tool_use_block
347
- end
105
+ # ── Anthropic request / response ──────────────────────────────────────────
348
106
 
349
- return { role: role, content: blocks }
350
- end
107
+ def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
108
+ # Apply cache_control to the message that marks the cache breakpoint
109
+ messages = apply_message_caching(messages) if caching_enabled
351
110
 
352
- # Convert string content to array format
353
- if content.is_a?(String)
354
- return { role: role, content: [{ type: "text", text: content }] }
355
- end
111
+ body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
112
+ response = anthropic_connection.post("v1/messages") { |r| r.body = body.to_json }
356
113
 
357
- # Handle array content (already in some format)
358
- if content.is_a?(Array)
359
- blocks = content.map do |block|
360
- case block[:type]
361
- when "text"
362
- { type: "text", text: block[:text] }
363
- when "image_url"
364
- url = block.dig(:image_url, :url) || block[:url]
365
- if url&.start_with?("data:")
366
- match = url.match(/^data:([^;]+);base64,(.*)$/)
367
- if match
368
- { type: "image", source: { type: "base64", media_type: match[1], data: match[2] } }
369
- else
370
- { type: "image", source: { type: "url", url: url } }
371
- end
372
- else
373
- { type: "image", source: { type: "url", url: url } }
374
- end
375
- else
376
- block
377
- end
378
- end
379
- return { role: role, content: blocks }
380
- end
381
-
382
- { role: role, content: message[:content] }
114
+ raise_error(response) unless response.status == 200
115
+ MessageFormat::Anthropic.parse_response(JSON.parse(response.body))
383
116
  end
384
117
 
385
- # Convert a tool to Anthropic format
386
- # Handles both OpenAI format (with nested function key) and direct format
387
- def convert_to_anthropic_tool(tool, caching_enabled)
388
- # Handle OpenAI format from to_function_definition
389
- func = tool[:function] || tool
390
- {
391
- name: func[:name],
392
- description: func[:description],
393
- input_schema: func[:parameters]
394
- }
118
+ def parse_simple_anthropic_response(response)
119
+ raise_error(response) unless response.status == 200
120
+ data = JSON.parse(response.body)
121
+ (data["content"] || []).select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
395
122
  end
396
123
 
397
- # Handle Anthropic API response
398
- def handle_anthropic_response(response)
399
- case response.status
400
- when 200
401
- data = JSON.parse(response.body)
402
- content_blocks = data["content"] || []
403
- usage = data["usage"] || {}
404
-
405
- # Extract content
406
- content = content_blocks.select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
407
-
408
- # Extract tool calls
409
- tool_calls = content_blocks.select { |b| b["type"] == "tool_use" }.map do |tc|
410
- {
411
- id: tc["id"],
412
- type: "function",
413
- name: tc["name"],
414
- arguments: tc["input"].is_a?(String) ? tc["input"] : tc["input"].to_json
415
- }
416
- end
124
+ # ── OpenAI request / response ─────────────────────────────────────────────
417
125
 
418
- # Parse finish reason
419
- finish_reason = case data["stop_reason"]
420
- when "end_turn" then "stop"
421
- when "tool_use" then "tool_calls"
422
- when "max_tokens" then "length"
423
- else data["stop_reason"]
424
- end
126
+ def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
127
+ # Apply cache_control markers to messages when caching is enabled.
128
+ # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
129
+ messages = apply_message_caching(messages) if caching_enabled
425
130
 
426
- # Build usage data
427
- usage_data = {
428
- prompt_tokens: usage["input_tokens"],
429
- completion_tokens: usage["output_tokens"],
430
- total_tokens: usage["input_tokens"].to_i + usage["output_tokens"].to_i
431
- }
131
+ body = MessageFormat::OpenAI.build_request_body(messages, model, tools, max_tokens, caching_enabled)
132
+ response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
432
133
 
433
- # Add cache metrics if present
434
- if usage["cache_read_input_tokens"]
435
- usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
436
- end
437
- if usage["cache_creation_input_tokens"]
438
- usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
439
- end
440
-
441
- {
442
- content: content,
443
- tool_calls: tool_calls,
444
- finish_reason: finish_reason,
445
- usage: usage_data,
446
- raw_api_usage: usage
447
- }
448
- else
449
- raise_error(response)
450
- end
134
+ raise_error(response) unless response.status == 200
135
+ MessageFormat::OpenAI.parse_response(JSON.parse(response.body))
451
136
  end
452
137
 
453
- # Handle simple Anthropic response (without tool calls)
454
- def handle_anthropic_simple_response(response)
455
- case response.status
456
- when 200
457
- data = JSON.parse(response.body)
458
- content_blocks = data["content"] || []
459
-
460
- # Extract and return text content only (simple format, consistent with OpenAI)
461
- content_blocks.select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
462
- else
463
- raise_error(response)
464
- end
138
+ def parse_simple_openai_response(response)
139
+ raise_error(response) unless response.status == 200
140
+ JSON.parse(response.body)["choices"].first["message"]["content"]
465
141
  end
466
142
 
467
- # Check if the model supports prompt caching
468
- # Currently only Claude 3.5+ models support this feature
469
- def supports_prompt_caching?(model)
470
- model_str = model.to_s.downcase
471
-
472
- # Only Claude models support prompt caching
473
- return false unless model_str.include?("claude")
143
+ # ── Prompt caching helpers ────────────────────────────────────────────────
474
144
 
475
- # Pattern matching for supported Claude versions:
476
- # - claude-3.5-*, claude-3-5-*, claude-3.5.*
477
- # - claude-3.7-*, claude-3-7-*, claude-3.7.*
478
- # - claude-4*, claude-sonnet-4*
479
- # - anthropic/claude-sonnet-4* (OpenRouter format)
480
- cache_pattern = /
481
- claude # Must contain "claude"
482
- (?: # Non-capturing group for version patterns
483
- (?:-3[-.]?[5-9])| # 3.5, 3.6, 3.7, 3.8, 3.9 or 3-5, 3-6, etc
484
- (?:-[4-9])| # 4, 5, 6, 7, 8, 9 (future versions)
485
- (?:-sonnet-[34]) # OpenRouter: claude-sonnet-3, claude-sonnet-4
486
- )
487
- /x
488
-
489
- model_str.match?(cache_pattern)
490
- end
491
-
492
- # Apply cache_control to messages for prompt caching
493
- # Strategy: Add cache_control on the LAST message before tools
494
- # This ensures everything from start to the breakpoint gets cached
145
+ # Add cache_control marker to the appropriate message in the array.
146
+ #
147
+ # Strategy: mark the MOST RECENT assistant message that is not system_injected.
148
+ #
149
+ # Rationale: Anthropic prompt caching is prefix-based. If we mark the last
150
+ # message (typically the latest user turn or tool_result), the cached prefix
151
+ # changes every request because tool results and user inputs vary each turn.
152
+ # This causes alternating miss/hit patterns observed in production.
495
153
  #
496
- # Special case: When compression instruction is the last message
497
- # (identified by system_injected: true), we place cache_control
498
- # on the second-to-last message instead. This avoids cache write
499
- # for the compression instruction, saving ~31K tokens per compression.
154
+ # By placing the breakpoint on the most recent ASSISTANT message that precedes
155
+ # the current user turn, we cache everything up to (and including) the last LLM
156
+ # reply, which is a stable prefix. The new user turn + any tool results live AFTER the
157
+ # breakpoint and are not cached (they change every request anyway).
158
+ #
159
+ # Special cases:
160
+ # - Compression instruction as last message: skip it, find the assistant before it.
161
+ # - No non-injected assistant message found: fall back to marking the last message.
500
162
  def apply_message_caching(messages)
501
163
  return messages if messages.empty?
502
164
 
503
- # Determine cache breakpoint index
504
- # If last message is a compression instruction, use second-to-last
505
- cache_index = if is_compression_instruction?(messages.last)
506
- messages.length - 2
507
- else
508
- messages.length - 1
165
+ # Walk backwards to find the last assistant message that is not system_injected.
166
+ # That is the stable "end of history" to anchor the cache breakpoint on.
167
+ cache_index = nil
168
+ messages.each_with_index.reverse_each do |msg, idx|
169
+ next if msg[:system_injected]
170
+ if msg[:role] == "assistant"
171
+ cache_index = idx
172
+ break
173
+ end
509
174
  end
510
175
 
511
- # Safety check: ensure cache_index is valid
512
- cache_index = [0, cache_index].max
176
+ # Fallback: if no assistant message found, mark the last message
177
+ cache_index ||= messages.length - 1
513
178
 
514
- # Add cache_control to the target message
515
179
  messages.map.with_index do |msg, idx|
516
- if idx == cache_index
517
- add_cache_control_to_message(msg)
518
- else
519
- msg
520
- end
180
+ idx == cache_index ? add_cache_control_to_message(msg) : msg
521
181
  end
522
182
  end
523
183
 
524
- # Convert message content to array format and add cache_control
525
- # Claude API format: content: [{type: "text", text: "...", cache_control: {...}}]
184
+ # Wrap or extend the message's content with a cache_control marker.
526
185
  def add_cache_control_to_message(msg)
527
186
  content = msg[:content]
528
187
 
529
- # Convert content to array format if it's a string
530
- content_array = if content.is_a?(String)
531
- [{ type: "text", text: content, cache_control: { type: "ephemeral" } }]
532
- elsif content.is_a?(Array)
533
- # Content is already an array, add cache_control to the last block
534
- content.map.with_index do |block, idx|
535
- if idx == content.length - 1
536
- block.merge(cache_control: { type: "ephemeral" })
537
- else
538
- block
539
- end
540
- end
541
- else
542
- # Unknown format, return as-is
543
- return msg
544
- end
188
+ content_array = case content
189
+ when String
190
+ [{ type: "text", text: content, cache_control: { type: "ephemeral" } }]
191
+ when Array
192
+ content.map.with_index do |block, idx|
193
+ idx == content.length - 1 ? block.merge(cache_control: { type: "ephemeral" }) : block
194
+ end
195
+ else
196
+ return msg
197
+ end
545
198
 
546
199
  msg.merge(content: content_array)
547
200
  end
548
201
 
549
- # Check if message is a compression instruction (from MessageCompressor)
550
- # Compression instructions are marked with system_injected: true
551
- private def is_compression_instruction?(message)
552
- message.is_a?(Hash) && message[:system_injected] == true
202
+ # Only true for the compression-instruction user message inserted by MessageCompressor.
203
+ # Skill shim messages (also system_injected: true) must NOT be treated as compression
204
+ # instructions — doing so shifts the cache breakpoint into the volatile skill content
205
+ # block, causing a full cache miss on every slash command turn.
206
+ def is_compression_instruction?(message)
207
+ return false unless message.is_a?(Hash)
208
+ return false unless message[:system_injected] == true
209
+ return false unless message[:role] == "user"
210
+
211
+ content = message[:content].to_s
212
+ content.include?("CRITICAL: TASK CHANGE - MEMORY COMPRESSION MODE") ||
213
+ content.include?("MEMORY COMPRESSION MODE")
553
214
  end
554
215
 
555
- # Deep clone a hash/array structure (for tool definitions)
556
- def deep_clone(obj)
557
- case obj
558
- when Hash
559
- obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
560
- when Array
561
- obj.map { |item| deep_clone(item) }
562
- when String, Symbol, Integer, Float, TrueClass, FalseClass, NilClass
563
- obj
564
- else
565
- obj.dup rescue obj
566
- end
567
- end
216
+ # ── HTTP connections ──────────────────────────────────────────────────────
568
217
 
569
- # Connection for OpenAI API format (uses Bearer token)
570
218
  def openai_connection
571
219
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
572
- conn.headers["Content-Type"] = "application/json"
220
+ conn.headers["Content-Type"] = "application/json"
573
221
  conn.headers["Authorization"] = "Bearer #{@api_key}"
574
- conn.options.timeout = 120
222
+ conn.options.timeout = 120
575
223
  conn.options.open_timeout = 10
576
- conn.ssl.verify = false
224
+ conn.ssl.verify = false
577
225
  conn.adapter Faraday.default_adapter
578
226
  end
579
227
  end
580
228
 
581
- # Connection for Anthropic API format (uses x-api-key header)
582
229
  def anthropic_connection
583
230
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
584
- conn.headers["Content-Type"] = "application/json"
585
- conn.headers["x-api-key"] = @api_key
231
+ conn.headers["Content-Type"] = "application/json"
232
+ conn.headers["x-api-key"] = @api_key
586
233
  conn.headers["anthropic-version"] = "2023-06-01"
587
234
  conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
588
- conn.options.timeout = 120
235
+ conn.options.timeout = 120
589
236
  conn.options.open_timeout = 10
590
- conn.ssl.verify = false
237
+ conn.ssl.verify = false
591
238
  conn.adapter Faraday.default_adapter
592
239
  end
593
240
  end
594
241
 
595
- def handle_test_response(response)
596
- case response.status
597
- when 200
598
- { success: true }
599
- else
600
- # Extract error details for better user feedback
601
- error_body = begin
602
- JSON.parse(response.body)
603
- rescue JSON::ParserError
604
- nil
605
- end
606
- error_message = extract_error_message(error_body, response.body)
607
- { success: false, error: error_message }
608
- end
609
- end
610
-
611
- def handle_response(response)
612
- case response.status
613
- when 200
614
- data = JSON.parse(response.body)
615
- data["choices"].first["message"]["content"]
616
- else
617
- raise_error(response)
618
- end
619
- end
620
-
621
- def handle_tool_response(response)
622
- case response.status
623
- when 200
624
- data = JSON.parse(response.body)
625
- message = data["choices"].first["message"]
626
- usage = data["usage"]
627
-
628
- # Store raw API usage for debugging
629
- raw_api_usage = usage.dup
630
-
631
- # Parse usage with cache information
632
- usage_data = {
633
- prompt_tokens: usage["prompt_tokens"],
634
- completion_tokens: usage["completion_tokens"],
635
- total_tokens: usage["total_tokens"]
636
- }
637
-
638
- # Add OpenRouter cost information if present
639
- if usage["cost"]
640
- usage_data[:api_cost] = usage["cost"]
641
- end
642
-
643
- # Add cache metrics if present (Claude API with prompt caching)
644
- if usage["cache_creation_input_tokens"]
645
- usage_data[:cache_creation_input_tokens] = usage["cache_creation_input_tokens"]
646
- end
647
- if usage["cache_read_input_tokens"]
648
- usage_data[:cache_read_input_tokens] = usage["cache_read_input_tokens"]
649
- end
650
-
651
- # Add OpenRouter cache information from prompt_tokens_details
652
- if usage["prompt_tokens_details"]
653
- details = usage["prompt_tokens_details"]
654
- if details["cached_tokens"] && details["cached_tokens"] > 0
655
- usage_data[:cache_read_input_tokens] = details["cached_tokens"]
656
- end
657
- if details["cache_write_tokens"] && details["cache_write_tokens"] > 0
658
- usage_data[:cache_creation_input_tokens] = details["cache_write_tokens"]
659
- end
660
- end
242
+ # ── Error handling ────────────────────────────────────────────────────────
661
243
 
662
- result = {
663
- content: message["content"],
664
- tool_calls: parse_tool_calls(message["tool_calls"]),
665
- finish_reason: data["choices"].first["finish_reason"],
666
- usage: usage_data,
667
- raw_api_usage: raw_api_usage
668
- }
669
-
670
- # Preserve reasoning_content if present (e.g. Kimi/Moonshot extended thinking).
671
- # The API requires this field to be echoed back in the message history on
672
- # subsequent requests, otherwise it returns HTTP 400.
673
- result[:reasoning_content] = message["reasoning_content"] if message["reasoning_content"]
244
+ def handle_test_response(response)
245
+ return { success: true } if response.status == 200
674
246
 
675
- result
676
- else
677
- raise_error(response)
678
- end
247
+ error_body = JSON.parse(response.body) rescue nil
248
+ { success: false, error: extract_error_message(error_body, response.body) }
679
249
  end
680
250
 
681
- private
682
-
683
251
  def raise_error(response)
684
- # Try to parse error body as JSON for better error messages
685
- error_body = begin
686
- JSON.parse(response.body)
687
- rescue JSON::ParserError
688
- nil
689
- end
690
-
691
- # Extract meaningful error message from response
252
+ error_body = JSON.parse(response.body) rescue nil
692
253
  error_message = extract_error_message(error_body, response.body)
693
254
 
694
255
  case response.status
695
256
  when 400
696
- # Bad request - could be invalid model, quota exceeded, etc.
697
- hint = if error_message.downcase.include?("unavailable") || error_message.downcase.include?("quota")
698
- " (possibly out of credits)"
699
- else
700
- ""
701
- end
257
+ hint = error_message.downcase.match?(/unavailable|quota/) ? " (possibly out of credits)" : ""
702
258
  raise AgentError, "API request failed (400): #{error_message}#{hint}"
703
- when 401
704
- raise AgentError, "Invalid API key"
705
- when 403
706
- raise AgentError, "Access denied: #{error_message}"
707
- when 404
708
- raise AgentError, "API endpoint not found: #{error_message}"
709
- when 429
710
- raise AgentError, "Rate limit exceeded"
711
- when 500..599
712
- raise AgentError, "Server error (#{response.status}): #{error_message}"
713
- else
714
- raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
259
+ when 401 then raise AgentError, "Invalid API key"
260
+ when 403 then raise AgentError, "Access denied: #{error_message}"
261
+ when 404 then raise AgentError, "API endpoint not found: #{error_message}"
262
+ when 429 then raise AgentError, "Rate limit exceeded"
263
+ when 500..599 then raise AgentError, "Server error (#{response.status}): #{error_message}"
264
+ else raise AgentError, "Unexpected error (#{response.status}): #{error_message}"
715
265
  end
716
266
  end
717
267
 
718
- # Extract the most meaningful error message from API response
719
- private def extract_error_message(error_body, raw_body)
720
- # Check if response is HTML (indicates wrong endpoint or server error)
721
- if raw_body.is_a?(String) && raw_body.strip.start_with?('<!DOCTYPE', '<html')
268
+ def extract_error_message(error_body, raw_body)
269
+ if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<html")
722
270
  return "Invalid API endpoint or server error (received HTML instead of JSON)"
723
271
  end
724
272
 
725
273
  return raw_body unless error_body.is_a?(Hash)
726
274
 
727
- # Priority order for error messages:
728
- # 1. upstreamMessage (often contains the real reason)
729
- # 2. error.message (Anthropic format)
730
- # 3. message
731
- # 4. error (string)
732
- # 5. raw body (truncated if too long)
733
- if error_body["upstreamMessage"] && !error_body["upstreamMessage"].empty?
734
- error_body["upstreamMessage"]
735
- elsif error_body.dig("error", "message")
736
- error_body.dig("error", "message")
737
- elsif error_body["message"]
738
- error_body["message"]
739
- elsif error_body["error"].is_a?(String)
740
- error_body["error"]
741
- else
742
- # Truncate raw body if too long
743
- raw_body.is_a?(String) && raw_body.length > 200 ? "#{raw_body[0..200]}..." : raw_body
744
- end
275
+ error_body["upstreamMessage"]&.then { |m| return m unless m.empty? }
276
+ error_body.dig("error", "message")&.then { |m| return m }
277
+ error_body["message"]&.then { |m| return m }
278
+ error_body["error"].is_a?(String) ? error_body["error"] : (raw_body.to_s[0..200] + (raw_body.to_s.length > 200 ? "..." : ""))
745
279
  end
746
280
 
747
- def parse_tool_calls(tool_calls)
748
- return nil if tool_calls.nil? || tool_calls.empty?
281
+ # ── Utilities ─────────────────────────────────────────────────────────────
749
282
 
750
- tool_calls.map do |call|
751
- # Handle cases where function might be nil or missing
752
- function_data = call["function"] || {}
753
-
754
- {
755
- id: call["id"],
756
- type: call["type"],
757
- name: function_data["name"],
758
- arguments: function_data["arguments"]
759
- }
283
+ def deep_clone(obj)
284
+ case obj
285
+ when Hash then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
286
+ when Array then obj.map { |item| deep_clone(item) }
287
+ else obj
760
288
  end
761
289
  end
762
290
  end