rubyn-code 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +151 -5
- data/db/migrations/013_add_failed_status_to_tasks.rb +51 -0
- data/lib/rubyn_code/agent/background_job_handler.rb +71 -0
- data/lib/rubyn_code/agent/conversation.rb +84 -56
- data/lib/rubyn_code/agent/dynamic_tool_schema.rb +152 -0
- data/lib/rubyn_code/agent/feedback_handler.rb +49 -0
- data/lib/rubyn_code/agent/llm_caller.rb +157 -0
- data/lib/rubyn_code/agent/loop.rb +182 -683
- data/lib/rubyn_code/agent/loop_detector.rb +50 -11
- data/lib/rubyn_code/agent/prompts.rb +109 -0
- data/lib/rubyn_code/agent/response_modes.rb +111 -0
- data/lib/rubyn_code/agent/response_parser.rb +111 -0
- data/lib/rubyn_code/agent/system_prompt_builder.rb +211 -0
- data/lib/rubyn_code/agent/tool_processor.rb +178 -0
- data/lib/rubyn_code/agent/usage_tracker.rb +59 -0
- data/lib/rubyn_code/auth/key_encryption.rb +118 -0
- data/lib/rubyn_code/auth/oauth.rb +80 -64
- data/lib/rubyn_code/auth/server.rb +21 -24
- data/lib/rubyn_code/auth/token_store.rb +80 -52
- data/lib/rubyn_code/autonomous/daemon.rb +146 -32
- data/lib/rubyn_code/autonomous/idle_poller.rb +4 -24
- data/lib/rubyn_code/autonomous/task_claimer.rb +46 -44
- data/lib/rubyn_code/background/worker.rb +64 -76
- data/lib/rubyn_code/cli/app.rb +159 -114
- data/lib/rubyn_code/cli/commands/doctor.rb +73 -0
- data/lib/rubyn_code/cli/commands/mcp.rb +77 -0
- data/lib/rubyn_code/cli/commands/model.rb +105 -18
- data/lib/rubyn_code/cli/commands/new_session.rb +45 -0
- data/lib/rubyn_code/cli/commands/provider.rb +123 -0
- data/lib/rubyn_code/cli/commands/skill.rb +52 -3
- data/lib/rubyn_code/cli/daemon_runner.rb +64 -11
- data/lib/rubyn_code/cli/first_run.rb +159 -0
- data/lib/rubyn_code/cli/renderer.rb +109 -60
- data/lib/rubyn_code/cli/repl.rb +48 -374
- data/lib/rubyn_code/cli/repl_commands.rb +177 -0
- data/lib/rubyn_code/cli/repl_lifecycle.rb +76 -0
- data/lib/rubyn_code/cli/repl_setup.rb +181 -0
- data/lib/rubyn_code/cli/setup.rb +6 -2
- data/lib/rubyn_code/cli/stream_formatter.rb +56 -49
- data/lib/rubyn_code/cli/version_check.rb +28 -11
- data/lib/rubyn_code/config/defaults.rb +11 -0
- data/lib/rubyn_code/config/project_profile.rb +185 -0
- data/lib/rubyn_code/config/schema.json +49 -0
- data/lib/rubyn_code/config/settings.rb +103 -1
- data/lib/rubyn_code/config/validator.rb +63 -0
- data/lib/rubyn_code/context/auto_compact.rb +1 -1
- data/lib/rubyn_code/context/context_budget.rb +182 -0
- data/lib/rubyn_code/context/context_collapse.rb +34 -4
- data/lib/rubyn_code/context/decision_compactor.rb +99 -0
- data/lib/rubyn_code/context/manager.rb +44 -8
- data/lib/rubyn_code/context/manual_compact.rb +1 -1
- data/lib/rubyn_code/context/micro_compact.rb +29 -19
- data/lib/rubyn_code/context/schema_filter.rb +64 -0
- data/lib/rubyn_code/db/connection.rb +31 -26
- data/lib/rubyn_code/db/migrator.rb +44 -28
- data/lib/rubyn_code/hooks/built_in.rb +14 -10
- data/lib/rubyn_code/hooks/registry.rb +4 -0
- data/lib/rubyn_code/ide/adapters/tool_output.rb +330 -0
- data/lib/rubyn_code/ide/client.rb +110 -0
- data/lib/rubyn_code/ide/handlers/accept_edit_handler.rb +35 -0
- data/lib/rubyn_code/ide/handlers/approve_tool_use_handler.rb +34 -0
- data/lib/rubyn_code/ide/handlers/cancel_handler.rb +41 -0
- data/lib/rubyn_code/ide/handlers/config_get_handler.rb +63 -0
- data/lib/rubyn_code/ide/handlers/config_set_handler.rb +86 -0
- data/lib/rubyn_code/ide/handlers/initialize_handler.rb +79 -0
- data/lib/rubyn_code/ide/handlers/models_list_handler.rb +39 -0
- data/lib/rubyn_code/ide/handlers/prompt_handler.rb +215 -0
- data/lib/rubyn_code/ide/handlers/review_handler.rb +110 -0
- data/lib/rubyn_code/ide/handlers/session_fork_handler.rb +49 -0
- data/lib/rubyn_code/ide/handlers/session_list_handler.rb +41 -0
- data/lib/rubyn_code/ide/handlers/session_reset_handler.rb +31 -0
- data/lib/rubyn_code/ide/handlers/session_resume_handler.rb +42 -0
- data/lib/rubyn_code/ide/handlers/shutdown_handler.rb +37 -0
- data/lib/rubyn_code/ide/handlers.rb +76 -0
- data/lib/rubyn_code/ide/protocol.rb +111 -0
- data/lib/rubyn_code/ide/server.rb +186 -0
- data/lib/rubyn_code/index/codebase_index.rb +311 -0
- data/lib/rubyn_code/learning/extractor.rb +65 -82
- data/lib/rubyn_code/learning/injector.rb +22 -23
- data/lib/rubyn_code/learning/instinct.rb +71 -42
- data/lib/rubyn_code/learning/shortcut.rb +95 -0
- data/lib/rubyn_code/llm/adapters/anthropic.rb +274 -0
- data/lib/rubyn_code/llm/adapters/anthropic_compatible.rb +60 -0
- data/lib/rubyn_code/llm/adapters/anthropic_streaming.rb +215 -0
- data/lib/rubyn_code/llm/adapters/base.rb +35 -0
- data/lib/rubyn_code/llm/adapters/json_parsing.rb +21 -0
- data/lib/rubyn_code/llm/adapters/openai.rb +246 -0
- data/lib/rubyn_code/llm/adapters/openai_compatible.rb +50 -0
- data/lib/rubyn_code/llm/adapters/openai_message_translator.rb +90 -0
- data/lib/rubyn_code/llm/adapters/openai_streaming.rb +141 -0
- data/lib/rubyn_code/llm/adapters/prompt_caching.rb +60 -0
- data/lib/rubyn_code/llm/client.rb +75 -247
- data/lib/rubyn_code/llm/model_router.rb +237 -0
- data/lib/rubyn_code/llm/streaming.rb +4 -227
- data/lib/rubyn_code/mcp/client.rb +1 -1
- data/lib/rubyn_code/mcp/config.rb +10 -12
- data/lib/rubyn_code/mcp/sse_transport.rb +15 -13
- data/lib/rubyn_code/mcp/stdio_transport.rb +16 -18
- data/lib/rubyn_code/mcp/tool_bridge.rb +31 -62
- data/lib/rubyn_code/memory/search.rb +1 -0
- data/lib/rubyn_code/memory/session_persistence.rb +59 -58
- data/lib/rubyn_code/memory/store.rb +42 -55
- data/lib/rubyn_code/observability/budget_enforcer.rb +46 -32
- data/lib/rubyn_code/observability/cost_calculator.rb +32 -8
- data/lib/rubyn_code/observability/skill_analytics.rb +116 -0
- data/lib/rubyn_code/observability/token_analytics.rb +130 -0
- data/lib/rubyn_code/observability/usage_reporter.rb +79 -61
- data/lib/rubyn_code/output/diff_renderer.rb +102 -77
- data/lib/rubyn_code/output/formatter.rb +11 -11
- data/lib/rubyn_code/permissions/policy.rb +11 -13
- data/lib/rubyn_code/permissions/prompter.rb +8 -9
- data/lib/rubyn_code/protocols/plan_approval.rb +25 -20
- data/lib/rubyn_code/self_test.rb +315 -0
- data/lib/rubyn_code/skills/catalog.rb +66 -0
- data/lib/rubyn_code/skills/document.rb +33 -29
- data/lib/rubyn_code/skills/loader.rb +43 -0
- data/lib/rubyn_code/skills/ttl_manager.rb +100 -0
- data/lib/rubyn_code/sub_agents/runner.rb +20 -25
- data/lib/rubyn_code/tasks/dag.rb +25 -24
- data/lib/rubyn_code/tasks/models.rb +1 -0
- data/lib/rubyn_code/tools/ask_user.rb +44 -0
- data/lib/rubyn_code/tools/background_run.rb +2 -1
- data/lib/rubyn_code/tools/base.rb +39 -32
- data/lib/rubyn_code/tools/bash.rb +7 -1
- data/lib/rubyn_code/tools/edit_file.rb +130 -17
- data/lib/rubyn_code/tools/executor.rb +130 -25
- data/lib/rubyn_code/tools/file_cache.rb +95 -0
- data/lib/rubyn_code/tools/git_commit.rb +12 -10
- data/lib/rubyn_code/tools/git_log.rb +12 -10
- data/lib/rubyn_code/tools/glob.rb +29 -7
- data/lib/rubyn_code/tools/grep.rb +8 -1
- data/lib/rubyn_code/tools/ide_diagnostics.rb +51 -0
- data/lib/rubyn_code/tools/ide_symbols.rb +53 -0
- data/lib/rubyn_code/tools/load_skill.rb +13 -6
- data/lib/rubyn_code/tools/memory_search.rb +14 -13
- data/lib/rubyn_code/tools/memory_write.rb +2 -1
- data/lib/rubyn_code/tools/output_compressor.rb +190 -0
- data/lib/rubyn_code/tools/read_file.rb +17 -6
- data/lib/rubyn_code/tools/registry.rb +11 -0
- data/lib/rubyn_code/tools/review_pr.rb +127 -80
- data/lib/rubyn_code/tools/run_specs.rb +26 -15
- data/lib/rubyn_code/tools/schema.rb +4 -10
- data/lib/rubyn_code/tools/spawn_agent.rb +113 -82
- data/lib/rubyn_code/tools/spawn_teammate.rb +107 -64
- data/lib/rubyn_code/tools/spec_output_parser.rb +118 -0
- data/lib/rubyn_code/tools/task.rb +17 -17
- data/lib/rubyn_code/tools/web_fetch.rb +62 -47
- data/lib/rubyn_code/tools/web_search.rb +66 -48
- data/lib/rubyn_code/tools/write_file.rb +76 -1
- data/lib/rubyn_code/version.rb +1 -1
- data/lib/rubyn_code.rb +62 -1
- data/skills/rubyn_self_test.md +133 -0
- metadata +83 -1
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../message_builder'
|
|
4
|
+
|
|
5
|
+
module RubynCode
|
|
6
|
+
module LLM
|
|
7
|
+
module Adapters
|
|
8
|
+
# SSE streaming parser for the OpenAI Chat Completions API.
#
# Raw response chunks are pushed in through #feed. Complete SSE events are
# cut out of an internal buffer, their `data:` payloads are parsed as JSON,
# and content deltas / tool_call fragments are accumulated. #finalize turns
# the accumulated state into a normalized RubynCode::LLM::Response.
#
# The optional block given to .new is invoked with an Event of type
# :text_delta (data: { text: String }) for every non-empty content delta.
class OpenAIStreaming
  include JsonParsing

  Event = Data.define(:type, :data)

  # OpenAI finish_reason => normalized stop_reason. Unmapped reasons are
  # passed through unchanged by #finalize.
  STOP_REASON_MAP = {
    'stop' => 'end_turn',
    'tool_calls' => 'tool_use',
    'length' => 'max_tokens',
    'content_filter' => 'end_turn'
  }.freeze

  # Blank-line sequences that terminate an SSE event. The event-stream
  # format allows CRLF as well as LF line endings, so both are recognised.
  EVENT_SEPARATORS = ["\r\n\r\n", "\n\n"].freeze
  # Field name that carries the JSON payload; the space after the colon
  # is optional in the event-stream format.
  DATA_FIELD = 'data:'
  # Payload that marks the end of the stream instead of carrying JSON.
  DONE_SENTINEL = '[DONE]'

  def initialize(&block)
    @callback = block
    @buffer = +''
    @content_text = +''
    @tool_calls = {}
    @response_id = nil
    @model = nil
    @finish_reason = nil
    @usage = nil
  end

  # Appends a raw chunk of the HTTP body and processes every event that
  # is now complete. A chunk may end in the middle of an event; the rest
  # stays buffered until a later #feed or #finalize.
  #
  # @param chunk [String] raw bytes as received from the response body
  def feed(chunk)
    @buffer << chunk
    consume_sse_events
  end

  # Builds the normalized response from everything accumulated so far.
  #
  # A trailing event that was never followed by a blank line is processed
  # first so its content is not silently lost.
  #
  # @return [RubynCode::LLM::Response]
  def finalize
    flush_pending_event

    content = build_content_blocks
    stop = STOP_REASON_MAP[@finish_reason] || @finish_reason || 'end_turn'

    RubynCode::LLM::Response.new(
      id: @response_id,
      content: content,
      stop_reason: stop,
      usage: @usage || RubynCode::LLM::Usage.new(input_tokens: 0, output_tokens: 0)
    )
  end

  private

  # Cuts complete events off the front of the buffer, one at a time.
  def consume_sse_events
    while (boundary = next_event_boundary)
      event = @buffer.slice!(0, boundary)
      process_sse_line(event.strip)
    end
  end

  # Offset just past the earliest event separator in the buffer, or nil
  # when the buffer holds no complete event. Plain substring search is
  # used (not a Regexp) so a chunk that ends mid-character cannot raise.
  def next_event_boundary
    hits = EVENT_SEPARATORS.filter_map do |separator|
      start = @buffer.index(separator)
      [start, start + separator.length] if start
    end
    hits.min_by(&:first)&.last
  end

  # Processes whatever is left in the buffer as a final event.
  # Relies on parse_json returning nil for unparsable input, which the
  # rest of this class already depends on.
  def flush_pending_event
    return if @buffer.empty?

    pending = @buffer.strip
    @buffer.clear
    process_sse_line(pending) unless pending.empty?
  end

  # Handles one SSE event (all lines up to the blank line). Events with
  # no data field, the [DONE] sentinel, and payloads that are not a JSON
  # object are ignored.
  def process_sse_line(line)
    payload = data_payload(line)
    return if payload.nil? || payload.empty? || payload == DONE_SENTINEL

    data = parse_json(payload)
    return unless data.is_a?(Hash)

    handle_chunk(data)
  end

  # Extracts the data field(s) of an event. Other fields (event:, id:,
  # retry:) and comment lines are skipped; multiple data lines are joined
  # with a newline as the event-stream format prescribes.
  #
  # @return [String, nil] payload text, or nil when the event has no data
  def data_payload(event)
    fields = event.each_line.filter_map do |raw|
      field = raw.strip
      field.delete_prefix(DATA_FIELD).delete_prefix(' ') if field.start_with?(DATA_FIELD)
    end
    fields.join("\n") unless fields.empty?
  end

  # Records response metadata from one parsed chunk and dispatches the
  # delta of its first choice.
  def handle_chunk(data)
    @response_id ||= data['id']
    @model ||= data['model']
    extract_usage(data)

    choice = data.dig('choices', 0)
    return unless choice

    @finish_reason = choice['finish_reason'] if choice['finish_reason']
    process_delta(choice['delta'] || {})
  end

  # Stores token usage when the chunk carries a usage object; a later
  # chunk with usage replaces an earlier one.
  def extract_usage(data)
    return unless data['usage']

    @usage = RubynCode::LLM::Usage.new(
      input_tokens: data['usage']['prompt_tokens'].to_i,
      output_tokens: data['usage']['completion_tokens'].to_i
    )
  end

  def process_delta(delta)
    handle_content_delta(delta['content']) if delta.key?('content')
    handle_tool_calls_delta(delta['tool_calls']) if delta['tool_calls']
  end

  # Appends a text fragment and notifies the callback, if one was given.
  def handle_content_delta(text)
    return if text.nil? || text.empty?

    @content_text << text
    @callback&.call(Event.new(type: :text_delta, data: { text: text }))
  end

  def handle_tool_calls_delta(tool_calls)
    tool_calls.each { |tool_call| accumulate_tool_call(tool_call) }
  end

  # Tool calls arrive as fragments keyed by their 'index'; fragments with
  # the same index are merged into one entry.
  def accumulate_tool_call(tool_call)
    idx = tool_call['index']
    @tool_calls[idx] ||= { id: nil, name: +'', arguments: +'' }

    entry = @tool_calls[idx]
    entry[:id] = tool_call['id'] if tool_call['id']
    merge_function_delta(entry, tool_call['function'])
  end

  # Concatenates the name and argument fragments; the arguments string is
  # only parsed as JSON once the stream is finalized.
  def merge_function_delta(entry, func)
    return unless func

    entry[:name] << func['name'].to_s
    entry[:arguments] << func['arguments'].to_s
  end

  # Text first (when any was received), then tool calls ordered by index.
  # Arguments that do not parse as JSON fall back to an empty hash.
  def build_content_blocks
    blocks = []
    blocks << RubynCode::LLM::TextBlock.new(text: @content_text) unless @content_text.empty?

    @tool_calls.keys.sort.each do |idx|
      entry = @tool_calls[idx]
      input = parse_json(entry[:arguments]) || {}
      blocks << RubynCode::LLM::ToolUseBlock.new(id: entry[:id], name: entry[:name], input: input)
    end

    blocks
  end
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RubynCode
|
|
4
|
+
module LLM
|
|
5
|
+
module Adapters
|
|
6
|
+
# Anthropic prompt caching logic.
#
# Injects `cache_control: { type: 'ephemeral' }` into system blocks,
# tool definitions, and the last message — enabling Anthropic's prompt
# caching to skip re-processing static content across turns.
#
# The including class must provide `oauth_token?`; when it returns true,
# OAUTH_GATE is sent as the first system block.
module PromptCaching
  CACHE_EPHEMERAL = { type: 'ephemeral' }.freeze

  OAUTH_GATE = "You are Claude Code, Anthropic's official CLI for Claude."

  private

  # Sets body[:system] to a list of cacheable text blocks.
  #
  # A nil or blank system prompt contributes no block: an empty string is
  # truthy in Ruby and would otherwise produce an empty text block tagged
  # with cache_control. body[:system] is left unset when there is nothing
  # to send.
  #
  # @param body [Hash] request body being assembled (mutated)
  # @param system [String, nil] system prompt text
  def apply_system_blocks(body, system)
    blocks = []
    blocks << cached_text_block(OAUTH_GATE) if oauth_token?
    blocks << cached_text_block(system) if cacheable_text?(system)
    body[:system] = blocks unless blocks.empty?
  end

  # Copies the tool definitions into body[:tools] and marks the last one
  # as a cache breakpoint. The caller's tool hashes are not modified.
  #
  # @param body [Hash] request body being assembled (mutated)
  # @param tools [Array<Hash>, nil] tool definitions
  def apply_tool_cache(body, tools)
    return if tools.nil? || tools.empty?

    cached_tools = tools.map(&:dup)
    cached_tools.last[:cache_control] = CACHE_EPHEMERAL
    body[:tools] = cached_tools
  end

  # Returns a copy of the messages whose last message carries a cache
  # breakpoint. nil and empty input are returned as-is.
  #
  # @param messages [Array<Hash>, nil]
  # @return [Array<Hash>, nil]
  def add_message_cache_breakpoint(messages)
    return messages if messages.nil? || messages.empty?

    tagged = messages.map(&:dup)
    tag_last_message_content(tagged.last)
    tagged
  end

  # Tags the final content block of a message (read from the :content
  # key). Array content is copied before tagging so the original blocks
  # stay untouched; String content is converted to block form because a
  # bare string cannot carry cache_control. Blank strings and empty
  # arrays are left as they are.
  def tag_last_message_content(last_msg)
    content = last_msg[:content]
    case content
    when Array
      return if content.empty?

      last_msg[:content] = content.map(&:dup)
      last_block = last_msg[:content].last
      last_block[:cache_control] = CACHE_EPHEMERAL if last_block.is_a?(Hash)
    when String
      last_msg[:content] = [cached_text_block(content)] if cacheable_text?(content)
    end
  end

  # A text content block marked as a cache breakpoint.
  def cached_text_block(text)
    { type: 'text', text: text, cache_control: CACHE_EPHEMERAL }
  end

  # False for nil/false and for strings that are empty or whitespace-only.
  def cacheable_text?(text)
    return false unless text

    !(text.respond_to?(:strip) && text.strip.empty?)
  end
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
@@ -1,284 +1,112 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'faraday'
|
|
4
|
-
require 'json'
|
|
5
|
-
require 'open3'
|
|
6
3
|
require_relative 'message_builder'
|
|
7
4
|
|
|
8
5
|
module RubynCode
|
|
9
6
|
module LLM
|
|
7
|
+
# Thin facade over provider-specific adapters.
#
# All consumers (Agent::Loop, REPL, DaemonRunner) talk to Client.
# Client delegates to the resolved adapter, which can be swapped
# at runtime via `switch_provider!` or the `/model` command.
class Client
  class RequestError < RubynCode::Error; end
  class AuthExpiredError < RubynCode::AuthenticationError; end
  class PromptTooLongError < RequestError; end

  attr_reader :adapter
  attr_accessor :model

  # @param model [String, nil] model name; falls back to the configured model
  # @param provider [String, nil] provider name; falls back to the configured provider
  # @param adapter [Object, nil] pre-built adapter; when given, no adapter
  #   is resolved from the provider name
  def initialize(model: nil, provider: nil, adapter: nil)
    settings = Config::Settings.new
    @model = model || settings.model
    @provider = provider || settings.provider
    @adapter = adapter || resolve_adapter(@provider)
  end

  # Sends a chat request through the active adapter.
  #
  # Recognised keys in opts: :max_tokens (defaults to
  # Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS), :on_text and :task_budget.
  # Other keys are not forwarded.
  #
  # @param model [String, nil] per-call override of the client's model
  # @return whatever the adapter's #chat returns
  def chat(messages:, tools: nil, system: nil, model: nil, **opts)
    effective_model = model || @model
    max_tokens = opts[:max_tokens] || Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS

    @adapter.chat(
      messages: messages,
      tools: tools,
      system: system,
      model: effective_model,
      max_tokens: max_tokens,
      on_text: opts[:on_text],
      task_budget: opts[:task_budget]
    )
  end

  # Same as #chat, with the given block passed along as the on_text callback.
  def stream(messages:, tools: nil, system: nil, model: nil,
             max_tokens: Config::Defaults::CAPPED_MAX_OUTPUT_TOKENS, &block)
    chat(messages: messages, tools: tools, system: system,
         model: model, max_tokens: max_tokens, on_text: block)
  end

  def provider_name
    @adapter.provider_name
  end

  def models
    @adapter.models
  end

  # Switch the active provider (and optionally model) at runtime.
  # Called by the REPL when `/model provider:model` is used.
  #
  # The adapter is resolved before any state is changed, so when the
  # provider is unknown or misconfigured the client keeps its previous
  # provider, adapter and model.
  #
  # @param provider [String] provider name ('anthropic', 'openai', etc.)
  # @param model [String, nil] optional model to set
  # @raise [ConfigError] when the provider cannot be resolved
  def switch_provider!(provider, model: nil)
    new_adapter = resolve_adapter(provider)

    @provider = provider
    @adapter = new_adapter
    @model = model if model
  end

  private

  # Picks the compatible-adapter class from the provider's 'api_format'
  # setting; anything other than 'anthropic' (including a missing value)
  # is treated as OpenAI-compatible.
  def build_custom_adapter(provider, config, base_url, available_models)
    case config.fetch('api_format', 'openai')
    when 'anthropic'
      Adapters::AnthropicCompatible.new(provider: provider, base_url: base_url, available_models: available_models)
    else
      Adapters::OpenAICompatible.new(provider: provider, base_url: base_url, available_models: available_models)
    end
  end

  # Reads the provider's 'models' entry: a Hash yields its values,
  # anything else is coerced to an Array, and a missing entry yields [].
  def extract_model_names(config)
    raw = config&.dig('models')
    return [] unless raw

    raw.is_a?(Hash) ? raw.values : Array(raw)
  end

  # Builds the appropriate adapter for a given provider name.
  #
  # 'anthropic' and 'openai' map to the built-in adapters; any other name
  # is looked up in the provider configuration.
  #
  # @raise [ConfigError] when the provider has no configuration or its
  #   configuration lacks base_url
  def resolve_adapter(provider)
    case provider
    when 'anthropic' then Adapters::Anthropic.new
    when 'openai' then Adapters::OpenAI.new
    else
      config = Config::Settings.new.provider_config(provider)
      base_url = config&.fetch('base_url', nil)

      if config.nil?
        raise ConfigError,
              "Unknown provider '#{provider}'. " \
              "Add it to config.yml under providers.#{provider} with base_url, env_key, and models."
      end

      unless base_url
        raise ConfigError,
              "Provider '#{provider}' is missing base_url in config.yml. " \
              "Add base_url under providers.#{provider} (e.g., base_url: https://api.#{provider}.com/v1)"
      end

      available_models = extract_model_names(config)
      build_custom_adapter(provider, config, base_url, available_models)
    end
  end
end
|
|
283
111
|
end
|
|
284
112
|
end
|