net-llm 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/README.md +54 -3
- data/lib/net/llm/anthropic.rb +9 -87
- data/lib/net/llm/claude.rb +266 -0
- data/lib/net/llm/ollama.rb +91 -25
- data/lib/net/llm/openai.rb +123 -1
- data/lib/net/llm/version.rb +1 -1
- data/lib/net/llm/vertex_ai.rb +38 -0
- data/lib/net/llm.rb +2 -0
- metadata +63 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a7168d17a456b69a77ada9a8c5eb855ae6730a124cdd55dd9e72fee2bfa6fef1
|
|
4
|
+
data.tar.gz: 0fde66f7b0304486f3c5da851d7bbe97347f58fa0692dbd48c8462932ebe1bda
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1969cb1afc0e322f9899f97c95898c9ebf825fd826ce43af0fc0dc66fe86b298ca4538d3426519791661824265d6ff0ce51f3ecd2012b464639a969df4c1142f
|
|
7
|
+
data.tar.gz: 2d765715615d2d66f36fd00e8a64360061f740c8e19af1d1ef80c9d5d1b744964e5f3e741bc7af414d877acec7840adf855ba28159131bad31bcdebbe99dd5b5
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,36 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.5.0] - 2025-01-07
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- VertexAI provider for Claude models via Google Cloud
|
|
7
|
+
- Uses Application Default Credentials (ADC) for authentication
|
|
8
|
+
- Supports streaming and non-streaming modes
|
|
9
|
+
- Model routing with NotImplementedError for unsupported models
|
|
10
|
+
- Unified `fetch(messages, tools = [], &block)` method across all providers
|
|
11
|
+
- Normalized response format with `:delta` and `:complete` types
|
|
12
|
+
- Consistent `tool_calls` structure: `{ id:, name:, arguments: }`
|
|
13
|
+
- Thinking content support in streaming responses
|
|
14
|
+
- Claude class for shared Anthropic protocol logic
|
|
15
|
+
- Automatic system message extraction from messages array
|
|
16
|
+
- Message normalization for tool results and tool_calls
|
|
17
|
+
- Environment variable support for provider configuration
|
|
18
|
+
- `OLLAMA_HOST` for Ollama (default: localhost:11434)
|
|
19
|
+
- `OPENAI_API_KEY` and `OPENAI_BASE_URL` for OpenAI
|
|
20
|
+
- `ANTHROPIC_API_KEY` for Anthropic
|
|
21
|
+
- `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_REGION` for VertexAI
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
- Refactored Anthropic provider to delegate to Claude class
|
|
25
|
+
- Refactored VertexAI provider to delegate to Claude class
|
|
26
|
+
- Updated default Anthropic model to claude-sonnet-4-20250514
|
|
27
|
+
- Updated default VertexAI model to claude-opus-4-5@20251101
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- Fixed streaming tool_calls accumulation in Ollama provider
|
|
31
|
+
- Fixed error responses to include response body for debugging
|
|
32
|
+
- Fixed VertexAI model name format (@ separator instead of -)
|
|
33
|
+
|
|
3
34
|
## [0.4.0] - 2025-10-15
|
|
4
35
|
### Added
|
|
5
36
|
- Added tool/function calling support to Ollama provider
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Net::Llm
|
|
2
2
|
|
|
3
|
-
A minimal Ruby gem providing interfaces to connect to OpenAI, Ollama, and Anthropic (Claude) LLM APIs.
|
|
3
|
+
A minimal Ruby gem providing interfaces to connect to OpenAI, Ollama, Anthropic (Claude), and VertexAI LLM APIs.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -144,7 +144,7 @@ require 'net/llm'
|
|
|
144
144
|
|
|
145
145
|
client = Net::Llm::Anthropic.new(
|
|
146
146
|
api_key: ENV['ANTHROPIC_API_KEY'],
|
|
147
|
-
model: 'claude-
|
|
147
|
+
model: 'claude-sonnet-4-20250514'
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
messages = [
|
|
@@ -194,6 +194,54 @@ tools = [
|
|
|
194
194
|
response = client.messages(messages, tools: tools)
|
|
195
195
|
```
|
|
196
196
|
|
|
197
|
+
### VertexAI
|
|
198
|
+
|
|
199
|
+
```ruby
|
|
200
|
+
require 'net/llm'
|
|
201
|
+
|
|
202
|
+
client = Net::Llm::VertexAI.new(
|
|
203
|
+
project_id: ENV['GOOGLE_CLOUD_PROJECT'],
|
|
204
|
+
region: ENV.fetch('GOOGLE_CLOUD_REGION', 'us-east5'),
|
|
205
|
+
model: 'claude-opus-4-5@20251101'
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
messages = [
|
|
209
|
+
{ role: 'user', content: 'Hello!' }
|
|
210
|
+
]
|
|
211
|
+
|
|
212
|
+
response = client.messages(messages)
|
|
213
|
+
puts response.dig('content', 0, 'text')
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Uses Application Default Credentials (ADC) for authentication. Run `gcloud auth application-default login` to configure.
|
|
217
|
+
|
|
218
|
+
### Unified Fetch Interface
|
|
219
|
+
|
|
220
|
+
All providers support a unified `fetch` method with a normalized response format:
|
|
221
|
+
|
|
222
|
+
```ruby
|
|
223
|
+
result = client.fetch(messages, tools)
|
|
224
|
+
|
|
225
|
+
result[:type] # :complete
|
|
226
|
+
result[:content] # "Response text"
|
|
227
|
+
result[:thinking] # Extended thinking (Claude only)
|
|
228
|
+
result[:tool_calls] # [{ id:, name:, arguments: }]
|
|
229
|
+
result[:stop_reason] # :end_turn, :tool_use, :max_tokens
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
#### Streaming
|
|
233
|
+
|
|
234
|
+
```ruby
|
|
235
|
+
client.fetch(messages, tools) do |chunk|
|
|
236
|
+
case chunk[:type]
|
|
237
|
+
when :delta
|
|
238
|
+
print chunk[:content]
|
|
239
|
+
when :complete
|
|
240
|
+
puts "\nDone: #{chunk[:stop_reason]}"
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
```
|
|
244
|
+
|
|
197
245
|
## Error Handling
|
|
198
246
|
|
|
199
247
|
All non-streaming API methods return error information as a hash when requests fail:
|
|
@@ -227,6 +275,9 @@ Streaming methods still raise exceptions on HTTP errors.
|
|
|
227
275
|
### Anthropic (Claude)
|
|
228
276
|
- `/v1/messages` (with streaming and tools)
|
|
229
277
|
|
|
278
|
+
### VertexAI
|
|
279
|
+
- Claude models via Google Cloud AI Platform (with streaming and tools)
|
|
280
|
+
|
|
230
281
|
## Development
|
|
231
282
|
|
|
232
283
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
@@ -235,7 +286,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
|
235
286
|
|
|
236
287
|
## Contributing
|
|
237
288
|
|
|
238
|
-
|
|
289
|
+
Send me an email. For instructions see https://git-send-email.io/.
|
|
239
290
|
|
|
240
291
|
## License
|
|
241
292
|
|
data/lib/net/llm/anthropic.rb
CHANGED
|
@@ -3,99 +3,21 @@
|
|
|
3
3
|
module Net
|
|
4
4
|
module Llm
|
|
5
5
|
class Anthropic
|
|
6
|
-
attr_reader :api_key, :model, :http
|
|
6
|
+
attr_reader :api_key, :model
|
|
7
7
|
|
|
8
|
-
def initialize(api_key
|
|
8
|
+
def initialize(api_key: ENV.fetch("ANTHROPIC_API_KEY"), model: "claude-sonnet-4-20250514", http: Net::Llm.http)
|
|
9
9
|
@api_key = api_key
|
|
10
10
|
@model = model
|
|
11
|
-
@
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def messages(messages, system: nil, max_tokens: 1024, tools: nil, &block)
|
|
15
|
-
url = "https://api.anthropic.com/v1/messages"
|
|
16
|
-
payload = build_payload(messages, system, max_tokens, tools, block_given?)
|
|
17
|
-
|
|
18
|
-
if block_given?
|
|
19
|
-
stream_request(url, payload, &block)
|
|
20
|
-
else
|
|
21
|
-
post_request(url, payload)
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
private
|
|
26
|
-
|
|
27
|
-
def build_payload(messages, system, max_tokens, tools, stream)
|
|
28
|
-
payload = {
|
|
11
|
+
@claude = Claude.new(
|
|
12
|
+
endpoint: "https://api.anthropic.com/v1/messages",
|
|
13
|
+
headers: { "x-api-key" => api_key, "anthropic-version" => "2023-06-01" },
|
|
29
14
|
model: model,
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
stream: stream
|
|
33
|
-
}
|
|
34
|
-
payload[:system] = system if system
|
|
35
|
-
payload[:tools] = tools if tools
|
|
36
|
-
payload
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def headers
|
|
40
|
-
{
|
|
41
|
-
"x-api-key" => api_key,
|
|
42
|
-
"anthropic-version" => "2023-06-01"
|
|
43
|
-
}
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def post_request(url, payload)
|
|
47
|
-
handle_response(http.post(url, headers: headers, body: payload))
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def handle_response(response)
|
|
51
|
-
if response.is_a?(Net::HTTPSuccess)
|
|
52
|
-
JSON.parse(response.body)
|
|
53
|
-
else
|
|
54
|
-
{ "code" => response.code, "body" => response.body }
|
|
55
|
-
end
|
|
15
|
+
http: http
|
|
16
|
+
)
|
|
56
17
|
end
|
|
57
18
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)
|
|
61
|
-
|
|
62
|
-
buffer = ""
|
|
63
|
-
response.read_body do |chunk|
|
|
64
|
-
buffer += chunk
|
|
65
|
-
|
|
66
|
-
while (event = extract_sse_event(buffer))
|
|
67
|
-
next if event[:data].nil? || event[:data].empty?
|
|
68
|
-
next if event[:data] == "[DONE]"
|
|
69
|
-
|
|
70
|
-
json = JSON.parse(event[:data])
|
|
71
|
-
block.call(json)
|
|
72
|
-
|
|
73
|
-
break if json["type"] == "message_stop"
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def extract_sse_event(buffer)
|
|
80
|
-
event_end = buffer.index("\n\n")
|
|
81
|
-
return nil unless event_end
|
|
82
|
-
|
|
83
|
-
event_data = buffer[0...event_end]
|
|
84
|
-
buffer.replace(buffer[(event_end + 2)..-1] || "")
|
|
85
|
-
|
|
86
|
-
event = {}
|
|
87
|
-
event_data.split("\n").each do |line|
|
|
88
|
-
if line.start_with?("event: ")
|
|
89
|
-
event[:event] = line[7..-1]
|
|
90
|
-
elsif line.start_with?("data: ")
|
|
91
|
-
event[:data] = line[6..-1]
|
|
92
|
-
elsif line == "data:"
|
|
93
|
-
event[:data] = ""
|
|
94
|
-
end
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
event
|
|
98
|
-
end
|
|
19
|
+
def messages(...) = @claude.messages(...)
|
|
20
|
+
def fetch(...) = @claude.fetch(...)
|
|
99
21
|
end
|
|
100
22
|
end
|
|
101
23
|
end
|
data/lib/net/llm/claude.rb
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Net
|
|
4
|
+
module Llm
|
|
5
|
+
class Claude
|
|
6
|
+
attr_reader :endpoint, :headers, :model, :http, :anthropic_version
|
|
7
|
+
|
|
8
|
+
def initialize(endpoint:, headers:, http:, model: nil, anthropic_version: nil)
|
|
9
|
+
@endpoint = endpoint
|
|
10
|
+
@headers_source = headers
|
|
11
|
+
@model = model
|
|
12
|
+
@http = http
|
|
13
|
+
@anthropic_version = anthropic_version
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def headers
|
|
17
|
+
@headers_source.respond_to?(:call) ? @headers_source.call : @headers_source
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def messages(messages, system: nil, max_tokens: 1024, tools: nil, &block)
|
|
21
|
+
payload = build_payload(messages, system, max_tokens, tools, block_given?)
|
|
22
|
+
|
|
23
|
+
if block_given?
|
|
24
|
+
stream_request(payload, &block)
|
|
25
|
+
else
|
|
26
|
+
post_request(payload)
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def fetch(messages, tools = [], &block)
|
|
31
|
+
system_message, user_messages = extract_system_message(messages)
|
|
32
|
+
anthropic_tools = tools.empty? ? nil : tools.map { |t| normalize_tool_for_anthropic(t) }
|
|
33
|
+
|
|
34
|
+
if block_given?
|
|
35
|
+
fetch_streaming(user_messages, anthropic_tools, system: system_message, &block)
|
|
36
|
+
else
|
|
37
|
+
fetch_non_streaming(user_messages, anthropic_tools, system: system_message)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def build_payload(messages, system, max_tokens, tools, stream)
|
|
44
|
+
payload = { max_tokens: max_tokens, messages: messages, stream: stream }
|
|
45
|
+
payload[:model] = model if model
|
|
46
|
+
payload[:anthropic_version] = anthropic_version if anthropic_version
|
|
47
|
+
payload[:system] = system if system
|
|
48
|
+
payload[:tools] = tools if tools
|
|
49
|
+
payload
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def post_request(payload)
|
|
53
|
+
handle_response(http.post(endpoint, headers: headers, body: payload))
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def handle_response(response)
|
|
57
|
+
if response.is_a?(Net::HTTPSuccess)
|
|
58
|
+
JSON.parse(response.body)
|
|
59
|
+
else
|
|
60
|
+
{ "code" => response.code, "body" => response.body }
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def stream_request(payload, &block)
|
|
65
|
+
http.post(endpoint, headers: headers, body: payload) do |response|
|
|
66
|
+
raise "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
|
|
67
|
+
|
|
68
|
+
buffer = ""
|
|
69
|
+
response.read_body do |chunk|
|
|
70
|
+
buffer += chunk
|
|
71
|
+
|
|
72
|
+
while (event = extract_sse_event(buffer))
|
|
73
|
+
next if event[:data].nil? || event[:data].empty?
|
|
74
|
+
next if event[:data] == "[DONE]"
|
|
75
|
+
|
|
76
|
+
json = JSON.parse(event[:data])
|
|
77
|
+
block.call(json)
|
|
78
|
+
|
|
79
|
+
break if json["type"] == "message_stop"
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def extract_sse_event(buffer)
|
|
86
|
+
event_end = buffer.index("\n\n")
|
|
87
|
+
return nil unless event_end
|
|
88
|
+
|
|
89
|
+
event_data = buffer[0...event_end]
|
|
90
|
+
buffer.replace(buffer[(event_end + 2)..] || "")
|
|
91
|
+
|
|
92
|
+
event = {}
|
|
93
|
+
event_data.split("\n").each do |line|
|
|
94
|
+
if line.start_with?("event: ")
|
|
95
|
+
event[:event] = line[7..]
|
|
96
|
+
elsif line.start_with?("data: ")
|
|
97
|
+
event[:data] = line[6..]
|
|
98
|
+
elsif line == "data:"
|
|
99
|
+
event[:data] = ""
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
event
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def extract_system_message(messages)
|
|
107
|
+
system_msg = messages.find { |m| m[:role] == "system" || m["role"] == "system" }
|
|
108
|
+
system_content = system_msg ? (system_msg[:content] || system_msg["content"]) : nil
|
|
109
|
+
other_messages = messages.reject { |m| m[:role] == "system" || m["role"] == "system" }
|
|
110
|
+
normalized_messages = normalize_messages_for_claude(other_messages)
|
|
111
|
+
[system_content, normalized_messages]
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def normalize_messages_for_claude(messages)
|
|
115
|
+
messages.map do |msg|
|
|
116
|
+
role = msg[:role] || msg["role"]
|
|
117
|
+
tool_calls = msg[:tool_calls] || msg["tool_calls"]
|
|
118
|
+
|
|
119
|
+
if role == "tool"
|
|
120
|
+
{
|
|
121
|
+
role: "user",
|
|
122
|
+
content: [{
|
|
123
|
+
type: "tool_result",
|
|
124
|
+
tool_use_id: msg[:tool_call_id] || msg["tool_call_id"],
|
|
125
|
+
content: msg[:content] || msg["content"]
|
|
126
|
+
}]
|
|
127
|
+
}
|
|
128
|
+
elsif role == "assistant" && tool_calls&.any?
|
|
129
|
+
content = []
|
|
130
|
+
text = msg[:content] || msg["content"]
|
|
131
|
+
content << { type: "text", text: text } if text && !text.empty?
|
|
132
|
+
tool_calls.each do |tc|
|
|
133
|
+
func = tc[:function] || tc["function"] || {}
|
|
134
|
+
args = func[:arguments] || func["arguments"]
|
|
135
|
+
input = args.is_a?(String) ? (JSON.parse(args) rescue {}) : (args || {})
|
|
136
|
+
content << {
|
|
137
|
+
type: "tool_use",
|
|
138
|
+
id: tc[:id] || tc["id"],
|
|
139
|
+
name: func[:name] || func["name"] || tc[:name] || tc["name"],
|
|
140
|
+
input: input
|
|
141
|
+
}
|
|
142
|
+
end
|
|
143
|
+
{ role: "assistant", content: content }
|
|
144
|
+
else
|
|
145
|
+
msg
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
def fetch_non_streaming(messages, tools, system: nil)
|
|
151
|
+
result = self.messages(messages, system: system, tools: tools)
|
|
152
|
+
return result if result["code"]
|
|
153
|
+
|
|
154
|
+
{
|
|
155
|
+
type: :complete,
|
|
156
|
+
content: extract_text_content(result["content"]),
|
|
157
|
+
thinking: extract_thinking_content(result["content"]),
|
|
158
|
+
tool_calls: extract_tool_calls(result["content"]),
|
|
159
|
+
stop_reason: map_stop_reason(result["stop_reason"])
|
|
160
|
+
}
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def fetch_streaming(messages, tools, system: nil, &block)
|
|
164
|
+
content = ""
|
|
165
|
+
thinking = ""
|
|
166
|
+
tool_calls = []
|
|
167
|
+
stop_reason = :end_turn
|
|
168
|
+
|
|
169
|
+
self.messages(messages, system: system, tools: tools) do |event|
|
|
170
|
+
case event["type"]
|
|
171
|
+
when "content_block_start"
|
|
172
|
+
if event.dig("content_block", "type") == "tool_use"
|
|
173
|
+
tool_calls << {
|
|
174
|
+
id: event.dig("content_block", "id"),
|
|
175
|
+
name: event.dig("content_block", "name"),
|
|
176
|
+
arguments: {}
|
|
177
|
+
}
|
|
178
|
+
end
|
|
179
|
+
when "content_block_delta"
|
|
180
|
+
delta = event["delta"]
|
|
181
|
+
case delta["type"]
|
|
182
|
+
when "text_delta"
|
|
183
|
+
text = delta["text"]
|
|
184
|
+
content += text
|
|
185
|
+
block.call({ type: :delta, content: text, thinking: nil, tool_calls: nil })
|
|
186
|
+
when "thinking_delta"
|
|
187
|
+
text = delta["thinking"]
|
|
188
|
+
thinking += text if text
|
|
189
|
+
block.call({ type: :delta, content: nil, thinking: text, tool_calls: nil })
|
|
190
|
+
when "input_json_delta"
|
|
191
|
+
if tool_calls.any?
|
|
192
|
+
tool_calls.last[:arguments_json] ||= ""
|
|
193
|
+
tool_calls.last[:arguments_json] += delta["partial_json"] || ""
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
when "message_delta"
|
|
197
|
+
stop_reason = map_stop_reason(event.dig("delta", "stop_reason"))
|
|
198
|
+
when "message_stop"
|
|
199
|
+
tool_calls.each do |tc|
|
|
200
|
+
if tc[:arguments_json]
|
|
201
|
+
tc[:arguments] = begin
|
|
202
|
+
JSON.parse(tc[:arguments_json])
|
|
203
|
+
rescue
|
|
204
|
+
{}
|
|
205
|
+
end
|
|
206
|
+
tc.delete(:arguments_json)
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
block.call({
|
|
210
|
+
type: :complete,
|
|
211
|
+
content: content,
|
|
212
|
+
thinking: thinking.empty? ? nil : thinking,
|
|
213
|
+
tool_calls: tool_calls,
|
|
214
|
+
stop_reason: stop_reason
|
|
215
|
+
})
|
|
216
|
+
end
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
def extract_text_content(content_blocks)
|
|
221
|
+
return nil unless content_blocks
|
|
222
|
+
|
|
223
|
+
content_blocks
|
|
224
|
+
.select { |b| b["type"] == "text" }
|
|
225
|
+
.map { |b| b["text"] }
|
|
226
|
+
.join
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def extract_thinking_content(content_blocks)
|
|
230
|
+
return nil unless content_blocks
|
|
231
|
+
|
|
232
|
+
thinking = content_blocks
|
|
233
|
+
.select { |b| b["type"] == "thinking" }
|
|
234
|
+
.map { |b| b["thinking"] }
|
|
235
|
+
.join
|
|
236
|
+
|
|
237
|
+
thinking.empty? ? nil : thinking
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
def extract_tool_calls(content_blocks)
|
|
241
|
+
return [] unless content_blocks
|
|
242
|
+
|
|
243
|
+
content_blocks
|
|
244
|
+
.select { |b| b["type"] == "tool_use" }
|
|
245
|
+
.map { |b| { id: b["id"], name: b["name"], arguments: b["input"] || {} } }
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
def normalize_tool_for_anthropic(tool)
|
|
249
|
+
if tool[:function]
|
|
250
|
+
{ name: tool[:function][:name], description: tool[:function][:description], input_schema: tool[:function][:parameters] }
|
|
251
|
+
else
|
|
252
|
+
tool
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def map_stop_reason(reason)
|
|
257
|
+
case reason
|
|
258
|
+
when "end_turn" then :end_turn
|
|
259
|
+
when "tool_use" then :tool_use
|
|
260
|
+
when "max_tokens" then :max_tokens
|
|
261
|
+
else :end_turn
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
end
|
data/lib/net/llm/ollama.rb
CHANGED
|
@@ -5,76 +5,121 @@ module Net
|
|
|
5
5
|
class Ollama
|
|
6
6
|
attr_reader :host, :model, :http
|
|
7
7
|
|
|
8
|
-
def initialize(host: "localhost:11434", model: "
|
|
8
|
+
def initialize(host: ENV.fetch("OLLAMA_HOST", "localhost:11434"), model: "gpt-oss", http: Net::Llm.http)
|
|
9
9
|
@host = host
|
|
10
10
|
@model = model
|
|
11
11
|
@http = http
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def chat(messages, tools = [], &block)
|
|
15
|
-
url = build_url("/api/chat")
|
|
16
15
|
payload = { model: model, messages: messages, stream: block_given? }
|
|
17
16
|
payload[:tools] = tools unless tools.empty?
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
stream_request(url, payload, &block)
|
|
21
|
-
else
|
|
22
|
-
post_request(url, payload)
|
|
23
|
-
end
|
|
18
|
+
execute(build_url("/api/chat"), payload, &block)
|
|
24
19
|
end
|
|
25
20
|
|
|
26
|
-
def
|
|
27
|
-
|
|
28
|
-
|
|
21
|
+
def fetch(messages, tools = [], &block)
|
|
22
|
+
content = ""
|
|
23
|
+
thinking = ""
|
|
24
|
+
tool_calls = []
|
|
29
25
|
|
|
30
26
|
if block_given?
|
|
31
|
-
|
|
27
|
+
chat(messages, tools) do |chunk|
|
|
28
|
+
msg = chunk["message"] || {}
|
|
29
|
+
delta_content = msg["content"]
|
|
30
|
+
delta_thinking = msg["thinking"]
|
|
31
|
+
|
|
32
|
+
content += delta_content if delta_content
|
|
33
|
+
thinking += delta_thinking if delta_thinking
|
|
34
|
+
tool_calls += normalize_tool_calls(msg["tool_calls"]) if msg["tool_calls"]
|
|
35
|
+
|
|
36
|
+
if chunk["done"]
|
|
37
|
+
block.call({
|
|
38
|
+
type: :complete,
|
|
39
|
+
content: content,
|
|
40
|
+
thinking: thinking.empty? ? nil : thinking,
|
|
41
|
+
tool_calls: tool_calls,
|
|
42
|
+
stop_reason: map_stop_reason(chunk["done_reason"])
|
|
43
|
+
})
|
|
44
|
+
else
|
|
45
|
+
block.call({
|
|
46
|
+
type: :delta,
|
|
47
|
+
content: delta_content,
|
|
48
|
+
thinking: delta_thinking,
|
|
49
|
+
tool_calls: nil
|
|
50
|
+
})
|
|
51
|
+
end
|
|
52
|
+
end
|
|
32
53
|
else
|
|
33
|
-
|
|
54
|
+
result = chat(messages, tools)
|
|
55
|
+
msg = result["message"] || {}
|
|
56
|
+
{
|
|
57
|
+
type: :complete,
|
|
58
|
+
content: msg["content"],
|
|
59
|
+
thinking: msg["thinking"],
|
|
60
|
+
tool_calls: normalize_tool_calls(msg["tool_calls"]),
|
|
61
|
+
stop_reason: map_stop_reason(result["done_reason"])
|
|
62
|
+
}
|
|
34
63
|
end
|
|
35
64
|
end
|
|
36
65
|
|
|
66
|
+
def generate(prompt, &block)
|
|
67
|
+
execute(build_url("/api/generate"), {
|
|
68
|
+
model: model,
|
|
69
|
+
prompt: prompt,
|
|
70
|
+
stream: block_given?
|
|
71
|
+
}, &block)
|
|
72
|
+
end
|
|
73
|
+
|
|
37
74
|
def embeddings(input)
|
|
38
|
-
|
|
39
|
-
payload = { model: model, input: input }
|
|
40
|
-
post_request(url, payload)
|
|
75
|
+
post_request(build_url("/api/embed"), { model: model, input: input })
|
|
41
76
|
end
|
|
42
77
|
|
|
43
78
|
def tags
|
|
44
|
-
|
|
45
|
-
response = http.get(url)
|
|
46
|
-
handle_response(response)
|
|
79
|
+
get_request(build_url("/api/tags"))
|
|
47
80
|
end
|
|
48
81
|
|
|
49
82
|
def show(name)
|
|
50
|
-
|
|
51
|
-
payload = { name: name }
|
|
52
|
-
post_request(url, payload)
|
|
83
|
+
post_request(build_url("/api/show"), { name: name })
|
|
53
84
|
end
|
|
54
85
|
|
|
55
86
|
private
|
|
56
87
|
|
|
88
|
+
def execute(url, payload, &block)
|
|
89
|
+
if block_given?
|
|
90
|
+
stream_request(url, payload, &block)
|
|
91
|
+
else
|
|
92
|
+
post_request(url, payload)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
57
96
|
def build_url(path)
|
|
58
97
|
base = host.start_with?("http://", "https://") ? host : "http://#{host}"
|
|
59
98
|
"#{base}#{path}"
|
|
60
99
|
end
|
|
61
100
|
|
|
101
|
+
def get_request(url)
|
|
102
|
+
handle_response(http.get(url))
|
|
103
|
+
end
|
|
104
|
+
|
|
62
105
|
def post_request(url, payload)
|
|
63
|
-
|
|
64
|
-
handle_response(response)
|
|
106
|
+
handle_response(http.post(url, body: payload))
|
|
65
107
|
end
|
|
66
108
|
|
|
67
109
|
def handle_response(response)
|
|
68
110
|
if response.is_a?(Net::HTTPSuccess)
|
|
69
111
|
JSON.parse(response.body)
|
|
70
112
|
else
|
|
71
|
-
{
|
|
113
|
+
{
|
|
114
|
+
"code" => response.code,
|
|
115
|
+
"body" => response.body
|
|
116
|
+
}
|
|
72
117
|
end
|
|
73
118
|
end
|
|
74
119
|
|
|
75
120
|
def stream_request(url, payload, &block)
|
|
76
121
|
http.post(url, body: payload) do |response|
|
|
77
|
-
raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)
|
|
122
|
+
raise "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
|
|
78
123
|
|
|
79
124
|
buffer = ""
|
|
80
125
|
response.read_body do |chunk|
|
|
@@ -100,6 +145,27 @@ module Net
|
|
|
100
145
|
buffer.replace(buffer[(message_end + 1)..-1] || "")
|
|
101
146
|
message
|
|
102
147
|
end
|
|
148
|
+
|
|
149
|
+
def normalize_tool_calls(tool_calls)
|
|
150
|
+
return [] if tool_calls.nil? || tool_calls.empty?
|
|
151
|
+
|
|
152
|
+
tool_calls.map do |tc|
|
|
153
|
+
{
|
|
154
|
+
id: tc["id"] || tc.dig("function", "id"),
|
|
155
|
+
name: tc.dig("function", "name"),
|
|
156
|
+
arguments: tc.dig("function", "arguments") || {}
|
|
157
|
+
}
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def map_stop_reason(reason)
|
|
162
|
+
case reason
|
|
163
|
+
when "stop" then :end_turn
|
|
164
|
+
when "tool_calls", "tool_use" then :tool_use
|
|
165
|
+
when "length" then :max_tokens
|
|
166
|
+
else :end_turn
|
|
167
|
+
end
|
|
168
|
+
end
|
|
103
169
|
end
|
|
104
170
|
end
|
|
105
171
|
end
|
data/lib/net/llm/openai.rb
CHANGED
|
@@ -5,7 +5,7 @@ module Net
|
|
|
5
5
|
class OpenAI
|
|
6
6
|
attr_reader :api_key, :base_url, :model, :http
|
|
7
7
|
|
|
8
|
-
def initialize(api_key
|
|
8
|
+
def initialize(api_key: ENV.fetch("OPENAI_API_KEY"), base_url: ENV.fetch("OPENAI_BASE_URL", "https://api.openai.com/v1"), model: "gpt-4o-mini", http: Net::Llm.http)
|
|
9
9
|
@api_key = api_key
|
|
10
10
|
@base_url = base_url
|
|
11
11
|
@model = model
|
|
@@ -20,6 +20,14 @@ module Net
|
|
|
20
20
|
))
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
+
def fetch(messages, tools = [], &block)
|
|
24
|
+
if block_given?
|
|
25
|
+
fetch_streaming(messages, tools, &block)
|
|
26
|
+
else
|
|
27
|
+
fetch_non_streaming(messages, tools)
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
23
31
|
def models
|
|
24
32
|
handle_response(http.get("#{base_url}/models", headers: headers))
|
|
25
33
|
end
|
|
@@ -45,6 +53,120 @@ module Net
|
|
|
45
53
|
{ "code" => response.code, "body" => response.body }
|
|
46
54
|
end
|
|
47
55
|
end
|
|
56
|
+
|
|
57
|
+
def fetch_non_streaming(messages, tools)
|
|
58
|
+
body = { model: model, messages: messages }
|
|
59
|
+
body[:tools] = tools unless tools.empty?
|
|
60
|
+
body[:tool_choice] = "auto" unless tools.empty?
|
|
61
|
+
|
|
62
|
+
result = handle_response(http.post("#{base_url}/chat/completions", headers: headers, body: body))
|
|
63
|
+
return result if result["code"]
|
|
64
|
+
|
|
65
|
+
msg = result.dig("choices", 0, "message") || {}
|
|
66
|
+
{
|
|
67
|
+
type: :complete,
|
|
68
|
+
content: msg["content"],
|
|
69
|
+
thinking: nil,
|
|
70
|
+
tool_calls: normalize_tool_calls(msg["tool_calls"]),
|
|
71
|
+
stop_reason: map_stop_reason(result.dig("choices", 0, "finish_reason"))
|
|
72
|
+
}
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def fetch_streaming(messages, tools, &block)
|
|
76
|
+
body = { model: model, messages: messages, stream: true }
|
|
77
|
+
body[:tools] = tools unless tools.empty?
|
|
78
|
+
body[:tool_choice] = "auto" unless tools.empty?
|
|
79
|
+
|
|
80
|
+
content = ""
|
|
81
|
+
tool_calls = {}
|
|
82
|
+
stop_reason = :end_turn
|
|
83
|
+
|
|
84
|
+
http.post("#{base_url}/chat/completions", headers: headers, body: body) do |response|
|
|
85
|
+
raise "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)
|
|
86
|
+
|
|
87
|
+
buffer = ""
|
|
88
|
+
response.read_body do |chunk|
|
|
89
|
+
buffer += chunk
|
|
90
|
+
|
|
91
|
+
while (line = extract_line(buffer))
|
|
92
|
+
next if line.empty? || !line.start_with?("data: ")
|
|
93
|
+
|
|
94
|
+
data = line[6..]
|
|
95
|
+
break if data == "[DONE]"
|
|
96
|
+
|
|
97
|
+
json = JSON.parse(data)
|
|
98
|
+
delta = json.dig("choices", 0, "delta") || {}
|
|
99
|
+
|
|
100
|
+
if delta["content"]
|
|
101
|
+
content += delta["content"]
|
|
102
|
+
block.call({ type: :delta, content: delta["content"], thinking: nil, tool_calls: nil })
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
if delta["tool_calls"]
|
|
106
|
+
delta["tool_calls"].each do |tc|
|
|
107
|
+
idx = tc["index"]
|
|
108
|
+
tool_calls[idx] ||= { id: nil, name: nil, arguments_json: "" }
|
|
109
|
+
tool_calls[idx][:id] = tc["id"] if tc["id"]
|
|
110
|
+
tool_calls[idx][:name] = tc.dig("function", "name") if tc.dig("function", "name")
|
|
111
|
+
tool_calls[idx][:arguments_json] += tc.dig("function", "arguments") || ""
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
if json.dig("choices", 0, "finish_reason")
|
|
116
|
+
stop_reason = map_stop_reason(json.dig("choices", 0, "finish_reason"))
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
final_tool_calls = tool_calls.values.map do |tc|
|
|
123
|
+
args = begin
|
|
124
|
+
JSON.parse(tc[:arguments_json])
|
|
125
|
+
rescue
|
|
126
|
+
{}
|
|
127
|
+
end
|
|
128
|
+
{ id: tc[:id], name: tc[:name], arguments: args }
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
block.call({
|
|
132
|
+
type: :complete,
|
|
133
|
+
content: content,
|
|
134
|
+
thinking: nil,
|
|
135
|
+
tool_calls: final_tool_calls,
|
|
136
|
+
stop_reason: stop_reason
|
|
137
|
+
})
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def extract_line(buffer)
|
|
141
|
+
line_end = buffer.index("\n")
|
|
142
|
+
return nil unless line_end
|
|
143
|
+
|
|
144
|
+
line = buffer[0...line_end]
|
|
145
|
+
buffer.replace(buffer[(line_end + 1)..] || "")
|
|
146
|
+
line
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def normalize_tool_calls(tool_calls)
|
|
150
|
+
return [] if tool_calls.nil? || tool_calls.empty?
|
|
151
|
+
|
|
152
|
+
tool_calls.map do |tc|
|
|
153
|
+
args = tc.dig("function", "arguments")
|
|
154
|
+
{
|
|
155
|
+
id: tc["id"],
|
|
156
|
+
name: tc.dig("function", "name"),
|
|
157
|
+
arguments: args.is_a?(String) ? (JSON.parse(args) rescue {}) : (args || {})
|
|
158
|
+
}
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def map_stop_reason(reason)
|
|
163
|
+
case reason
|
|
164
|
+
when "stop" then :end_turn
|
|
165
|
+
when "tool_calls" then :tool_use
|
|
166
|
+
when "length" then :max_tokens
|
|
167
|
+
else :end_turn
|
|
168
|
+
end
|
|
169
|
+
end
|
|
48
170
|
end
|
|
49
171
|
end
|
|
50
172
|
end
|
data/lib/net/llm/version.rb
CHANGED
data/lib/net/llm/vertex_ai.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Net
|
|
4
|
+
module Llm
|
|
5
|
+
class VertexAI
|
|
6
|
+
attr_reader :project_id, :region, :model
|
|
7
|
+
|
|
8
|
+
def initialize(project_id: ENV.fetch("GOOGLE_CLOUD_PROJECT"), region: ENV.fetch("GOOGLE_CLOUD_REGION", "us-east5"), model: "claude-opus-4-5@20251101", http: Net::Llm.http)
|
|
9
|
+
@project_id = project_id
|
|
10
|
+
@region = region
|
|
11
|
+
@model = model
|
|
12
|
+
@handler = build_handler(http)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def messages(...) = @handler.messages(...)
|
|
16
|
+
def fetch(...) = @handler.fetch(...)
|
|
17
|
+
|
|
18
|
+
private
|
|
19
|
+
|
|
20
|
+
def build_handler(http)
|
|
21
|
+
if model.start_with?("claude-")
|
|
22
|
+
Claude.new(
|
|
23
|
+
endpoint: "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project_id}/locations/#{region}/publishers/anthropic/models/#{model}:rawPredict",
|
|
24
|
+
headers: -> { { "Authorization" => "Bearer #{access_token}" } },
|
|
25
|
+
http: http,
|
|
26
|
+
anthropic_version: "vertex-2023-10-16"
|
|
27
|
+
)
|
|
28
|
+
else
|
|
29
|
+
raise NotImplementedError, "Model '#{model}' is not yet supported. Only Claude models (claude-*) are currently implemented."
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def access_token
|
|
34
|
+
@access_token ||= `gcloud auth application-default print-access-token`.strip
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
data/lib/net/llm.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: net-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- mo khan
|
|
@@ -51,6 +51,62 @@ dependencies:
|
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
53
|
version: '1.0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: rake
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '13.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '13.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: rspec
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '3.0'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '3.0'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: vcr
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - "~>"
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '6.0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - "~>"
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '6.0'
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
97
|
+
name: webmock
|
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
|
99
|
+
requirements:
|
|
100
|
+
- - "~>"
|
|
101
|
+
- !ruby/object:Gem::Version
|
|
102
|
+
version: '3.0'
|
|
103
|
+
type: :development
|
|
104
|
+
prerelease: false
|
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
+
requirements:
|
|
107
|
+
- - "~>"
|
|
108
|
+
- !ruby/object:Gem::Version
|
|
109
|
+
version: '3.0'
|
|
54
110
|
description: A minimal Ruby gem providing interfaces to connect to OpenAI, Ollama,
|
|
55
111
|
and Anthropic (Claude) LLM APIs
|
|
56
112
|
email:
|
|
@@ -65,17 +121,19 @@ files:
|
|
|
65
121
|
- Rakefile
|
|
66
122
|
- lib/net/llm.rb
|
|
67
123
|
- lib/net/llm/anthropic.rb
|
|
124
|
+
- lib/net/llm/claude.rb
|
|
68
125
|
- lib/net/llm/ollama.rb
|
|
69
126
|
- lib/net/llm/openai.rb
|
|
70
127
|
- lib/net/llm/version.rb
|
|
128
|
+
- lib/net/llm/vertex_ai.rb
|
|
71
129
|
- sig/net/llm.rbs
|
|
72
|
-
homepage: https://
|
|
130
|
+
homepage: https://src.mokhan.ca/xlgmokha/net-llm/
|
|
73
131
|
licenses:
|
|
74
132
|
- MIT
|
|
75
133
|
metadata:
|
|
76
|
-
homepage_uri: https://
|
|
77
|
-
source_code_uri: https://
|
|
78
|
-
changelog_uri: https://
|
|
134
|
+
homepage_uri: https://src.mokhan.ca/xlgmokha/net-llm/
|
|
135
|
+
source_code_uri: https://src.mokhan.ca/xlgmokha/net-llm/
|
|
136
|
+
changelog_uri: https://src.mokhan.ca/xlgmokha/net-llm/blob/main/CHANGELOG.md.html
|
|
79
137
|
rdoc_options: []
|
|
80
138
|
require_paths:
|
|
81
139
|
- lib
|