llm_ruby 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +70 -36
- data/lib/llm/clients/anthropic/response.rb +48 -0
- data/lib/llm/clients/anthropic.rb +113 -0
- data/lib/llm/clients/gemini/request.rb +75 -0
- data/lib/llm/clients/gemini/response.rb +61 -0
- data/lib/llm/clients/gemini.rb +102 -0
- data/lib/llm/clients/open_ai/response.rb +45 -32
- data/lib/llm/clients/open_ai.rb +86 -82
- data/lib/llm/info.rb +261 -89
- data/lib/llm/response.rb +9 -1
- data/lib/llm/schema.rb +75 -0
- data/lib/llm/stop_reason.rb +8 -5
- data/lib/llm.rb +9 -2
- metadata +12 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0de9e01920a472c7a04f9f794747090e3bf279b403d4efa206b1db7c2b006987
+  data.tar.gz: bd77b3a4c5540a82d4a6f1b93213a8b3da60cc9342e581335adeac6fe93c999f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bc4b0263dfeaf1db4dded66667ea13e65dfc035d27579a2b032a59ae63f29fd588630d749d492d281322269fbc9d3909f8648f464d250baa99ff5b2afd582193
+  data.tar.gz: a4c5713df43e27127afa0b3cbf044ce8d805235f73abd05b3f79dc9fc7a814c193caac9192392a8670523cf74be96da2b52dc648d965116e8e9bbc3f8a2aaaaa
data/README.md
CHANGED
@@ -1,6 +1,12 @@
 # LLMRuby
 
-
+[](https://badge.fury.io/rb/llm_ruby)
+
+[](https://opensource.org/licenses/MIT)
+
+
+
+LLMRuby is a Ruby gem that provides a consistent interface for interacting with multiple Large Language Model (LLM) APIs. Most OpenAI, Anthropic and Gemini models are currently supported.
 
 ## Installation
 
@@ -12,14 +18,14 @@ gem 'llm_ruby'
 
 And then execute:
 
-```
-$ bundle install
+```shell
+bundle install
 ```
 
 Or install it yourself as:
 
-```
-$ gem install llm_ruby
+```shell
+gem install llm_ruby
 ```
 
 ## Usage
@@ -27,7 +33,7 @@ $ gem install llm_ruby
 ### Basic Usage
 
 ```ruby
-require '
+require 'llm_ruby'
 
 # Initialize an LLM instance
 llm = LLM.from_string!("gpt-4")
@@ -46,10 +52,10 @@ puts response.content
 LLMRuby supports streaming responses:
 
 ```ruby
-require '
+require 'llm_ruby'
 
 # Initialize an LLM instance
-llm = LLM.from_string!("gpt-
+llm = LLM.from_string!("gpt-4o")
 
 # Create a client
 client = llm.client
@@ -87,7 +93,7 @@ Here is an example of how to use the response object:
 
 ```ruby
 # Initialize an LLM instance
-llm = LLM.from_string!("gpt-
+llm = LLM.from_string!("gpt-4o")
 
 # Create a client
 client = llm.client
@@ -101,37 +107,69 @@ puts "Raw response: #{response.raw_response}"
 puts "Stop reason: #{response.stop_reason}"
 ```
 
-
 ## Available Models
 
 LLMRuby supports various OpenAI models, including GPT-3.5 and GPT-4 variants. You can see the full list of supported models in the `KNOWN_MODELS` constant:
 
-
-
-
-
-| gpt-3.5-turbo
-| gpt-3.5-turbo-
-| gpt-
-| gpt-
-| gpt-4
-| gpt-4-
-| gpt-4-
-| gpt-4-
-| gpt-4-
-| gpt-4-
-| gpt-4o
-| gpt-4o-mini
-| gpt-4o-2024-
-| gpt-4o-2024-
-
+### OpenAI Models
+
+| Canonical Name | Display Name |
+|----------------------------|--------------------------------------|
+| gpt-3.5-turbo | GPT-3.5 Turbo |
+| gpt-3.5-turbo-0125 | GPT-3.5 Turbo 0125 |
+| gpt-3.5-turbo-16k | GPT-3.5 Turbo 16K |
+| gpt-3.5-turbo-1106 | GPT-3.5 Turbo 1106 |
+| gpt-4 | GPT-4 |
+| gpt-4-1106-preview | GPT-4 Turbo 1106 |
+| gpt-4-turbo-2024-04-09 | GPT-4 Turbo 2024-04-09 |
+| gpt-4-0125-preview | GPT-4 Turbo 0125 |
+| gpt-4-turbo-preview | GPT-4 Turbo |
+| gpt-4-0613 | GPT-4 0613 |
+| gpt-4o | GPT-4o |
+| gpt-4o-mini | GPT-4o Mini |
+| gpt-4o-mini-2024-07-18 | GPT-4o Mini 2024-07-18 |
+| gpt-4o-2024-05-13 | GPT-4o 2024-05-13 |
+| gpt-4o-2024-08-06 | GPT-4o 2024-08-06 |
+| gpt-4o-2024-11-20 | GPT-4o 2024-11-20 |
+| chatgpt-4o-latest | ChatGPT 4o Latest |
+| o1 | o1 |
+| o1-2024-12-17 | o1 2024-12-17 |
+| o1-preview | o1 Preview |
+| o1-preview-2024-09-12 | o1 Preview 2024-09-12 |
+| o1-mini | o1 Mini |
+| o1-mini-2024-09-12 | o1 Mini 2024-09-12 |
+| o3-mini | o3 Mini |
+| o3-mini-2025-01-31 | o3 Mini 2025-01-31 |
+
+### Anthropic Models
+
+| Canonical Name | Display Name |
+|----------------------------|--------------------------------------|
+| claude-3-5-sonnet-20241022 | Claude 3.5 Sonnet 2024-10-22 |
+| claude-3-5-haiku-20241022 | Claude 3.5 Haiku 2024-10-22 |
+| claude-3-5-sonnet-20240620 | Claude 3.5 Sonnet 2024-06-20 |
+| claude-3-opus-20240229 | Claude 3.5 Opus 2024-02-29 |
+| claude-3-sonnet-20240229 | Claude 3.5 Sonnet 2024-02-29 |
+| claude-3-haiku-20240307 | Claude 3.5 Opus 2024-03-07 |
+
+### Google Models
+
+| Canonical Name | Display Name |
+|--------------------------------------|------------------------------------------|
+| gemini-2.0-flash | Gemini 2.0 Flash |
+| gemini-2.0-flash-lite-preview-02-05 | Gemini 2.0 Flash Lite Preview 02-05 |
+| gemini-1.5-flash | Gemini 1.5 Flash |
+| gemini-1.5-pro | Gemini 1.5 Pro |
+| gemini-1.5-flash-8b | Gemini 1.5 Flash 8B |
 
 ## Configuration
 
-Set your OpenAI API key as an environment variable:
+Set your OpenAI, Anthropic or Google API key as an environment variable:
 
-```
+```shell
 export OPENAI_API_KEY=your_api_key_here
+export ANTHROPIC_API_KEY=your_api_key_here
+export GEMINI_API_KEY=your_api_key_here
 ```
 
 ## Development
@@ -142,12 +180,8 @@ To install this gem onto your local machine, run `bundle exec rake install`.
 
 ## Contributing
 
-Bug reports and pull requests are welcome
+Bug reports and pull requests are welcome.
 
 ## License
 
 The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
-
-## Acknowledgements
-
-This gem is developed and maintained by [Context](https://context.ai).
data/lib/llm/clients/anthropic/response.rb
ADDED
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+class LLM
+  module Clients
+    class Anthropic
+      class Response
+        def initialize(raw_response)
+          @raw_response = raw_response
+        end
+
+        def to_normalized_response
+          LLM::Response.new(
+            content: content,
+            raw_response: parsed_response,
+            stop_reason: normalize_stop_reason
+          )
+        end
+
+        def self.normalize_stop_reason(stop_reason)
+          case stop_reason
+          when "end_turn"
+            LLM::StopReason::STOP
+          when "stop_sequence"
+            LLM::StopReason::STOP_SEQUENCE
+          when "max_tokens"
+            LLM::StopReason::MAX_TOKENS_REACHED
+          else
+            LLM::StopReason::OTHER
+          end
+        end
+
+        private
+
+        def content
+          parsed_response.dig("content", 0, "text")
+        end
+
+        def normalize_stop_reason
+          self.class.normalize_stop_reason(parsed_response["stop_reason"])
+        end
+
+        def parsed_response
+          @raw_response.parsed_response
+        end
+      end
+    end
+  end
+end
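
The class-level `normalize_stop_reason` above maps Anthropic's stop values onto the gem's shared constants; a quick sketch of the expected translations, taken directly from the `case` statement:

```ruby
LLM::Clients::Anthropic::Response.normalize_stop_reason("end_turn")      # => LLM::StopReason::STOP
LLM::Clients::Anthropic::Response.normalize_stop_reason("stop_sequence") # => LLM::StopReason::STOP_SEQUENCE
LLM::Clients::Anthropic::Response.normalize_stop_reason("max_tokens")    # => LLM::StopReason::MAX_TOKENS_REACHED
LLM::Clients::Anthropic::Response.normalize_stop_reason("anything_else") # => LLM::StopReason::OTHER
```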
data/lib/llm/clients/anthropic.rb
ADDED
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+require "httparty"
+
+class LLM
+  module Clients
+    class Anthropic
+      include HTTParty
+      base_uri "https://api.anthropic.com"
+
+      def initialize(llm:)
+        @llm = llm
+      end
+
+      def chat(messages, options = {})
+        request = payload(messages, options)
+
+        return chat_streaming(request, options[:on_message], options[:on_complete]) if options[:stream]
+
+        resp = post_url("/v1/messages", body: request.to_json)
+
+        Response.new(resp).to_normalized_response
+      end
+
+      private
+
+      def chat_streaming(request, on_message, on_complete)
+        buffer = +""
+        chunks = []
+        output_data = {}
+
+        wrapped_on_complete = lambda { |stop_reason|
+          output_data[:stop_reason] = stop_reason
+          on_complete&.call(stop_reason)
+        }
+
+        request[:stream] = true
+
+        proc = handle_event_stream(buffer, chunks, on_message_proc: on_message, on_complete_proc: wrapped_on_complete)
+
+        _resp = post_url_streaming("/v1/messages", body: request.to_json, &proc)
+
+        LLM::Response.new(
+          content: buffer,
+          raw_response: chunks,
+          stop_reason: Response.normalize_stop_reason(output_data[:stop_reason])
+        )
+      end
+
+      def handle_event_stream(buffer, chunks, on_message_proc:, on_complete_proc:)
+        each_json_chunk do |type, chunk|
+          chunks << chunk
+          case type
+          when "content_block_delta"
+            new_content = chunk.dig("delta", "text")
+            buffer << new_content
+            on_message_proc&.call(new_content)
+          when "message_delta"
+            finish_reason = chunk.dig("delta", "stop_reason")
+            on_complete_proc&.call(finish_reason)
+          else
+            next
+          end
+        end
+      end
+
+      def each_json_chunk
+        parser = EventStreamParser::Parser.new
+
+        proc do |chunk|
+          # TODO: Add error handling.
+
+          parser.feed(chunk) do |type, data|
+            yield(type, JSON.parse(data))
+          end
+        end
+      end
+
+      def payload(messages, options = {})
+        {
+          system: combined_system_messages(messages),
+          messages: messages.filter { |m| m[:role].to_sym != :system },
+          model: @llm.canonical_name,
+          max_tokens: options[:max_output_tokens] || @llm.default_params[:max_output_tokens],
+          temperature: options[:temperature],
+          top_p: options[:top_p],
+          top_k: options[:top_k],
+          stream: options[:stream]
+        }.compact
+      end
+
+      def combined_system_messages(messages)
+        messages.filter { |m| m[:role].to_sym == :system }.map { |m| m[:content] }.join('\n\n')
+      end
+
+      def post_url(url, body:)
+        self.class.post(url, body: body, headers: default_headers)
+      end
+
+      def post_url_streaming(url, **kwargs, &block)
+        self.class.post(url, **kwargs.merge(headers: default_headers, stream_body: true), &block)
+      end
+
+      def default_headers
+        {
+          "anthropic-version" => "2023-06-01",
+          "x-api-key" => ENV["ANTHROPIC_API_KEY"],
+          "Content-Type" => "application/json"
+        }
+      end
+    end
+  end
+end
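
In the client above, `payload` folds system-role messages into Anthropic's top-level `system` field and strips them from `messages`, while `.compact` drops any options left nil; note the join string is the single-quoted literal `'\n\n'`, so multiple system messages are separated by a literal backslash-n sequence rather than blank lines. A sketch with a made-up conversation (the model name is hypothetical; the field values follow the code above):

```ruby
client = LLM::Clients::Anthropic.new(llm: LLM.from_string!("claude-3-5-haiku-20241022"))

messages = [
  {role: :system, content: "You are terse."},
  {role: :user, content: "Name one prime number."}
]

# payload(messages, max_output_tokens: 256) builds roughly:
# {
#   system: "You are terse.",
#   messages: [{role: :user, content: "Name one prime number."}],
#   model: "claude-3-5-haiku-20241022",  # @llm.canonical_name
#   max_tokens: 256
# }
# temperature, top_p, top_k and stream are nil here, so .compact removes them.
response = client.chat(messages, max_output_tokens: 256)
puts response.content
```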
data/lib/llm/clients/gemini/request.rb
ADDED
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+class LLM
+  module Clients
+    class Gemini
+      class Request
+        def initialize(messages, options)
+          @messages = messages
+          @options = options
+        end
+
+        def model_for_url
+          "models/#{model}"
+        end
+
+        def params
+          generation_config = {}
+          if options[:response_format]
+            generation_config = {
+              responseMimeType: "application/json",
+              responseSchema: options[:response_format]&.gemini_response_format
+            }
+          end
+
+          {
+            systemInstruction: normalized_prompt,
+            contents: normalized_messages,
+            generationConfig: generation_config
+          }
+        end
+
+        private
+
+        attr_reader :messages, :options
+
+        def model
+          options[:model]
+        end
+
+        def normalized_messages
+          user_visible_messages
+            .map(&method(:message_to_gemini_message))
+        end
+
+        def message_to_gemini_message(message)
+          {
+            role: ROLES_MAP[message[:role]],
+            parts: [{text: message[:content]}]
+          }
+        end
+
+        def normalized_prompt
+          return nil if system_messages.empty?
+
+          system_messages
+            .map { |message| message[:content] }
+            .join("\n\n")
+        end
+
+        def system_messages
+          messages.filter { |message| message[:role] == :system }
+        end
+
+        def user_visible_messages
+          messages.filter { |message| message[:role] != :system }
+        end
+
+        ROLES_MAP = {
+          assistant: :model,
+          user: :user
+        }.freeze
+      end
+    end
+  end
+end
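
A sketch of the request body `Request#params` produces for a short conversation with no `response_format` option; the field names and the assistant-to-`:model` role mapping come straight from the code above, while the conversation itself is made up:

```ruby
req = LLM::Clients::Gemini::Request.new(
  [
    {role: :system, content: "Answer in one word."},
    {role: :user, content: "Capital of France?"},
    {role: :assistant, content: "Paris"},
    {role: :user, content: "And of Italy?"}
  ],
  {}
)

req.params
# => {
#      systemInstruction: "Answer in one word.",
#      contents: [
#        {role: :user, parts: [{text: "Capital of France?"}]},
#        {role: :model, parts: [{text: "Paris"}]},
#        {role: :user, parts: [{text: "And of Italy?"}]}
#      ],
#      generationConfig: {}
#    }
```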
data/lib/llm/clients/gemini/response.rb
ADDED
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+class LLM
+  module Clients
+    class Gemini
+      class Response
+        def initialize(raw_response)
+          @raw_response = raw_response
+        end
+
+        def to_normalized_response
+          LLM::Response.new(
+            content: content,
+            raw_response: parsed_response,
+            stop_reason: translated_stop_reason,
+            structured_output: structured_output
+          )
+        end
+
+        def self.normalize_stop_reason(stop_reason)
+          case stop_reason
+          when "STOP"
+            LLM::StopReason::STOP
+          when "MAX_TOKENS"
+            LLM::StopReason::MAX_TOKENS
+          when "SAFETY"
+            LLM::StopReason::SAFETY
+          else
+            LLM::StopReason::OTHER
+          end
+        end
+
+        private
+
+        attr_reader :raw_response
+
+        def content
+          parsed_response.dig("candidates", 0, "content", "parts", 0, "text")
+        end
+
+        def stop_reason
+          parsed_response.dig("candidates", 0, "finishReason")
+        end
+
+        def translated_stop_reason
+          self.class.normalize_stop_reason(stop_reason)
+        end
+
+        def parsed_response
+          raw_response.parsed_response
+        end
+
+        def structured_output
+          @structured_output ||= JSON.parse(parsed_response.dig("candidates", 0, "content", "parts", 0, "text"))
+        rescue JSON::ParserError
+          nil
+        end
+      end
+    end
+  end
+end
data/lib/llm/clients/gemini.rb
ADDED
@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+
+require "httparty"
+require "event_stream_parser"
+
+class LLM
+  module Clients
+    class Gemini
+      include HTTParty
+      base_uri "https://generativelanguage.googleapis.com"
+
+      def initialize(llm:)
+        @llm = llm
+      end
+
+      def chat(messages, options = {})
+        req = Request.new(messages, options)
+
+        return chat_streaming(req, options[:on_message], options[:on_complete]) if options[:stream]
+
+        resp = post_url(
+          "/v1beta/models/#{llm.canonical_name}:generateContent",
+          body: req.params.to_json
+        )
+
+        Response.new(resp).to_normalized_response
+      end
+
+      private
+
+      attr_reader :llm
+
+      def chat_streaming(request, on_message, on_complete)
+        buffer = +""
+        chunks = []
+        output_data = {}
+
+        wrapped_on_complete = lambda { |stop_reason|
+          output_data[:stop_reason] = stop_reason
+          on_complete&.call(stop_reason)
+        }
+
+        proc = handle_event_stream(buffer, chunks, on_message_proc: on_message, on_complete_proc: wrapped_on_complete)
+
+        _resp = post_url_streaming(
+          "/v1beta/models/#{llm.canonical_name}:streamGenerateContent?alt=sse",
+          body: request.params.to_json,
+          &proc
+        )
+
+        LLM::Response.new(
+          content: buffer,
+          raw_response: chunks,
+          stop_reason: Response.normalize_stop_reason(output_data[:stop_reason])
+        )
+      end
+
+      def handle_event_stream(buffer, chunks, on_message_proc:, on_complete_proc:)
+        each_json_chunk do |_type, chunk|
+          chunks << chunk
+
+          new_content = chunk.dig("candidates", 0, "content", "parts", 0, "text")
+
+          unless new_content.nil?
+            on_message_proc&.call(new_content)
+            buffer << new_content
+          end
+
+          stop_reason = chunk.dig("candidates", 0, "finishReason")
+          on_complete_proc&.call(stop_reason) unless stop_reason.nil?
+        end
+      end
+
+      def each_json_chunk
+        parser = EventStreamParser::Parser.new
+
+        proc do |chunk|
+          # TODO: Add error handling.
+
+          parser.feed(chunk) do |type, data|
+            yield(type, JSON.parse(data))
+          end
+        end
+      end
+
+      def post_url(url, **kwargs)
+        self.class.post(url, **kwargs.merge(headers: default_headers))
+      end
+
+      def post_url_streaming(url, **kwargs, &block)
+        self.class.post(url, **kwargs.merge(headers: default_headers, stream_body: true), &block)
+      end
+
+      def default_headers
+        {
+          "x-goog-api-key" => ENV["GEMINI_API_KEY"],
+          "Content-Type" => "application/json"
+        }
+      end
+    end
+  end
+end
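
A streaming sketch against the new Gemini client through the public interface the README describes; the model string comes from the Google table above, and the `stream`/`on_message`/`on_complete` option names come from `chat` and `chat_streaming` in this file:

```ruby
require 'llm_ruby'

llm = LLM.from_string!("gemini-1.5-flash")
client = llm.client

response = client.chat(
  [{role: :user, content: "Stream a haiku about Ruby."}],
  stream: true,
  on_message: ->(chunk) { print chunk },               # called with each text delta
  on_complete: ->(reason) { puts "\ndone: #{reason}" } # called with the finishReason
)

puts response.content # the full buffered text once streaming ends
```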
data/lib/llm/clients/open_ai/response.rb
CHANGED
@@ -1,42 +1,55 @@
 # frozen_string_literal: true
 
-class LLM
-
-
-
+class LLM
+  module Clients
+    class OpenAI
+      class Response
+        def initialize(raw_response)
+          @raw_response = raw_response
+        end
 
-
-
-
-
-
-
-
+        def to_normalized_response
+          LLM::Response.new(
+            content: content,
+            raw_response: parsed_response,
+            stop_reason: normalize_stop_reason,
+            structured_output: structured_output
+          )
+        end
 
-
-
-
-
-
-
-
-
-
-
-
-
+        def self.normalize_stop_reason(stop_reason)
+          case stop_reason
+          when "stop"
+            LLM::StopReason::STOP
+          when "safety"
+            LLM::StopReason::SAFETY
+          when "max_tokens"
+            LLM::StopReason::MAX_TOKENS_REACHED
+          else
+            LLM::StopReason::OTHER
+          end
+        end
 
-
+        private
 
-
-
-
+        def content
+          parsed_response.dig("choices", 0, "message", "content")
+        end
 
-
-
-
+        def normalize_stop_reason
+          self.class.normalize_stop_reason(parsed_response.dig("choices", 0, "finish_reason"))
+        end
 
-
-
+        def parsed_response
+          @raw_response.parsed_response
+        end
+
+        def structured_output
+          @structured_output ||= JSON.parse(parsed_response.dig("choices", 0, "message", "content"))
+        rescue JSON::ParserError
+          nil
+        end
+      end
+    end
   end
 end
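
The reworked OpenAI response class now also attempts to parse the message content as JSON, returning nil on failure, and passes the result to `LLM::Response` as `structured_output`. A sketch of the normalized fields a caller would see, assuming `LLM::Response` exposes a reader for the new attribute (the prompt is made up):

```ruby
llm = LLM.from_string!("gpt-4o")
client = llm.client

response = client.chat([{role: :user, content: "Reply with the JSON object {\"ok\": true}"}])

response.content           # raw text from choices[0].message.content
response.stop_reason       # e.g. LLM::StopReason::STOP when finish_reason is "stop"
response.structured_output # parsed Hash when the content is valid JSON, otherwise nil
```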