llm.rb 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/README.md +264 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/file.rb +26 -40
  6. data/lib/llm/http_client.rb +10 -5
  7. data/lib/llm/message.rb +14 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +98 -0
  10. data/lib/llm/provider.rb +96 -19
  11. data/lib/llm/providers/anthropic/error_handler.rb +2 -0
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +2 -0
  17. data/lib/llm/providers/gemini/files.rb +160 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +2 -0
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +2 -0
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +21 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +78 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai.rb +2 -2
  35. data/lib/llm/response/audio.rb +13 -0
  36. data/lib/llm/response/audio_transcription.rb +14 -0
  37. data/lib/llm/response/audio_translation.rb +14 -0
  38. data/lib/llm/response/download_file.rb +15 -0
  39. data/lib/llm/response/file.rb +42 -0
  40. data/lib/llm/response/filelist.rb +18 -0
  41. data/lib/llm/response/image.rb +29 -0
  42. data/lib/llm/response/output.rb +56 -0
  43. data/lib/llm/response.rb +18 -6
  44. data/lib/llm/utils.rb +19 -0
  45. data/lib/llm/version.rb +1 -1
  46. data/lib/llm.rb +5 -2
  47. data/llm.gemspec +1 -6
  48. data/spec/anthropic/completion_spec.rb +1 -1
  49. data/spec/gemini/completion_spec.rb +1 -1
  50. data/spec/gemini/conversation_spec.rb +31 -0
  51. data/spec/gemini/files_spec.rb +124 -0
  52. data/spec/gemini/images_spec.rb +47 -0
  53. data/spec/llm/conversation_spec.rb +101 -61
  54. data/spec/ollama/completion_spec.rb +1 -1
  55. data/spec/ollama/conversation_spec.rb +31 -0
  56. data/spec/openai/audio_spec.rb +55 -0
  57. data/spec/openai/completion_spec.rb +1 -1
  58. data/spec/openai/files_spec.rb +150 -0
  59. data/spec/openai/images_spec.rb +95 -0
  60. data/spec/openai/responses_spec.rb +51 -0
  61. data/spec/setup.rb +8 -0
  62. metadata +31 -49
  63. data/LICENSE.txt +0 -21
  64. data/lib/llm/conversation.rb +0 -90
  65. data/lib/llm/message_queue.rb +0 -54
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 732a483717a5ec8e443077fb71294b1e301c3a8867b225c1fc2a58bd02fe3130
- data.tar.gz: a1c2591a07c413cebfdffa99d133855bb177cc4a6607860333dbc9991da8d33e
+ metadata.gz: 9073b7495fb9bdad2deec1d2c086b6d3b554c5a440dd884108a2fa8d12f7c8a9
+ data.tar.gz: 514902fc97de61dc18df8c22d51d9e86472a62e1ffb0c4ce4394b0684cddbd8a
  SHA512:
- metadata.gz: 4f5983f97b3c1e25f4147ec81f6d91df5073ea03dc4031690979f68b2053bf73bae07f7c57c3f7c9813dfd5b43eb1bd7364d5f5929234013d6b19bb49f9271ec
- data.tar.gz: 286f560ce2d9e048e481796d27fd6c9a658b92cec0267f9527fad49bf5349ece0d05b943c086cce3c12bbd82f53dbf086ed91c516d39b815bd6106edca21914a
+ metadata.gz: 0d0c35fa38ed3481872e29131d15e03e5a4bf0ad8a96c42ba64a5f48ed32584973d39b53ca630c966d54b6700a83a44abb1f4224c1bb9c1ca7f9e7a2d953e1c3
+ data.tar.gz: 8889034558c56a2bc1ff5321cf0ca45d82ac83ac7122c741e859caed7d060b34b99824cc53d20a5add4949dd135cf65c383f8400e7c112a44110fc1d4e0d2f4d
data/README.md CHANGED
@@ -2,7 +2,8 @@

  llm.rb is a lightweight library that provides a common interface
  and set of functionality for multiple Large Language Models (LLMs). It
- is designed to be simple, flexible, and easy to use.
+ is designed to be simple, flexible, and easy to use – and it has been
+ implemented with no dependencies outside Ruby's standard library.

  ## Examples

@@ -10,10 +11,10 @@ is designed to be simple, flexible, and easy to use.

  #### LLM::Provider

- All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm/LLM/Provider.html) –
+ All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html) –
  they share a common interface and set of functionality. Each provider can be instantiated
  using an API key (if required) and an optional set of configuration options via
- [the singleton methods of LLM](https://0x1eef.github.io/x/llm/LLM.html). For example:
+ [the singleton methods of LLM](https://0x1eef.github.io/x/llm.rb/LLM.html). For example:

  ```ruby
  #!/usr/bin/env ruby
@@ -25,37 +26,28 @@ llm = LLM.anthropic("yourapikey")
  llm = LLM.ollama(nil)
  ```

- ### Completions
+ ### Conversations

- #### Conversation
+ #### Completions

- The
- [LLM::Provider#chat](https://0x1eef.github.io/x/llm/LLM/Provider.html#chat-instance_method)
- method returns a lazy-variant of a
- [LLM::Conversation](https://0x1eef.github.io/x/llm/LLM/Conversation.html)
- object, and it allows for a "lazy" conversation where messages are batched and
- sent to the provider only when necessary. The non-lazy counterpart is available via the
- [LLM::Provider#chat!](https://0x1eef.github.io/x/llm/LLM/Provider.html#chat!-instance_method)
- method.
-
- Both lazy and non-lazy conversations maintain a message thread that can
- be reused as context throughout a conversation. For the sake of brevity the system
- prompt is loaded from
- [a file](./share/llm/prompts/system.txt)
- in the following example – all other prompts are "user" prompts –
- and a single request is made to the provider when iterating over the messages
- belonging to a lazy conversation:
+ The following example enables lazy mode for a
+ [LLM::Chat](https://0x1eef.github.io/x/llm.rb/LLM/Chat.html)
+ object by entering into a "lazy" conversation where messages are buffered and
+ sent to the provider only when necessary. Both lazy and non-lazy conversations
+ maintain a message thread that can be reused as context throughout a conversation.
+ The example uses the stateless chat completions API that all LLM providers support:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(ENV["KEY"])
- convo = llm.chat File.read("./share/llm/prompts/system.txt"), :system
- convo.chat "Tell me the answer to 5 + 15"
- convo.chat "Tell me the answer to (5 + 15) * 2"
- convo.chat "Tell me the answer to ((5 + 15) * 2) / 10"
- convo.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
+ bot = LLM::Chat.new(llm).lazy
+ bot.chat File.read("./share/llm/prompts/system.txt"), :system
+ bot.chat "Tell me the answer to 5 + 15", :user
+ bot.chat "Tell me the answer to (5 + 15) * 2", :user
+ bot.chat "Tell me the answer to ((5 + 15) * 2) / 10", :user
+ bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }

  ##
  # [system] You are my math assistant.
@@ -73,128 +65,288 @@ convo.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
  # The answer to ((5 + 15) * 2) / 10 is 4.
  ```

- #### Prompts
-
- Both lazy and non-lazy conversations accept text as a prompt.
- Depending on the provider, they may also accept a
- [URI](https://docs.ruby-lang.org/en/master/URI.html)
- or
- [LLM::File](https://0x1eef.github.io/x/llm/LLM/File.html)
- object. Generally a
- [URI](https://docs.ruby-lang.org/en/master/URI.html)
- object is used to reference an image on the web, and an
- [LLM::File](https://0x1eef.github.io/x/llm/LLM/File.html)
- object is used to reference a file on the local filesystem.
- The following list shows the types of prompts that each
- provider accepts:
-
- * OpenAI    => String, URI
- * Gemini    => String, LLM::File
- * Anthropic => String, URI
- * Ollama    => String, URI
+ #### Responses
+
+ The responses API is a recent addition
+ [provided by OpenAI](https://platform.openai.com/docs/guides/conversation-state?api-mode=responses)
+ that lets a client store message state on their servers – and in turn
+ a client can avoid maintaining state manually as well as avoid sending
+ the entire conversation with each request that is made. Although it is
+ primarily supported by OpenAI at the moment, we might see other providers
+ support it in the future. For now
+ [llm.rb supports the responses API](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Responses.html)
+ for the OpenAI provider:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+
+ llm = LLM.openai(ENV["KEY"])
+ bot = LLM::Chat.new(llm).lazy
+ bot.respond File.read("./share/llm/prompts/system.txt"), :developer
+ bot.respond "Tell me the answer to 5 + 15", :user
+ bot.respond "Tell me the answer to (5 + 15) * 2", :user
+ bot.respond "Tell me the answer to ((5 + 15) * 2) / 10", :user
+ bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
+
+ ##
+ # [developer] You are my math assistant.
+ # I will provide you with (simple) equations.
+ # You will provide answers in the format "The answer to <equation> is <answer>".
+ # I will provide you a set of messages. Reply to all of them.
+ # A message is considered unanswered if there is no corresponding assistant response.
+ #
+ # [user] Tell me the answer to 5 + 15
+ # [user] Tell me the answer to (5 + 15) * 2
+ # [user] Tell me the answer to ((5 + 15) * 2) / 10
+ #
+ # [assistant] The answer to 5 + 15 is 20.
+ # The answer to (5 + 15) * 2 is 40.
+ # The answer to ((5 + 15) * 2) / 10 is 4.
+ ```
+
+ ### Audio
+
+ #### Speech
+
+ Some but not all providers implement audio generation capabilities that
+ can create speech from text, transcribe audio to text, or translate
+ audio to text (usually English). The following example uses the OpenAI provider
+ to create an audio file from a text prompt. The audio is then moved to
+ `${HOME}/hello.mp3` as the final step. As always, consult the provider's
+ documentation (eg [OpenAI docs](https://platform.openai.com/docs/api-reference/audio/create))
+ for more information on how to use the audio generation API:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.audio.create_speech(input: "Hello world")
+ File.binwrite File.join(Dir.home, "hello.mp3"),
+   res.audio.string
+ ```
+
+ #### Transcribe
+
+ The following example transcribes an audio file to text. The audio file
+ (`${HOME}/hello.mp3`) was theoretically created in the previous example,
+ and the result is printed to the console. The example uses the OpenAI
+ provider to transcribe the audio file. As always, consult the provider's
+ documentation (eg
+ [OpenAI docs](https://platform.openai.com/docs/api-reference/audio/createTranscription),
+ [Gemini docs](https://ai.google.dev/gemini-api/docs/audio))
+ for more information on how to use the audio transcription API.
+
+ Please also see provider-specific documentation for more provider-specific
+ examples and documentation
+ (eg
+ [LLM::Gemini::Audio](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Audio.html),
+ [LLM::OpenAI::Audio](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Audio.html)):
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.audio.create_transcription(
+   file: LLM::File(File.join(Dir.home, "hello.mp3"))
+ )
+ print res.text, "\n" # => "Hello world."
+ ```
+
+ #### Translate
+
+ The following example translates an audio file to text. In this example
+ the audio file (`${HOME}/bomdia.mp3`) is theoretically in Portuguese,
+ and it is translated to English. The example uses the OpenAI provider,
+ and at the time of writing, it can only translate to English. As always,
+ consult the provider's documentation (eg
+ [OpenAI docs](https://platform.openai.com/docs/api-reference/audio/createTranslation),
+ [Gemini docs](https://ai.google.dev/gemini-api/docs/audio))
+ for more information on how to use the audio translation API.
+
+ Please also see provider-specific documentation for more provider-specific
+ examples and documentation
+ (eg
+ [LLM::Gemini::Audio](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Audio.html),
+ [LLM::OpenAI::Audio](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Audio.html)):
+
+
+ ```ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.audio.create_translation(
+   file: LLM::File(File.join(Dir.home, "bomdia.mp3"))
+ )
+ print res.text, "\n" # => "Good morning."
+ ```
+
+ ### Images
+
+ #### Create
+
+ Some but not all LLM providers implement image generation capabilities that
+ can create new images from a prompt, or edit an existing image with a
+ prompt. The following example uses the OpenAI provider to create an
+ image of a dog on a rocket to the moon. The image is then moved to
+ `${HOME}/dogonrocket.png` as the final step.
+
+ Please also see provider-specific documentation for more provider-specific
+ examples and documentation
+ (eg
+ [LLM::Gemini::Images](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Images.html),
+ [LLM::OpenAI::Images](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Images.html)):
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.images.create(prompt: "a dog on a rocket to the moon")
+ res.urls.each do |url|
+   FileUtils.mv OpenURI.open_uri(url).path,
+     File.join(Dir.home, "dogonrocket.png")
+ end
+ ```
+
+ #### Edit
+
+ The following example is focused on editing a local image with the aid
+ of a prompt. The image (`/images/cat.png`) is returned to us with the cat
+ now wearing a hat. The image is then moved to `${HOME}/catwithhat.png` as
+ the final step.
+
+ Results and quality may vary; consider prompt adjustments if the results
+ are not satisfactory, and consult the provider's documentation
+ (eg
+ [OpenAI docs](https://platform.openai.com/docs/api-reference/images/createEdit),
+ [Gemini docs](https://ai.google.dev/gemini-api/docs/image-generation))
+ for more information on how to use the image editing API.
+
+ Please also see provider-specific documentation for more provider-specific
+ examples and documentation
+ (eg
+ [LLM::Gemini::Images](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Images.html),
+ [LLM::OpenAI::Images](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Images.html)):
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.images.edit(
+   image: LLM::File("/images/cat.png"),
+   prompt: "a cat with a hat",
+ )
+ res.urls.each do |url|
+   FileUtils.mv OpenURI.open_uri(url).path,
+     File.join(Dir.home, "catwithhat.png")
+ end
+ ```
+
+ #### Variations
+
+ The following example is focused on creating variations of a local image.
+ The image (`/images/cat.png`) is returned to us with five different variations.
+ The images are then moved to `${HOME}/catvariation0.png`, `${HOME}/catvariation1.png`
+ and so on as the final step. Consult the provider's documentation
+ (eg [OpenAI docs](https://platform.openai.com/docs/api-reference/images/createVariation))
+ for more information on how to use the image variations API:
+
+ ```ruby
+ #!/usr/bin/env ruby
+ require "llm"
+ require "open-uri"
+ require "fileutils"
+
+ llm = LLM.openai(ENV["KEY"])
+ res = llm.images.create_variation(
+   image: LLM::File("/images/cat.png"),
+   n: 5
+ )
+ res.urls.each.with_index do |url, index|
+   FileUtils.mv OpenURI.open_uri(url).path,
+     File.join(Dir.home, "catvariation#{index}.png")
+ end
+ ```

  ### Embeddings

  #### Text

  The
- [`LLM::Provider#embed`](https://0x1eef.github.io/x/llm/LLM/Provider.html#embed-instance_method)
+ [`LLM::Provider#embed`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html#embed-instance_method)
  method generates a vector representation of one or more chunks
  of text. Embeddings capture the semantic meaning of text &ndash;
  a common use-case for them is to store chunks of text in a
  vector database, and then to query the database for *semantically
  similar* text. These chunks of similar text can then support the
  generation of a prompt that is used to query a large language model,
- which will go on to generate a response.
-
- For example, a user query might find similar text that adds important
- context to the prompt that informs the large language model in how to respond.
- The chunks of text may also carry metadata that can be used to further filter
- and contextualize the search results. This technique is popularly known as
- retrieval-augmented generation (RAG). Embeddings can also be used for
- other purposes as well &ndash; RAG is just one of the most popular use-cases.
-
- Let's take a look at an example that generates a couple of vectors
- for two chunks of text:
+ which will go on to generate a response:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

  llm = LLM.openai(ENV["KEY"])
- res = llm.embed(["programming is fun", "ruby is a programming language"])
+ res = llm.embed(["programming is fun", "ruby is a programming language", "sushi is art"])
  print res.class, "\n"
  print res.embeddings.size, "\n"
  print res.embeddings[0].size, "\n"

  ##
  # LLM::Response::Embedding
- # 2
+ # 3
  # 1536
  ```

- ### LLM
+ ### Memory

- #### Timeouts
+ #### Child process

- When running the ollama provider locally it might take a while for
- the language model to reply &ndash; depending on hardware and the
- size of the model. The following example demonstrates how to wait
- a longer period of time for a response through the use of the
- `timeout` configuration option with the `qwq` model. The following
- example waits up to 15 minutes for a response:
+ When it comes to the generation of audio, images, and video, memory consumption
+ can be a potential problem. There are a few strategies in place to deal with this,
+ and one lesser known strategy is to let a child process handle the memory cost
+ by delegating media generation to a child process.

- ```ruby
- #!/usr/bin/env ruby
- require "llm"
-
- llm = LLM.ollama(nil, timeout: 60*15)
- llm.chat "What is the meaning of life ?", model: "qwq"
- llm.last_message.tap { print "[assistant] ", _1.content, "\n" }
- ```
-
- #### Models
-
- Generally each Large Language Model provides multiple models to choose
- from, and each model has its own set of capabilities and limitations.
- The following example demonstrates how to query the list of models
- through the
- [LLM::Provider#models](http://0x1eef.github.io/x/llm/LLM/Provider.html#models-instance_method)
- method &ndash; the example happens to use the ollama provider but
- this can be done for any provider:
+ Once a child process exits, any memory it had used is freed immediately and
+ the parent process can continue to have a small memory footprint. In a sense
+ it is similar to being able to use malloc + free from Ruby. The following example
+ demonstrates what that might look like in practice:

  ```ruby
  #!/usr/bin/env ruby
  require "llm"

- ##
- # List models
- llm = LLM.ollama(nil)
- llm.models.each { print "#{_2.name}: #{_2.description}", "\n" }
-
- ##
- # Select a model
- llm.chat "Hello, world!", model: llm.models["qwq"]
-
- ##
- # This also works
- llm.chat "Hello, world!", model: "qwq"
+ llm = LLM.gemini(ENV["KEY"])
+ fork do
+   %w[dog cat sheep goat capybara].each do |animal|
+     res = llm.images.create(prompt: "a #{animal} on a rocket to the moon")
+     File.binwrite "#{animal}.png", res.images[0].binary
+   end
+ end
+ Process.wait
  ```
- ## Providers

- - [x] [Anthropic](https://www.anthropic.com/)
- - [x] [OpenAI](https://platform.openai.com/docs/overview)
- - [x] [Gemini](https://ai.google.dev/gemini-api/docs)
- - [x] [Ollama](https://github.com/ollama/ollama#readme)
- - [ ] Hugging Face
- - [ ] Cohere
- - [ ] AI21 Labs
- - [ ] Replicate
- - [ ] Mistral AI
+ ## API reference

- ## Documentation
+ The README tries to provide a high-level overview of the library. For everything
+ else there's the API reference. It covers classes and methods that the README glances
+ over or doesn't cover at all. The API reference is available at
+ [0x1eef.github.io/x/llm.rb](https://0x1eef.github.io/x/llm.rb).

- A complete API reference is available at [0x1eef.github.io/x/llm](https://0x1eef.github.io/x/llm)

  ## Install

@@ -204,4 +356,6 @@ llm.rb can be installed via rubygems.org:

  ## License

- MIT. See [LICENSE.txt](LICENSE.txt) for more details
+ [BSD Zero Clause](https://choosealicense.com/licenses/0bsd/)
+ <br>
+ See [LICENSE](./LICENSE)
data/lib/llm/buffer.rb ADDED
@@ -0,0 +1,83 @@
+ # frozen_string_literal: true
+
+ module LLM
+   ##
+   # @private
+   # {LLM::Buffer LLM::Buffer} provides an Enumerable object that
+   # yields each message in a conversation on-demand, and only sends
+   # a request to the LLM when a response is needed.
+   class Buffer
+     include Enumerable
+
+     ##
+     # @param [LLM::Provider] provider
+     # @return [LLM::Buffer]
+     def initialize(provider)
+       @provider = provider
+       @pending = []
+       @completed = []
+     end
+
+     ##
+     # @yield [LLM::Message]
+     #  Yields each message in the conversation thread
+     # @raise (see LLM::Provider#complete)
+     # @return [void]
+     def each
+       empty! unless @pending.empty?
+       @completed.each { yield(_1) }
+     end
+
+     ##
+     # @param [[LLM::Message, Hash]] item
+     #  A message and its parameters
+     # @return [void]
+     def <<(item)
+       @pending << item
+       self
+     end
+     alias_method :push, :<<
+
+     ##
+     # @return [String]
+     def inspect
+       "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+       "completed_count=#{@completed.size} pending_count=#{@pending.size}>"
+     end
+
+     private
+
+     def empty!
+       message, params, method = @pending[-1]
+       if method == :complete
+         complete!(message, params)
+       elsif method == :respond
+         respond!(message, params)
+       else
+         raise LLM::Error, "Unknown method: #{method}"
+       end
+     end
+
+     def complete!(message, params)
+       messages = @pending[0..-2].map { _1[0] }
+       completion = @provider.complete(
+         message.content,
+         message.role,
+         **params.merge(messages:)
+       )
+       @completed.concat([*messages, message, completion.choices[0]])
+       @pending.clear
+     end
+
+     def respond!(message, params)
+       input = @pending[0..-2].map { _1[0] }
+       @response = @provider.responses.create(
+         message.content,
+         message.role,
+         **params.merge(input:).merge(@response ? {previous_response_id: @response.id} : {})
+       )
+       @completed.concat([*input, message, @response.outputs[0]])
+       @pending.clear
+     end
+   end
+ end
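LLM::Buffer is what backs LLM::Chat#messages once lazy mode is enabled: prompts pushed through `chat` or `respond` stay pending, and the buffer only issues a request when the thread is enumerated. A minimal sketch of that behaviour through the public API (assuming the 0.3.0 interface shown in this diff and an API key in `ENV["KEY"]`):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(ENV["KEY"])
bot = LLM::Chat.new(llm).lazy

# Nothing is sent yet - both messages are buffered as pending.
bot.chat "You are my math assistant", :system
bot.chat "What is 5 + 5 ?", :user

# Enumerating the buffer flushes the pending messages in a single
# request and then yields the full thread, including the reply.
bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
```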
data/lib/llm/chat.rb ADDED
@@ -0,0 +1,131 @@
+ # frozen_string_literal: true
+
+ module LLM
+   ##
+   # {LLM::Chat LLM::Chat} provides a chat object that maintains a
+   # thread of messages that acts as context throughout a conversation.
+   # A conversation can use the chat completions API that most LLM providers
+   # support or the responses API that a select few LLM providers support.
+   #
+   # @example
+   #   #!/usr/bin/env ruby
+   #   require "llm"
+   #
+   #   llm = LLM.openai(ENV["KEY"])
+   #   bot = LLM::Chat.new(llm).lazy
+   #   bot.chat("Your task is to answer all of my questions", :system)
+   #   bot.chat("Your answers should be short and concise", :system)
+   #   bot.chat("What is 5 + 7 ?", :user)
+   #   bot.chat("Why is the sky blue ?", :user)
+   #   bot.chat("Why did the chicken cross the road ?", :user)
+   #   bot.messages.map { print "[#{_1.role}]", _1.content, "\n" }
+   class Chat
+     ##
+     # @return [Array<LLM::Message>]
+     attr_reader :messages
+
+     ##
+     # @param [LLM::Provider] provider
+     #  A provider
+     # @param [Hash] params
+     #  The parameters to maintain throughout the conversation
+     def initialize(provider, params = {})
+       @provider = provider
+       @params = params
+       @lazy = false
+       @messages = []
+     end
+
+     ##
+     # Maintain a conversation via the chat completions API
+     # @param prompt (see LLM::Provider#prompt)
+     # @param role (see LLM::Provider#prompt)
+     # @param params (see LLM::Provider#prompt)
+     # @return [LLM::Chat]
+     def chat(prompt, role = :user, **params)
+       if lazy?
+         @messages << [LLM::Message.new(role, prompt), @params.merge(params), :complete]
+         self
+       else
+         completion = complete!(prompt, role, params)
+         @messages.concat [Message.new(role, prompt), completion.choices[0]]
+         self
+       end
+     end
+
+     ##
+     # Maintain a conversation via the responses API
+     # @note Not all LLM providers support this API
+     # @param prompt (see LLM::Provider#prompt)
+     # @param role (see LLM::Provider#prompt)
+     # @param params (see LLM::Provider#prompt)
+     # @return [LLM::Chat]
+     def respond(prompt, role = :user, **params)
+       if lazy?
+         @messages << [LLM::Message.new(role, prompt), @params.merge(params), :respond]
+         self
+       else
+         @response = respond!(prompt, role, params)
+         @messages.concat [Message.new(role, prompt), @response.outputs[0]]
+         self
+       end
+     end
+
+     ##
+     # The last message in the conversation.
+     # @note
+     #  The `read_response` and `recent_message` methods are aliases of
+     #  the `last_message` method, and you can choose the name that best
+     #  fits your context or code style.
+     # @param [#to_s] role
+     #  The role of the last message.
+     # @return [LLM::Message]
+     def last_message(role: @provider.assistant_role)
+       messages.reverse_each.find { _1.role == role.to_s }
+     end
+     alias_method :recent_message, :last_message
+     alias_method :read_response, :last_message
+
+     ##
+     # Enables lazy mode for the conversation.
+     # @return [LLM::Chat]
+     def lazy
+       tap do
+         next if lazy?
+         @lazy = true
+         @messages = LLM::Buffer.new(@provider)
+       end
+     end
+
+     ##
+     # @return [Boolean]
+     #  Returns true if the conversation is lazy
+     def lazy?
+       @lazy
+     end
+
+     def inspect
+       "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+       "@provider=#{@provider.class}, @params=#{@params.inspect}, " \
+       "@messages=#{@messages.inspect}, @lazy=#{@lazy.inspect}>"
+     end
+
+     private
+
+     def respond!(prompt, role, params)
+       @provider.responses.create(
+         prompt,
+         role,
+         **@params.merge(params.merge(@response ? {previous_response_id: @response.id} : {}))
+       )
+     end
+
+     def complete!(prompt, role, params)
+       @provider.complete(
+         prompt,
+         role,
+         **@params.merge(params.merge(messages:))
+       )
+     end
+   end
+ end
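The two private methods above are where the chat completions and responses APIs diverge: `complete!` resends the accumulated thread via the `messages:` parameter, while `respond!` sends only the new prompt and chains server-side state through `previous_response_id`. A short sketch of the difference in non-lazy mode (illustrative only, assuming the OpenAI provider; the prompts are arbitrary):

```ruby
#!/usr/bin/env ruby
require "llm"

llm = LLM.openai(ENV["KEY"])

# Chat completions API: every call resends the whole thread
# as the messages: parameter.
chat_bot = LLM::Chat.new(llm)
chat_bot.chat "You are a terse assistant", :system
chat_bot.chat "What is 2 + 2 ?", :user

# Responses API: state lives on the provider's servers, so each
# call carries only the new prompt plus previous_response_id.
responses_bot = LLM::Chat.new(llm)
responses_bot.respond "You are a terse assistant", :developer
responses_bot.respond "What is 2 + 2 ?", :user

[chat_bot, responses_bot].each do |bot|
  print "[assistant] ", bot.last_message.content, "\n"
end
```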