llm.rb 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/README.md +318 -110
  3. data/lib/llm/buffer.rb +83 -0
  4. data/lib/llm/chat.rb +131 -0
  5. data/lib/llm/error.rb +3 -3
  6. data/lib/llm/file.rb +36 -40
  7. data/lib/llm/message.rb +21 -8
  8. data/lib/llm/mime.rb +54 -0
  9. data/lib/llm/multipart.rb +100 -0
  10. data/lib/llm/provider.rb +123 -21
  11. data/lib/llm/providers/anthropic/error_handler.rb +3 -1
  12. data/lib/llm/providers/anthropic/format.rb +2 -0
  13. data/lib/llm/providers/anthropic/response_parser.rb +3 -1
  14. data/lib/llm/providers/anthropic.rb +14 -5
  15. data/lib/llm/providers/gemini/audio.rb +77 -0
  16. data/lib/llm/providers/gemini/error_handler.rb +4 -2
  17. data/lib/llm/providers/gemini/files.rb +162 -0
  18. data/lib/llm/providers/gemini/format.rb +12 -6
  19. data/lib/llm/providers/gemini/images.rb +99 -0
  20. data/lib/llm/providers/gemini/response_parser.rb +27 -1
  21. data/lib/llm/providers/gemini.rb +62 -6
  22. data/lib/llm/providers/ollama/error_handler.rb +3 -1
  23. data/lib/llm/providers/ollama/format.rb +13 -5
  24. data/lib/llm/providers/ollama/response_parser.rb +3 -1
  25. data/lib/llm/providers/ollama.rb +30 -7
  26. data/lib/llm/providers/openai/audio.rb +97 -0
  27. data/lib/llm/providers/openai/error_handler.rb +3 -1
  28. data/lib/llm/providers/openai/files.rb +148 -0
  29. data/lib/llm/providers/openai/format.rb +22 -8
  30. data/lib/llm/providers/openai/images.rb +109 -0
  31. data/lib/llm/providers/openai/response_parser.rb +58 -5
  32. data/lib/llm/providers/openai/responses.rb +85 -0
  33. data/lib/llm/providers/openai.rb +52 -6
  34. data/lib/llm/providers/voyageai/error_handler.rb +1 -1
  35. data/lib/llm/providers/voyageai.rb +2 -2
  36. data/lib/llm/response/audio.rb +13 -0
  37. data/lib/llm/response/audio_transcription.rb +14 -0
  38. data/lib/llm/response/audio_translation.rb +14 -0
  39. data/lib/llm/response/download_file.rb +15 -0
  40. data/lib/llm/response/file.rb +42 -0
  41. data/lib/llm/response/filelist.rb +18 -0
  42. data/lib/llm/response/image.rb +29 -0
  43. data/lib/llm/response/output.rb +56 -0
  44. data/lib/llm/response.rb +18 -6
  45. data/lib/llm/utils.rb +19 -0
  46. data/lib/llm/version.rb +1 -1
  47. data/lib/llm.rb +5 -2
  48. data/llm.gemspec +1 -6
  49. data/spec/anthropic/completion_spec.rb +1 -1
  50. data/spec/gemini/completion_spec.rb +1 -1
  51. data/spec/gemini/conversation_spec.rb +31 -0
  52. data/spec/gemini/files_spec.rb +124 -0
  53. data/spec/gemini/images_spec.rb +47 -0
  54. data/spec/llm/conversation_spec.rb +107 -62
  55. data/spec/ollama/completion_spec.rb +1 -1
  56. data/spec/ollama/conversation_spec.rb +31 -0
  57. data/spec/openai/audio_spec.rb +55 -0
  58. data/spec/openai/completion_spec.rb +5 -4
  59. data/spec/openai/files_spec.rb +204 -0
  60. data/spec/openai/images_spec.rb +95 -0
  61. data/spec/openai/responses_spec.rb +51 -0
  62. data/spec/setup.rb +8 -0
  63. metadata +31 -50
  64. data/LICENSE.txt +0 -21
  65. data/lib/llm/conversation.rb +0 -90
  66. data/lib/llm/http_client.rb +0 -29
  67. data/lib/llm/message_queue.rb +0 -54
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 732a483717a5ec8e443077fb71294b1e301c3a8867b225c1fc2a58bd02fe3130
-  data.tar.gz: a1c2591a07c413cebfdffa99d133855bb177cc4a6607860333dbc9991da8d33e
+  metadata.gz: 3939075c064b4abfd8853c3f67b6db7df6111d340d658d4d8ad0c4d1bccc96bc
+  data.tar.gz: 0ca274d3e4b032c25730aef896df903681c28033ebb0907c965339a33aff56d1
 SHA512:
-  metadata.gz: 4f5983f97b3c1e25f4147ec81f6d91df5073ea03dc4031690979f68b2053bf73bae07f7c57c3f7c9813dfd5b43eb1bd7364d5f5929234013d6b19bb49f9271ec
-  data.tar.gz: 286f560ce2d9e048e481796d27fd6c9a658b92cec0267f9527fad49bf5349ece0d05b943c086cce3c12bbd82f53dbf086ed91c516d39b815bd6106edca21914a
+  metadata.gz: feaf87457b8fa5b4f756a5fe8cc1f670c8b0286a730fe00273bc99678092fe7f704d58f01ba0a0baf4072a0dcee063bc87cf88bc7cdf53125334476adbce41f6
+  data.tar.gz: 3be8b460d9b483c0e172d9159b2394ea39da7a1475aee3ab47b224303e2a251f3b04f0543402494485040998225f84342be986db8c7b8ea80df92f561d4d6d92
data/README.md CHANGED
@@ -2,7 +2,10 @@
 
 llm.rb is a lightweight library that provides a common interface
 and set of functionality for multiple Large Language Models (LLMs). It
-is designed to be simple, flexible, and easy to use.
+is designed to be simple, flexible, and easy to use – and it has been
+implemented with zero dependencies outside Ruby's standard library. See the
+[philosophy](#philosophy) section for more information on the design principles
+behind llm.rb.
 
 ## Examples
 
@@ -10,10 +13,10 @@ is designed to be simple, flexible, and easy to use.
 
 #### LLM::Provider
 
-All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm/LLM/Provider.html) –
+All providers inherit from [LLM::Provider](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html) –
 they share a common interface and set of functionality. Each provider can be instantiated
 using an API key (if required) and an optional set of configuration options via
-[the singleton methods of LLM](https://0x1eef.github.io/x/llm/LLM.html). For example:
+[the singleton methods of LLM](https://0x1eef.github.io/x/llm.rb/LLM.html). For example:
 
 ```ruby
 #!/usr/bin/env ruby
@@ -25,37 +28,28 @@ llm = LLM.anthropic("yourapikey")
 llm = LLM.ollama(nil)
 ```
 
-### Completions
+### Conversations
 
-#### Conversation
+#### Completions
 
-The
-[LLM::Provider#chat](https://0x1eef.github.io/x/llm/LLM/Provider.html#chat-instance_method)
-method returns a lazy-variant of a
-[LLM::Conversation](https://0x1eef.github.io/x/llm/LLM/Conversation.html)
-object, and it allows for a "lazy" conversation where messages are batched and
-sent to the provider only when necessary. The non-lazy counterpart is available via the
-[LLM::Provider#chat!](https://0x1eef.github.io/x/llm/LLM/Provider.html#chat!-instance_method)
-method.
-
-Both lazy and non-lazy conversations maintain a message thread that can
-be reused as context throughout a conversation. For the sake of brevity the system
-prompt is loaded from
-[a file](./share/llm/prompts/system.txt)
-in the following example – all other prompts are "user" prompts –
-and a single request is made to the provider when iterating over the messages
-belonging to a lazy conversation:
+The following example enables lazy mode for a
+[LLM::Chat](https://0x1eef.github.io/x/llm.rb/LLM/Chat.html)
+object by entering into a "lazy" conversation where messages are buffered and
+sent to the provider only when necessary. Both lazy and non-lazy conversations
+maintain a message thread that can be reused as context throughout a conversation.
+The example uses the stateless chat completions API that all LLM providers support:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(ENV["KEY"])
-convo = llm.chat File.read("./share/llm/prompts/system.txt"), :system
-convo.chat "Tell me the answer to 5 + 15"
-convo.chat "Tell me the answer to (5 + 15) * 2"
-convo.chat "Tell me the answer to ((5 + 15) * 2) / 10"
-convo.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
+bot = LLM::Chat.new(llm).lazy
+bot.chat File.read("./share/llm/prompts/system.txt"), :system
+bot.chat "Tell me the answer to 5 + 15", :user
+bot.chat "Tell me the answer to (5 + 15) * 2", :user
+bot.chat "Tell me the answer to ((5 + 15) * 2) / 10", :user
+bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
 
 ##
 # [system] You are my math assistant.
@@ -73,128 +67,324 @@ convo.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
 # The answer to ((5 + 15) * 2) / 10 is 4.
 ```
 
-#### Prompts
-
-Both lazy and non-lazy conversations accept text as a prompt.
-Depending on the provider, they may also accept a
-[URI](https://docs.ruby-lang.org/en/master/URI.html)
-or
-[LLM::File](https://0x1eef.github.io/x/llm/LLM/File.html)
-object. Generally a
-[URI](https://docs.ruby-lang.org/en/master/URI.html)
-object is used to reference an image on the web, and an
-[LLM::File](https://0x1eef.github.io/x/llm/LLM/File.html)
-object is used to reference a file on the local filesystem.
-The following list shows the types of prompts that each
-provider accepts:
-
-* OpenAI       =>   String, URI
-* Gemini        =>   String, LLM::File
-* Anthropic   =>   String, URI
-* Ollama        =>   String, URI
+#### Responses
+
+The responses API is a recent addition
+[provided by OpenAI](https://platform.openai.com/docs/guides/conversation-state?api-mode=responses)
+that lets a client store message state on their servers – and in turn
+a client can avoid maintaining state manually as well as avoid sending
+the entire conversation with each request that is made. Although it is
+primarily supported by OpenAI at the moment, we might see other providers
+support it in the future. For now
+[llm.rb supports the responses API](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Responses.html)
+for the OpenAI provider:
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+
+llm = LLM.openai(ENV["KEY"])
+bot = LLM::Chat.new(llm).lazy
+bot.respond File.read("./share/llm/prompts/system.txt"), :developer
+bot.respond "Tell me the answer to 5 + 15", :user
+bot.respond "Tell me the answer to (5 + 15) * 2", :user
+bot.respond "Tell me the answer to ((5 + 15) * 2) / 10", :user
+bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
+
+##
+# [developer] You are my math assistant.
+# I will provide you with (simple) equations.
+# You will provide answers in the format "The answer to <equation> is <answer>".
+# I will provide you a set of messages. Reply to all of them.
+# A message is considered unanswered if there is no corresponding assistant response.
+#
+# [user] Tell me the answer to 5 + 15
+# [user] Tell me the answer to (5 + 15) * 2
+# [user] Tell me the answer to ((5 + 15) * 2) / 10
+#
+# [assistant] The answer to 5 + 15 is 20.
+# The answer to (5 + 15) * 2 is 40.
+# The answer to ((5 + 15) * 2) / 10 is 4.
+```
+
+### Audio
+
+#### Speech
+
+Some but not all providers implement audio generation capabilities that
+can create speech from text, transcribe audio to text, or translate
+audio to text (usually English). The following example uses the OpenAI provider
+to create an audio file from a text prompt. The audio is then moved to
+`${HOME}/hello.mp3` as the final step. As always, consult the provider's
+documentation (eg [OpenAI docs](https://platform.openai.com/docs/api-reference/audio/create))
+for more information on how to use the audio generation API:
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.audio.create_speech(input: "Hello world")
+File.binwrite File.join(Dir.home, "hello.mp3"),
+  res.audio.string
+```
+
+#### Transcribe
+
+The following example transcribes an audio file to text. The audio file
+(`${HOME}/hello.mp3`) was theoretically created in the previous example,
+and the result is printed to the console. The example uses the OpenAI
+provider to transcribe the audio file. As always, consult the provider's
+documentation (eg
+[OpenAI docs](https://platform.openai.com/docs/api-reference/audio/createTranscription),
+[Gemini docs](https://ai.google.dev/gemini-api/docs/audio))
+for more information on how to use the audio transcription API.
+
+Please also see provider-specific documentation for more provider-specific
+examples and documentation
+(eg
+[LLM::Gemini::Audio](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Audio.html),
+[LLM::OpenAI::Audio](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Audio.html)):
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.audio.create_transcription(
+  file: LLM::File(File.join(Dir.home, "hello.mp3"))
+)
+print res.text, "\n" # => "Hello world."
+```
+
+#### Translate
+
+The following example translates an audio file to text. In this example
+the audio file (`${HOME}/bomdia.mp3`) is theoretically in Portuguese,
+and it is translated to English. The example uses the OpenAI provider,
+and at the time of writing, it can only translate to English. As always,
+consult the provider's documentation (eg
+[OpenAI docs](https://platform.openai.com/docs/api-reference/audio/createTranslation),
+[Gemini docs](https://ai.google.dev/gemini-api/docs/audio))
+for more information on how to use the audio translation API.
+
+Please also see provider-specific documentation for more provider-specific
+examples and documentation
+(eg
+[LLM::Gemini::Audio](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Audio.html),
+[LLM::OpenAI::Audio](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Audio.html)):
+
+
+```ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.audio.create_translation(
+  file: LLM::File(File.join(Dir.home, "bomdia.mp3"))
+)
+print res.text, "\n" # => "Good morning."
+```
+
+### Images
+
+#### Create
+
+Some but not all LLM providers implement image generation capabilities that
+can create new images from a prompt, or edit an existing image with a
+prompt. The following example uses the OpenAI provider to create an
+image of a dog on a rocket to the moon. The image is then moved to
+`${HOME}/dogonrocket.png` as the final step.
+
+Please also see provider-specific documentation for more provider-specific
+examples and documentation
+(eg
+[LLM::Gemini::Images](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Images.html),
+[LLM::OpenAI::Images](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Images.html)):
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.images.create(prompt: "a dog on a rocket to the moon")
+res.urls.each do |url|
+  FileUtils.mv OpenURI.open_uri(url).path,
+    File.join(Dir.home, "dogonrocket.png")
+end
+```
+
+#### Edit
+
+The following example is focused on editing a local image with the aid
+of a prompt. The image (`/images/cat.png`) is returned to us with the cat
+now wearing a hat. The image is then moved to `${HOME}/catwithhat.png` as
+the final step.
+
+Results and quality may vary, consider prompt adjustments if the results
+are not satisfactory, and consult the provider's documentation
+(eg
+[OpenAI docs](https://platform.openai.com/docs/api-reference/images/createEdit),
+[Gemini docs](https://ai.google.dev/gemini-api/docs/image-generation))
+for more information on how to use the image editing API.
+
+Please also see provider-specific documentation for more provider-specific
+examples and documentation
+(eg
+[LLM::Gemini::Images](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Images.html),
+[LLM::OpenAI::Images](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Images.html)):
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.images.edit(
+  image: LLM::File("/images/cat.png"),
+  prompt: "a cat with a hat",
+)
+res.urls.each do |url|
+  FileUtils.mv OpenURI.open_uri(url).path,
+    File.join(Dir.home, "catwithhat.png")
+end
+```
+
+#### Variations
+
+The following example is focused on creating variations of a local image.
+The image (`/images/cat.png`) is returned to us with five different variations.
+The images are then moved to `${HOME}/catvariation0.png`, `${HOME}/catvariation1.png`
+and so on as the final step. Consult the provider's documentation
+(eg [OpenAI docs](https://platform.openai.com/docs/api-reference/images/createVariation))
+for more information on how to use the image variations API:
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+require "open-uri"
+require "fileutils"
+
+llm = LLM.openai(ENV["KEY"])
+res = llm.images.create_variation(
+  image: LLM::File("/images/cat.png"),
+  n: 5
+)
+res.urls.each.with_index do |url, index|
+  FileUtils.mv OpenURI.open_uri(url).path,
+    File.join(Dir.home, "catvariation#{index}.png")
+end
+```
+
+### Files
+
+#### Create
+
+Most LLM providers provide a Files API where you can upload files
+that can be referenced from a prompt and llm.rb has first-class support
+for this feature. The following example uses the OpenAI provider to describe
+the contents of a PDF file after it has been uploaded. The file (an instance
+of [LLM::Response::File](https://0x1eef.github.io/x/llm.rb/LLM/Response/File.html))
+is passed directly to the chat method, and generally any object a prompt supports
+can be given to the chat method.
+
+Please also see provider-specific documentation for more provider-specific
+examples and documentation
+(eg
+[LLM::Gemini::Files](https://0x1eef.github.io/x/llm.rb/LLM/Gemini/Files.html),
+[LLM::OpenAI::Files](https://0x1eef.github.io/x/llm.rb/LLM/OpenAI/Files.html)):
+
+```ruby
+#!/usr/bin/env ruby
+require "llm"
+
+llm = LLM.openai(ENV["KEY"])
+bot = LLM::Chat.new(llm).lazy
+file = llm.files.create(file: LLM::File("/documents/openbsd_is_awesome.pdf"))
+bot.chat(file)
+bot.chat("What is this file about?")
+bot.messages.select(&:assistant?).each { print "[#{_1.role}] ", _1.content, "\n" }
+
+##
+# [assistant] This file is about OpenBSD, a free and open-source Unix-like operating system
+# based on the Berkeley Software Distribution (BSD). It is known for its
+# emphasis on security, code correctness, and code simplicity. The file
+# contains information about the features, installation, and usage of OpenBSD.
+```
 
 ### Embeddings
 
 #### Text
 
 The
-[`LLM::Provider#embed`](https://0x1eef.github.io/x/llm/LLM/Provider.html#embed-instance_method)
+[`LLM::Provider#embed`](https://0x1eef.github.io/x/llm.rb/LLM/Provider.html#embed-instance_method)
 method generates a vector representation of one or more chunks
 of text. Embeddings capture the semantic meaning of text &ndash;
 a common use-case for them is to store chunks of text in a
 vector database, and then to query the database for *semantically
 similar* text. These chunks of similar text can then support the
 generation of a prompt that is used to query a large language model,
-which will go on to generate a response.
-
-For example, a user query might find similar text that adds important
-context to the prompt that informs the large language model in how to respond.
-The chunks of text may also carry metadata that can be used to further filter
-and contextualize the search results. This technique is popularly known as
-retrieval-augmented generation (RAG). Embeddings can also be used for
-other purposes as well &ndash; RAG is just one of the most popular use-cases.
-
-Let's take a look at an example that generates a couple of vectors
-for two chunks of text:
+which will go on to generate a response:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
 llm = LLM.openai(ENV["KEY"])
-res = llm.embed(["programming is fun", "ruby is a programming language"])
+res = llm.embed(["programming is fun", "ruby is a programming language", "sushi is art"])
 print res.class, "\n"
 print res.embeddings.size, "\n"
 print res.embeddings[0].size, "\n"
 
 ##
 # LLM::Response::Embedding
-# 2
+# 3
 # 1536
 ```
 
-### LLM
-
-#### Timeouts
-
-When running the ollama provider locally it might take a while for
-the language model to reply &ndash; depending on hardware and the
-size of the model. The following example demonstrates how to wait
-a longer period of time for a response through the use of the
-`timeout` configuration option with the `qwq` model. The following
-example waits up to 15 minutes for a response:
-
-```ruby
-#!/usr/bin/env ruby
-require "llm"
+### Memory
 
-llm = LLM.ollama(nil, timeout: 60*15)
-llm.chat "What is the meaning of life ?", model: "qwq"
-llm.last_message.tap { print "[assistant] ", _1.content, "\n" }
-```
+#### Child process
 
-#### Models
+When it comes to the generation of audio, images, and video memory consumption
+can be a potential problem. There are a few strategies in place to deal with this,
+and one lesser known strategy is to let a child process handle the memory cost
+by delegating media generation to a child process.
 
-Generally each Large Language Model provides multiple models to choose
-from, and each model has its own set of capabilities and limitations.
-The following example demonstrates how to query the list of models
-through the
-[LLM::Provider#models](http://0x1eef.github.io/x/llm/LLM/Provider.html#models-instance_method)
-method &ndash; the example happens to use the ollama provider but
-this can be done for any provider:
+Once a child process exits, any memory it had used is freed immediately and
+the parent process can continue to have a small memory footprint. In a sense
+it is similar to being able to use malloc + free from Ruby. The following example
+demonstrates how that might look like in practice:
 
 ```ruby
 #!/usr/bin/env ruby
 require "llm"
 
-##
-# List models
-llm = LLM.ollama(nil)
-llm.models.each { print "#{_2.name}: #{_2.description}", "\n" }
-
-##
-# Select a model
-llm.chat "Hello, world!", model: llm.models["qwq"]
-
-##
-# This also works
-llm.chat "Hello, world!", model: "qwq"
+llm = LLM.gemini(ENV["KEY"])
+fork do
+  %w[dog cat sheep goat capybara].each do |animal|
+    res = llm.images.create(prompt: "a #{animal} on a rocket to the moon")
+    File.binwrite "#{animal}.png", res.images[0].binary
+  end
+end
+Process.wait
 ```
-## Providers
 
-- [x] [Anthropic](https://www.anthropic.com/)
-- [x] [OpenAI](https://platform.openai.com/docs/overview)
-- [x] [Gemini](https://ai.google.dev/gemini-api/docs)
-- [x] [Ollama](https://github.com/ollama/ollama#readme)
-- [ ] Hugging Face
-- [ ] Cohere
-- [ ] AI21 Labs
-- [ ] Replicate
-- [ ] Mistral AI
+## API reference
 
-## Documentation
+The README tries to provide a high-level overview of the library. For everything
+else there's the API reference. It covers classes and methods that the README glances
+over or doesn't cover at all. The API reference is available at
+[0x1eef.github.io/x/llm.rb](https://0x1eef.github.io/x/llm.rb).
 
-A complete API reference is available at [0x1eef.github.io/x/llm](https://0x1eef.github.io/x/llm)
 
 ## Install
 
@@ -202,6 +392,24 @@ llm.rb can be installed via rubygems.org:
 
 gem install llm.rb
 
+## Philosophy
+
+llm.rb was built for developers who believe that simplicity is strength.
+It provides a clean, dependency-free interface to Large Language Models,
+treating Ruby itself as the primary platform &ndash; not Rails or any other
+specific framework or library. There is no hidden magic or extreme
+metaprogramming.
+
+Every part of llm.rb is designed to be explicit, composable, memory-safe,
+and production-ready without compromise. No unnecessary abstractions,
+no global configuration, and no dependencies that aren't part of standard
+Ruby. It has been inspired in part by other languages such as Python, but
+it is not a port of any other library.
+
+Good software doesn’t need marketing. It just needs to work. :)
+
 ## License
 
-MIT. See [LICENSE.txt](LICENSE.txt) for more details
+[BSD Zero Clause](https://choosealicense.com/licenses/0bsd/)
+<br>
+See [LICENSE](./LICENSE)
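
The lazy conversation behaviour described in the README changes above is backed by the new `LLM::Buffer` class added below. A minimal sketch of how it is expected to be driven, based only on the README examples in this diff (the API key and prompts are placeholders):

```ruby
#!/usr/bin/env ruby
require "llm"

# In lazy mode, chat calls only buffer messages (via LLM::Buffer);
# no request is sent to the provider yet.
llm = LLM.openai(ENV["KEY"])
bot = LLM::Chat.new(llm).lazy
bot.chat "You are my math assistant.", :system
bot.chat "Tell me the answer to 5 + 15", :user
bot.chat "Tell me the answer to (5 + 15) * 2", :user

# Reading the message thread drains the buffer and makes a single
# completion request that covers all pending messages.
bot.messages.each { print "[#{_1.role}] ", _1.content, "\n" }
```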
data/lib/llm/buffer.rb ADDED
@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+
+module LLM
+  ##
+  # @private
+  # {LLM::Buffer LLM::Buffer} provides an Enumerable object that
+  # yields each message in a conversation on-demand, and only sends
+  # a request to the LLM when a response is needed.
+  class Buffer
+    include Enumerable
+
+    ##
+    # @param [LLM::Provider] provider
+    # @return [LLM::Buffer]
+    def initialize(provider)
+      @provider = provider
+      @pending = []
+      @completed = []
+    end
+
+    ##
+    # @yield [LLM::Message]
+    #  Yields each message in the conversation thread
+    # @raise (see LLM::Provider#complete)
+    # @return [void]
+    def each
+      empty! unless @pending.empty?
+      @completed.each { yield(_1) }
+    end
+
+    ##
+    # @param [[LLM::Message, Hash]] item
+    #  A message and its parameters
+    # @return [void]
+    def <<(item)
+      @pending << item
+      self
+    end
+    alias_method :push, :<<
+
+    ##
+    # @return [String]
+    def inspect
+      "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
+      "completed_count=#{@completed.size} pending_count=#{@pending.size}>"
+    end
+
+    private
+
+    def empty!
+      message, params, method = @pending[-1]
+      if method == :complete
+        complete!(message, params)
+      elsif method == :respond
+        respond!(message, params)
+      else
+        raise LLM::Error, "Unknown method: #{method}"
+      end
+    end
+
+    def complete!(message, params)
+      messages = @pending[0..-2].map { _1[0] }
+      completion = @provider.complete(
+        message.content,
+        message.role,
+        **params.merge(messages:)
+      )
+      @completed.concat([*messages, message, completion.choices[0]])
+      @pending.clear
+    end
+
+    def respond!(message, params)
+      input = @pending[0..-2].map { _1[0] }
+      @response = @provider.responses.create(
+        message.content,
+        message.role,
+        **params.merge(input:).merge(@response ? {previous_response_id: @response.id} : {})
+      )
+      @completed.concat([*input, message, @response.outputs[0]])
+      @pending.clear
+    end
+  end
+ end